
Commit f17c9f5: Fix erroneous "unused lora tensors"

Parent: d22f183

1 file changed: lora.hpp (32 additions, 41 deletions)
@@ -302,31 +302,22 @@ struct LoraModel : public GGMLRunner {
 lora_down = ggml_cont(compute_ctx, lora_down_concat);
 lora_up = ggml_cont(compute_ctx, lora_up_concat);

-lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
-lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";
+// lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
+// lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";

-lora_tensors[lora_down_name] = lora_down;
-lora_tensors[lora_up_name] = lora_up;
+// lora_tensors[lora_down_name] = lora_down;
+// lora_tensors[lora_up_name] = lora_up;

 // Would be nice to be able to clean up lora_tensors, but it breaks because this is called twice :/
-// lora_tensors.erase(split_q_u_name);
-// lora_tensors.erase(split_k_u_name);
-// lora_tensors.erase(split_v_u_name);
-// lora_tensors.erase(split_m_u_name);
-
-// lora_tensors.erase(split_q_d_name);
-// lora_tensors.erase(split_k_d_name);
-// lora_tensors.erase(split_v_d_name);
-// lora_tensors.erase(split_m_d_name);
-
-} else {
-// lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";
-// lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
-// if (lora_tensors.find(lora_up_name) != lora_tensors.end()) {
-// // print_ggml_tensor(lora_tensors[lora_down_name], true); // [3072, R, 1, 1]
-// // print_ggml_tensor(lora_tensors[lora_up_name], true); // [R, 21504, 1, 1]
-// // print_ggml_tensor(it.second, true); // [3072, 21504, 1, 1]
-// }
+applied_lora_tensors.insert(split_q_u_name);
+applied_lora_tensors.insert(split_k_u_name);
+applied_lora_tensors.insert(split_v_u_name);
+applied_lora_tensors.insert(split_m_u_name);
+
+applied_lora_tensors.insert(split_q_d_name);
+applied_lora_tensors.insert(split_k_d_name);
+applied_lora_tensors.insert(split_v_d_name);
+applied_lora_tensors.insert(split_m_d_name);
 }
 } else if (linear2 != std::string::npos) {
 linear2--;
@@ -341,8 +332,8 @@ struct LoraModel : public GGMLRunner {
 lora_down = lora_tensors[lora_down_name];
 }

-applied_lora_tensors.insert(lora_up_name);
 applied_lora_tensors.insert(lora_down_name);
+applied_lora_tensors.insert(lora_up_name);
 }
 } else if (modulation != std::string::npos) {
 modulation--;
@@ -357,8 +348,8 @@ struct LoraModel : public GGMLRunner {
 lora_down = lora_tensors[lora_down_name];
 }

-applied_lora_tensors.insert(lora_up_name);
 applied_lora_tensors.insert(lora_down_name);
+applied_lora_tensors.insert(lora_up_name);
 }
 }
 // Double blocks
@@ -446,20 +437,20 @@ struct LoraModel : public GGMLRunner {
 lora_down = ggml_cont(compute_ctx, lora_down_concat);
 lora_up = ggml_cont(compute_ctx, lora_up_concat);

-lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
-lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";
+// lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
+// lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";

-lora_tensors[lora_down_name] = lora_down;
-lora_tensors[lora_up_name] = lora_up;
+// lora_tensors[lora_down_name] = lora_down;
+// lora_tensors[lora_up_name] = lora_up;

 // Would be nice to be able to clean up lora_tensors, but it breaks because this is called twice :/
-// lora_tensors.erase(split_q_u_name);
-// lora_tensors.erase(split_k_u_name);
-// lora_tensors.erase(split_v_u_name);
+applied_lora_tensors.insert(split_q_u_name);
+applied_lora_tensors.insert(split_k_u_name);
+applied_lora_tensors.insert(split_v_u_name);

-// lora_tensors.erase(split_q_d_name);
-// lora_tensors.erase(split_k_d_name);
-// lora_tensors.erase(split_v_d_name);
+applied_lora_tensors.insert(split_q_d_name);
+applied_lora_tensors.insert(split_k_d_name);
+applied_lora_tensors.insert(split_v_d_name);
 }
 } else if (txt_attn_proj != std::string::npos || img_attn_proj != std::string::npos) {
 size_t match = txt_attn_proj;
@@ -481,8 +472,8 @@ struct LoraModel : public GGMLRunner {
 lora_down = lora_tensors[lora_down_name];
 }

-applied_lora_tensors.insert(lora_up_name);
 applied_lora_tensors.insert(lora_down_name);
+applied_lora_tensors.insert(lora_up_name);
 }
 } else if (txt_mlp_0 != std::string::npos || txt_mlp_2 != std::string::npos || img_mlp_0 != std::string::npos || img_mlp_2 != std::string::npos) {
 bool has_two = txt_mlp_2 != std::string::npos || img_mlp_2 != std::string::npos;
@@ -514,8 +505,8 @@ struct LoraModel : public GGMLRunner {
 lora_down = lora_tensors[lora_down_name];
 }

-applied_lora_tensors.insert(lora_up_name);
 applied_lora_tensors.insert(lora_down_name);
+applied_lora_tensors.insert(lora_up_name);
 }
 } else if (txt_mod_lin != std::string::npos || img_mod_lin != std::string::npos) {
 size_t match = txt_mod_lin;
@@ -537,8 +528,8 @@ struct LoraModel : public GGMLRunner {
 lora_down = lora_tensors[lora_down_name];
 }

-applied_lora_tensors.insert(lora_up_name);
 applied_lora_tensors.insert(lora_down_name);
+applied_lora_tensors.insert(lora_up_name);
 }
 }
 }
@@ -564,11 +555,11 @@ struct LoraModel : public GGMLRunner {
 if (lora_tensors.find(lora_down_name) != lora_tensors.end()) {
 lora_down = lora_tensors[lora_down_name];
 }
+applied_lora_tensors.insert(lora_up_name);
+applied_lora_tensors.insert(lora_down_name);
+applied_lora_tensors.insert(alpha_name);
+applied_lora_tensors.insert(scale_name);
 }
-applied_lora_tensors.insert(lora_up_name);
-applied_lora_tensors.insert(lora_down_name);
-applied_lora_tensors.insert(alpha_name);
-applied_lora_tensors.insert(scale_name);

 if (lora_up == NULL || lora_down == NULL) {
 continue;
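
Note on the fix: the "unused lora tensors" report comes from comparing the names recorded in applied_lora_tensors against the keys of lora_tensors once the graph has been built. Before this commit, the concatenated split q/k/v/m tensors were re-registered in lora_tensors under rebuilt names instead of having their original split names marked as applied, so those names were reported as unused. The sketch below is a simplified, standalone illustration of that bookkeeping, not the project's exact code; the function name report_unused_lora_tensors and the opaque ggml_tensor stand-in are assumptions for the example.

#include <cstdio>
#include <map>
#include <set>
#include <string>

struct ggml_tensor;  // opaque here; stand-in for the real ggml type

// Illustrative only: warn about every loaded LoRA tensor whose name was never
// recorded as applied. Mirrors the pattern in lora.hpp, not its exact code.
static void report_unused_lora_tensors(const std::map<std::string, ggml_tensor*>& lora_tensors,
                                       const std::set<std::string>& applied_lora_tensors) {
    for (const auto& kv : lora_tensors) {
        if (applied_lora_tensors.find(kv.first) == applied_lora_tensors.end()) {
            // Previously, the split_{q,k,v,m} up/down names landed here even
            // though their concatenated versions had been applied.
            std::printf("lora tensor %s is not used\n", kv.first.c_str());
        }
    }
}

With the split_{q,k,v,m} up/down names inserted into applied_lora_tensors directly (as in the hunks above), this check no longer flags tensors that were in fact applied.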
