@@ -302,31 +302,22 @@ struct LoraModel : public GGMLRunner {
                     lora_down = ggml_cont(compute_ctx, lora_down_concat);
                     lora_up = ggml_cont(compute_ctx, lora_up_concat);

-                    lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
-                    lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";
+                    // lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
+                    // lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";

-                    lora_tensors[lora_down_name] = lora_down;
-                    lora_tensors[lora_up_name] = lora_up;
+                    // lora_tensors[lora_down_name] = lora_down;
+                    // lora_tensors[lora_up_name] = lora_up;

                     // Would be nice to be able to clean up lora_tensors, but it breaks because this is called twice :/
-                    // lora_tensors.erase(split_q_u_name);
-                    // lora_tensors.erase(split_k_u_name);
-                    // lora_tensors.erase(split_v_u_name);
-                    // lora_tensors.erase(split_m_u_name);
-
-                    // lora_tensors.erase(split_q_d_name);
-                    // lora_tensors.erase(split_k_d_name);
-                    // lora_tensors.erase(split_v_d_name);
-                    // lora_tensors.erase(split_m_d_name);
-
-                } else {
-                    // lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";
-                    // lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
-                    // if (lora_tensors.find(lora_up_name) != lora_tensors.end()) {
-                    // // print_ggml_tensor(lora_tensors[lora_down_name], true); // [3072, R, 1, 1]
-                    // // print_ggml_tensor(lora_tensors[lora_up_name], true); // [R, 21504, 1, 1]
-                    // // print_ggml_tensor(it.second, true); // [3072, 21504, 1, 1]
-                    // }
+                    applied_lora_tensors.insert(split_q_u_name);
+                    applied_lora_tensors.insert(split_k_u_name);
+                    applied_lora_tensors.insert(split_v_u_name);
+                    applied_lora_tensors.insert(split_m_u_name);
+
+                    applied_lora_tensors.insert(split_q_d_name);
+                    applied_lora_tensors.insert(split_k_d_name);
+                    applied_lora_tensors.insert(split_v_d_name);
+                    applied_lora_tensors.insert(split_m_d_name);
                 }
             } else if (linear2 != std::string::npos) {
                 linear2--;
@@ -341,8 +332,8 @@ struct LoraModel : public GGMLRunner {
                         lora_down = lora_tensors[lora_down_name];
                     }

-                    applied_lora_tensors.insert(lora_up_name);
                     applied_lora_tensors.insert(lora_down_name);
+                    applied_lora_tensors.insert(lora_up_name);
                 }
             } else if (modulation != std::string::npos) {
                 modulation--;
@@ -357,8 +348,8 @@ struct LoraModel : public GGMLRunner {
                         lora_down = lora_tensors[lora_down_name];
                     }

-                    applied_lora_tensors.insert(lora_up_name);
                     applied_lora_tensors.insert(lora_down_name);
+                    applied_lora_tensors.insert(lora_up_name);
                 }
             }
             // Double blocks
@@ -446,20 +437,20 @@ struct LoraModel : public GGMLRunner {
                     lora_down = ggml_cont(compute_ctx, lora_down_concat);
                     lora_up = ggml_cont(compute_ctx, lora_up_concat);

-                    lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
-                    lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";
+                    // lora_down_name = lora_pre[type] + key + lora_downs[type] + ".weight";
+                    // lora_up_name = lora_pre[type] + key + lora_ups[type] + ".weight";

-                    lora_tensors[lora_down_name] = lora_down;
-                    lora_tensors[lora_up_name] = lora_up;
+                    // lora_tensors[lora_down_name] = lora_down;
+                    // lora_tensors[lora_up_name] = lora_up;

                     // Would be nice to be able to clean up lora_tensors, but it breaks because this is called twice :/
-                    // lora_tensors.erase(split_q_u_name);
-                    // lora_tensors.erase(split_k_u_name);
-                    // lora_tensors.erase(split_v_u_name);
+                    applied_lora_tensors.insert(split_q_u_name);
+                    applied_lora_tensors.insert(split_k_u_name);
+                    applied_lora_tensors.insert(split_v_u_name);

-                    // lora_tensors.erase(split_q_d_name);
-                    // lora_tensors.erase(split_k_d_name);
-                    // lora_tensors.erase(split_v_d_name);
+                    applied_lora_tensors.insert(split_q_d_name);
+                    applied_lora_tensors.insert(split_k_d_name);
+                    applied_lora_tensors.insert(split_v_d_name);
                 }
             } else if (txt_attn_proj != std::string::npos || img_attn_proj != std::string::npos) {
                 size_t match = txt_attn_proj;
@@ -481,8 +472,8 @@ struct LoraModel : public GGMLRunner {
                         lora_down = lora_tensors[lora_down_name];
                     }

-                    applied_lora_tensors.insert(lora_up_name);
                     applied_lora_tensors.insert(lora_down_name);
+                    applied_lora_tensors.insert(lora_up_name);
                 }
             } else if (txt_mlp_0 != std::string::npos || txt_mlp_2 != std::string::npos || img_mlp_0 != std::string::npos || img_mlp_2 != std::string::npos) {
                 bool has_two = txt_mlp_2 != std::string::npos || img_mlp_2 != std::string::npos;
@@ -514,8 +505,8 @@ struct LoraModel : public GGMLRunner {
                         lora_down = lora_tensors[lora_down_name];
                     }

-                    applied_lora_tensors.insert(lora_up_name);
                     applied_lora_tensors.insert(lora_down_name);
+                    applied_lora_tensors.insert(lora_up_name);
                 }
             } else if (txt_mod_lin != std::string::npos || img_mod_lin != std::string::npos) {
                 size_t match = txt_mod_lin;
@@ -537,8 +528,8 @@ struct LoraModel : public GGMLRunner {
                         lora_down = lora_tensors[lora_down_name];
                     }

-                    applied_lora_tensors.insert(lora_up_name);
                     applied_lora_tensors.insert(lora_down_name);
+                    applied_lora_tensors.insert(lora_up_name);
                 }
             }
         }
@@ -564,11 +555,11 @@ struct LoraModel : public GGMLRunner {
             if (lora_tensors.find(lora_down_name) != lora_tensors.end()) {
                 lora_down = lora_tensors[lora_down_name];
             }
+            applied_lora_tensors.insert(lora_up_name);
+            applied_lora_tensors.insert(lora_down_name);
+            applied_lora_tensors.insert(alpha_name);
+            applied_lora_tensors.insert(scale_name);
         }
-        applied_lora_tensors.insert(lora_up_name);
-        applied_lora_tensors.insert(lora_down_name);
-        applied_lora_tensors.insert(alpha_name);
-        applied_lora_tensors.insert(scale_name);

         if (lora_up == NULL || lora_down == NULL) {
             continue;
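
For context on the pattern these hunks converge on: every LoRA tensor name that actually gets merged into the model, including the split q/k/v/mlp pieces that are concatenated first, is recorded in the applied_lora_tensors set, so names that were never consumed can be flagged afterwards instead of being silently dropped. A minimal standalone sketch of that bookkeeping follows; the tensor names are hypothetical and a plain std::map stands in for the real ggml tensor map.

    // Sketch only: illustrates tracking applied LoRA tensor names in a set,
    // not the actual LoraModel / ggml code.
    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>

    int main() {
        // Stand-in for the loaded LoRA tensors (name -> tensor).
        std::map<std::string, int> lora_tensors = {
            {"lora_unet_single_blocks_0_linear1.lora_up.weight", 0},    // hypothetical name
            {"lora_unet_single_blocks_0_linear1.lora_down.weight", 1},  // hypothetical name
            {"lora_unet_some_unused_key.lora_up.weight", 2},            // hypothetical name
        };
        std::set<std::string> applied_lora_tensors;

        // Wherever a tensor (or a split piece of it) is merged into the model,
        // its name is inserted -- mirroring the applied_lora_tensors.insert()
        // calls added in the diff.
        applied_lora_tensors.insert("lora_unet_single_blocks_0_linear1.lora_up.weight");
        applied_lora_tensors.insert("lora_unet_single_blocks_0_linear1.lora_down.weight");

        // Afterwards, anything never inserted was not applied and can be reported.
        for (const auto& kv : lora_tensors) {
            if (applied_lora_tensors.count(kv.first) == 0) {
                std::printf("unused lora tensor: %s\n", kv.first.c_str());
            }
        }
        return 0;
    }

This also reflects the design choice visible in the diff: track applied names in a separate set rather than erasing entries from lora_tensors, which, per the in-code comment, breaks because the routine is called twice.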