
Commit 4008102

save more memory
1 parent 7195efa

File tree

1 file changed: +12 −1 lines changed


lora.hpp

Lines changed: 12 additions & 1 deletion
@@ -525,19 +525,30 @@ struct LoraModel : public GGMLRunner {
         ggml_tensor* lora_mid = nullptr;
         ggml_tensor* lora_down = nullptr;
 
+        bool is_conv2d = forward_params.op_type == WeightAdapter::ForwardParams::op_type_t::OP_CONV2D;
+
         auto iter = lora_tensors.find(lora_up_name);
         if (iter != lora_tensors.end()) {
             lora_up = iter->second;
+            if (is_conv2d && lora_up->type != GGML_TYPE_F16) {
+                lora_up = ggml_cast(ctx, lora_up, GGML_TYPE_F16);
+            }
         }
 
         iter = lora_tensors.find(lora_mid_name);
         if (iter != lora_tensors.end()) {
             lora_mid = iter->second;
+            if (is_conv2d && lora_mid->type != GGML_TYPE_F16) {
+                lora_mid = ggml_cast(ctx, lora_mid, GGML_TYPE_F16);
+            }
         }
 
         iter = lora_tensors.find(lora_down_name);
         if (iter != lora_tensors.end()) {
             lora_down = iter->second;
+            if (is_conv2d && lora_down->type != GGML_TYPE_F16) {
+                lora_down = ggml_cast(ctx, lora_down, GGML_TYPE_F16);
+            }
         }
 
         if (lora_up == nullptr || lora_down == nullptr) {
@@ -570,7 +581,7 @@ struct LoraModel : public GGMLRunner {
         scale_value *= multiplier;
 
         ggml_tensor* lx;
-        if (forward_params.op_type == WeightAdapter::ForwardParams::op_type_t::OP_LINEAR) {
+        if (!is_conv2d) {
             lx = ggml_ext_linear(ctx, x, lora_down, nullptr, forward_params.linear.force_prec_f32, forward_params.linear.scale);
             if (lora_mid) {
                 lx = ggml_ext_linear(ctx, lx, lora_mid, nullptr, forward_params.linear.force_prec_f32, forward_params.linear.scale);
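For context, and not part of the commit itself: the memory saving comes from ggml element sizes. F32 stores 4 bytes per element and F16 stores 2, so casting a conv2d LoRA weight to F16 roughly halves its footprint in the compute graph. Below is a minimal standalone sketch against the ggml API; the 3x3, 320-to-32-channel kernel shape is hypothetical, chosen only to make the byte counts concrete.

// Standalone sketch (not from this commit): compare the byte footprint of a
// conv2d LoRA tensor before and after the F16 cast the commit applies.
#include "ggml.h"

#include <cstdio>

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 64 * 1024 * 1024,  // 64 MiB arena, ample for the demo
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context* ctx = ggml_init(params);

    // Hypothetical LoRA-down conv2d weight stored in F32.
    struct ggml_tensor* lora_down = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 320, 32);

    // Same cast the commit adds when is_conv2d is true.
    struct ggml_tensor* lora_down_f16 = ggml_cast(ctx, lora_down, GGML_TYPE_F16);

    // ggml_nbytes depends only on shape and type: F32 = 4 B/elem, F16 = 2 B/elem.
    printf("F32: %zu bytes\n", ggml_nbytes(lora_down));      // 368640
    printf("F16: %zu bytes\n", ggml_nbytes(lora_down_f16));  // 184320

    ggml_free(ctx);
    return 0;
}

Running it prints 368640 bytes for F32 versus 184320 for F16; the same halving applies to each of lora_up, lora_mid, and lora_down whenever is_conv2d is true.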
