Skip to content

Commit c19ea0f

Browse files
committed
Optimise masked attention
1 parent ea4c9ca commit c19ea0f

File tree

1 file changed

+1 -1 lines changed

1 file changed

+1
-1
lines changed

ggml_extend.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -956,7 +956,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
956 956       auto kq = ggml_mul_mat(ctx, k, q); // [N * n_head, L_q, L_k]
957 957       kq = ggml_scale_inplace(ctx, kq, scale);
958 958       if (mask) {
959     -         kq = ggml_add(ctx, kq, mask);
    959 +         kq = ggml_add_inplace(ctx, kq, mask);
960 960       }
961 961       if (diag_mask_inf) {
962 962           kq = ggml_diag_mask_inf_inplace(ctx, kq, 0);

0 commit comments

Comments (0)