@@ -16,6 +16,14 @@ tensor mlp(model_ref m, tensor x) {
     return named(m, x);
 }
 
+// Ensures that the tensor's data is not overwritten during computation.
+tensor make_constant(tensor x, tensor_name name) {
+    ggml_set_name(x, name.c_str());
+    ggml_set_input(x);  // allocate at the beginning of the graph buffer
+    ggml_set_output(x); // don't reuse memory for computations
+    return x;
+}
+
 void compute_relative_position_index(span<int32_t> dst, int window_size) {
     int n = window_size;
     int n2 = n * n;
@@ -34,7 +42,7 @@ tensor_data create_relative_position_index(ggml_context* ctx, int window_size) {
     auto result = tensor_alloc(ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n * n * n * n));
     auto name = format<tensor_name>("window_attention_{}.rel_pos_index", n);
     compute_relative_position_index(result.as_i32(), n);
-    ggml_set_name(result.x, name.c_str());
+    make_constant(result.x, name);
     return result;
 }
 
@@ -226,7 +234,7 @@ tensor_data create_attention_mask(ggml_context* ctx, int64_t w, int64_t h, int w
     auto result = tensor_alloc(ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n * n, n * n, nw_x * nw_y));
     auto name = format<tensor_name>("swin_layer_{}x{}.attn_mask", w, h);
     compute_attention_mask(result.as_f32(), w, h, window_size);
-    ggml_set_name(result.x, name.c_str());
+    make_constant(result.x, name);
     return result;
 }
 
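Note for reviewers who haven't used these allocator flags: below is a minimal, standalone sketch (not part of this commit; the tensor names, sizes, add-op graph, and overall flow are made up for illustration) of how a tensor flagged with ggml_set_input/ggml_set_output keeps its data once the graph allocator assigns memory.

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h" // header layout may differ between ggml versions

int main() {
    // no_alloc: tensor data is assigned later by the graph allocator
    ggml_init_params params{/*mem_size=*/16 * 1024 * 1024, /*mem_buffer=*/nullptr, /*no_alloc=*/true};
    ggml_context* ctx = ggml_init(params);

    // Constant tensor: filled once, must stay valid for the whole computation.
    ggml_tensor* mask = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    ggml_set_name(mask, "example.attn_mask");
    ggml_set_input(mask);   // allocated at the beginning of the graph buffer
    ggml_set_output(mask);  // its memory is not reused for intermediate results

    ggml_tensor* x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    ggml_set_input(x);
    ggml_tensor* y = ggml_add(ctx, x, mask);
    ggml_set_output(y);

    ggml_cgraph* graph = ggml_new_graph(ctx);
    ggml_build_forward_expand(graph, y);

    // The allocator respects the input/output flags, so data written into
    // mask->data after this call is not clobbered while the graph runs.
    ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
    ggml_gallocr_alloc_graph(allocr, graph);

    ggml_gallocr_free(allocr);
    ggml_free(ctx);
}

With ggml_set_name alone the allocator is free to hand a constant tensor's memory to later nodes, which is what the make_constant helper above prevents.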