Skip to content

Commit 46d5524 (parent: f122dc5)

ml: add tests for model weight conversion

File tree

5 files changed: +150 −20 lines

include/visp/ml.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,15 @@ VISP_API void model_transfer(
128128
ggml_type float_type = GGML_TYPE_COUNT,
129129
tensor_data_layout = tensor_data_layout::unknown);
130130

131+
VISP_API void model_transfer(
132+
ggml_context* const& src_ctx,
133+
model_weights& weights,
134+
backend_device const& device,
135+
ggml_type float_type = GGML_TYPE_COUNT,
136+
tensor_data_layout src_layout = tensor_data_layout::unknown,
137+
tensor_data_layout dst_layout = tensor_data_layout::unknown,
138+
span<int32_t const> conv2d_weights = {});
139+
131140
//
132141
// Compute graph - wrapper for ggml_cgraph and its associated backend memory
133142

src/visp/ml.cpp

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -361,55 +361,74 @@ span<int32_t const> find_conv2d_weight_indices(model_file const& f) {
361361
} // namespace
362362

363363
void model_transfer(
364-
model_file const& file,
364+
ggml_context* const& src_ctx,
365365
model_weights& weights,
366366
backend_device const& device,
367367
ggml_type float_type,
368-
tensor_data_layout layout) {
368+
tensor_data_layout src_layout,
369+
tensor_data_layout dst_layout,
370+
span<int32_t const> conv2d_weights) {
369371

370-
gguf_context* gguf = file.gguf.get();
371-
ggml_context* src_ctx = file.data.get();
372372
ggml_context* dst_ctx = weights.context.get();
373-
374-
tensor_data_layout file_layout = file.tensor_layout();
375-
bool to_cwhn = file_layout == tensor_data_layout::whcn && layout == tensor_data_layout::cwhn;
373+
bool to_cwhn = src_layout == tensor_data_layout::whcn && dst_layout == tensor_data_layout::cwhn;
376374
tensor_converter convert(src_ctx, float_type, to_cwhn);
377-
// Try to find a list of tensor indices which are weights of 2D operations
378-
span<int32_t const> conv2d_weights = find_conv2d_weight_indices(file);
379375

380-
for (int64_t i = 0, conv2d_idx = 0; i < gguf_get_n_tensors(gguf); ++i) {
381-
auto name = gguf_get_tensor_name(gguf, i);
382-
tensor orig = ggml_get_tensor(src_ctx, name); // TODO: don't use name lookup
376+
tensor orig = ggml_get_first_tensor(src_ctx);
377+
for (int64_t i = 0, conv2d_idx = 0; orig;) {
378+
if (strncmp(orig->name, "GGUF", 4) == 0) {
379+
orig = ggml_get_next_tensor(src_ctx, orig); // skip "GGUF tensor data binary blob"
380+
continue; // (why is there no way to iterate over GGUF tensors directly?)
381+
}
383382
auto ne = nelements(orig);
384383
if (to_cwhn && conv2d_idx < ssize(conv2d_weights) && conv2d_weights[conv2d_idx] == i) {
385384
permute_whcn_to_cwhn(ne.data(), ne[2] == 1);
386385
++conv2d_idx;
387386
}
388387
tensor dup = ggml_new_tensor(dst_ctx, convert.target_type(orig), GGML_MAX_DIMS, ne.data());
389-
ggml_set_name(dup, name);
388+
ggml_set_name(dup, ggml_get_name(orig));
389+
orig = ggml_get_next_tensor(src_ctx, orig);
390+
++i;
390391
}
391392

392393
ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(dst_ctx, device);
393394
weights.weights_buffer = ggml_backend_buffer_ptr(buffer);
394395
weights.buffer_type = device.type();
395-
weights.flags = model_get_build_flags(file);
396396
if (to_cwhn) {
397397
weights.flags |= model_build_flag::cwhn;
398398
}
399399

400-
ggml_tensor* t = ggml_get_first_tensor(dst_ctx);
401-
for (int i = 0, conv2d_idx = 0; t; ++i) {
402-
tensor data_tensor = ggml_get_tensor(src_ctx, ggml_get_name(t));
400+
tensor src = ggml_get_first_tensor(src_ctx);
401+
tensor dst = ggml_get_first_tensor(dst_ctx);
402+
for (int i = 0, conv2d_idx = 0; src && dst;) {
403+
if (strncmp(src->name, "GGUF", 4) == 0) {
404+
src = ggml_get_next_tensor(src_ctx, src);
405+
continue; // skip "GGUF tensor data binary blob"
406+
}
403407
bool is_2d = conv2d_idx < int(conv2d_weights.size()) && conv2d_weights[conv2d_idx] == i;
404408
if (is_2d) {
405409
++conv2d_idx;
406410
}
407-
void const* data = convert(data_tensor, t, is_2d && to_cwhn);
408-
ggml_backend_tensor_set(t, data, 0, ggml_nbytes(t));
409-
t = ggml_get_next_tensor(dst_ctx, t);
411+
void const* data = convert(src, dst, is_2d && to_cwhn);
412+
ggml_backend_tensor_set(dst, data, 0, ggml_nbytes(dst));
413+
src = ggml_get_next_tensor(src_ctx, src);
414+
dst = ggml_get_next_tensor(dst_ctx, dst);
415+
++i;
410416
}
411417
}
412418

419+
void model_transfer(
420+
model_file const& file,
421+
model_weights& weights,
422+
backend_device const& device,
423+
ggml_type float_type,
424+
tensor_data_layout layout) {
425+
426+
weights.flags = model_get_build_flags(file);
427+
model_transfer(
428+
file.data.get(), weights, device, float_type, file.tensor_layout(), layout,
429+
find_conv2d_weight_indices(file));
430+
}
431+
413432
ggml_type model_weights::float_type() const {
414433
for (ggml_tensor* t = ggml_get_first_tensor(context.get()); t != nullptr;
415434
t = ggml_get_next_tensor(context.get(), t)) {

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_executable(test-vision)
44
target_sources(test-vision PRIVATE
55
testing.cpp
66
test-image.cpp
7+
test-ml.cpp
78
)
89
target_include_directories(test-vision PRIVATE . ../src)
910
target_compile_definitions(test-vision PRIVATE ${VISP_ASSERT} ${VISP_DEFINITIONS})

tests/test-ml.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#include "testing.h"
2+
#include "visp/ml.h"
3+
4+
#include <numeric>
5+
6+
namespace visp {
7+
8+
VISP_TEST(model_transfer_type_conversion) {
9+
model_weights src = model_init(2);
10+
11+
tensor i = ggml_new_tensor_1d(src, GGML_TYPE_I32, 2);
12+
ggml_set_name(i, "i32_tensor");
13+
auto i32_data = std::array{4, -1};
14+
i->data = i32_data.data();
15+
16+
tensor f = ggml_new_tensor_1d(src, GGML_TYPE_F16, 2);
17+
ggml_set_name(f, "f16_tensor");
18+
auto f16_data = std::array{ggml_fp32_to_fp16(2.5f), ggml_fp32_to_fp16(-0.5f)};
19+
f->data = f16_data.data();
20+
21+
backend_device dev = backend_init(backend_type::cpu);
22+
model_weights dst = model_init(2);
23+
model_transfer(src, dst, dev, GGML_TYPE_F32); // f16 -> f32 conversion
24+
25+
int32_t const* i32_result = (int32_t const*)ggml_get_tensor(dst, "i32_tensor")->data;
26+
CHECK_EQUAL(i32_result[0], 4);
27+
CHECK_EQUAL(i32_result[1], -1);
28+
29+
tensor f_result = ggml_get_tensor(dst, "f16_tensor");
30+
CHECK(f_result->type == GGML_TYPE_F32);
31+
float const* f32_result = (float const*)f_result->data;
32+
CHECK_EQUAL(f32_result[0], 2.5f);
33+
CHECK_EQUAL(f32_result[1], -0.5f);
34+
}
35+
36+
VISP_TEST(model_transfer_layout_conversion) {
37+
model_weights src = model_init(3);
38+
39+
tensor conv_dw = ggml_new_tensor_4d(src, GGML_TYPE_F32, 2, 2, 1, 3); // wh1c
40+
ggml_set_name(conv_dw, "conv_dw");
41+
auto conv_dw_data = std::array<float, 2 * 2 * 1 * 3>{};
42+
std::iota(conv_dw_data.begin(), conv_dw_data.end(), 1.0f);
43+
conv_dw->data = conv_dw_data.data();
44+
45+
tensor conv = ggml_new_tensor_4d(src, GGML_TYPE_F32, 2, 2, 4, 3); // whco
46+
ggml_set_name(conv, "conv");
47+
auto conv_data = std::array<float, 2 * 2 * 3 * 4>{};
48+
std::iota(conv_data.begin(), conv_data.end(), 1.0f);
49+
conv->data = conv_data.data();
50+
51+
tensor no_conv = ggml_new_tensor_1d(src, GGML_TYPE_F32, 2);
52+
ggml_set_name(no_conv, "no_conv");
53+
auto no_conv_data = std::array<float, 2>{1.0f, 2.0f};
54+
no_conv->data = no_conv_data.data();
55+
56+
auto conv_weights = std::array{0, 1};
57+
auto src_layout = tensor_data_layout::whcn;
58+
auto dst_layout = tensor_data_layout::cwhn;
59+
60+
backend_device dev = backend_init(backend_type::cpu);
61+
model_weights dst = model_init(3);
62+
model_transfer(src, dst, dev, GGML_TYPE_COUNT, src_layout, dst_layout, conv_weights);
63+
64+
auto conv_dw_expected = std::array{
65+
1.0f, 5.0f, 9.0f, //
66+
2.0f, 6.0f, 10.0f, //
67+
3.0f, 7.0f, 11.0f, //
68+
4.0f, 8.0f, 12.0f //
69+
};
70+
float const* conv_dw_result = (float const*)ggml_get_tensor(dst, "conv_dw")->data;
71+
for (int i = 0; i < int(conv_dw_expected.size()); ++i) {
72+
CHECK_EQUAL(conv_dw_result[i], conv_dw_expected[i]);
73+
}
74+
75+
auto conv_expected = std::array{
76+
1.0f, 5.0f, 9.0f, 13.0f, 2.0f, 6.0f, 10.0f, 14.0f, //
77+
3.0f, 7.0f, 11.0f, 15.0f, 4.0f, 8.0f, 12.0f, 16.0f, //
78+
79+
17.0f, 21.0f, 25.0f, 29.0f, 18.0f, 22.0f, 26.0f, 30.0f, //
80+
19.0f, 23.0f, 27.0f, 31.0f, 20.0f, 24.0f, 28.0f, 32.0f, //
81+
82+
33.0f, 37.0f, 41.0f, 45.0f, 34.0f, 38.0f, 42.0f, 46.0f, //
83+
35.0f, 39.0f, 43.0f, 47.0f, 36.0f, 40.0f, 44.0f, 48.0f //
84+
};
85+
float const* conv_result = (float const*)ggml_get_tensor(dst, "conv")->data;
86+
for (int i = 0; i < int(conv_expected.size()); ++i) {
87+
CHECK_EQUAL(conv_result[i], conv_expected[i]);
88+
}
89+
90+
float const* no_conv_result = (float const*)ggml_get_tensor(dst, "no_conv")->data;
91+
CHECK_EQUAL(no_conv_result[0], 1.0f);
92+
CHECK_EQUAL(no_conv_result[1], 2.0f);
93+
}
94+
95+
} // namespace visp

tests/testing.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ std::string extra_info;
1818
int main(int argc, char** argv) {
1919
using namespace visp;
2020

21+
ggml_backend_load_all();
22+
2123
auto& registry = test_registry_instance();
2224

2325
int passed = 0;
@@ -81,6 +83,10 @@ int main(int argc, char** argv) {
8183
printf("%s %s\n", verbose ? "" : name, "\033[31mERROR\033[0m");
8284
printf(" \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line);
8385
printf(" \033[93m%s\033[0m\n", e.what());
86+
} catch (...) {
87+
++errors;
88+
printf("%s %s\n", verbose ? "" : name, "\033[31mERROR\033[0m");
89+
printf(" \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line);
8490
}
8591
visp::extra_info.clear();
8692
};

0 commit comments

Comments
 (0)