diff --git a/include/visp/ml.h b/include/visp/ml.h index 2a3826d..4b9af39 100644 --- a/include/visp/ml.h +++ b/include/visp/ml.h @@ -128,6 +128,15 @@ VISP_API void model_transfer( ggml_type float_type = GGML_TYPE_COUNT, tensor_data_layout = tensor_data_layout::unknown); +VISP_API void model_transfer( + ggml_context* const& src_ctx, + model_weights& weights, + backend_device const& device, + ggml_type float_type = GGML_TYPE_COUNT, + tensor_data_layout src_layout = tensor_data_layout::unknown, + tensor_data_layout dst_layout = tensor_data_layout::unknown, + span conv2d_weights = {}); + // // Compute graph - wrapper for ggml_cgraph and its associated backend memory diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp index dbc6a1c..1ac564c 100644 --- a/src/visp/ml.cpp +++ b/src/visp/ml.cpp @@ -361,55 +361,74 @@ span find_conv2d_weight_indices(model_file const& f) { } // namespace void model_transfer( - model_file const& file, + ggml_context* const& src_ctx, model_weights& weights, backend_device const& device, ggml_type float_type, - tensor_data_layout layout) { + tensor_data_layout src_layout, + tensor_data_layout dst_layout, + span conv2d_weights) { - gguf_context* gguf = file.gguf.get(); - ggml_context* src_ctx = file.data.get(); ggml_context* dst_ctx = weights.context.get(); - - tensor_data_layout file_layout = file.tensor_layout(); - bool to_cwhn = file_layout == tensor_data_layout::whcn && layout == tensor_data_layout::cwhn; + bool to_cwhn = src_layout == tensor_data_layout::whcn && dst_layout == tensor_data_layout::cwhn; tensor_converter convert(src_ctx, float_type, to_cwhn); - // Try to find a list of tensor indices which are weights of 2D operations - span conv2d_weights = find_conv2d_weight_indices(file); - for (int64_t i = 0, conv2d_idx = 0; i < gguf_get_n_tensors(gguf); ++i) { - auto name = gguf_get_tensor_name(gguf, i); - tensor orig = ggml_get_tensor(src_ctx, name); // TODO: don't use name lookup + tensor orig = ggml_get_first_tensor(src_ctx); + for (int64_t i = 0, conv2d_idx = 0; orig;) { + if (strncmp(orig->name, "GGUF", 4) == 0) { + orig = ggml_get_next_tensor(src_ctx, orig); // skip "GGUF tensor data binary blob" + continue; // (why is there no way to iterate over GGUF tensors directly?) + } auto ne = nelements(orig); if (to_cwhn && conv2d_idx < ssize(conv2d_weights) && conv2d_weights[conv2d_idx] == i) { permute_whcn_to_cwhn(ne.data(), ne[2] == 1); ++conv2d_idx; } tensor dup = ggml_new_tensor(dst_ctx, convert.target_type(orig), GGML_MAX_DIMS, ne.data()); - ggml_set_name(dup, name); + ggml_set_name(dup, ggml_get_name(orig)); + orig = ggml_get_next_tensor(src_ctx, orig); + ++i; } ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(dst_ctx, device); weights.weights_buffer = ggml_backend_buffer_ptr(buffer); weights.buffer_type = device.type(); - weights.flags = model_get_build_flags(file); if (to_cwhn) { weights.flags |= model_build_flag::cwhn; } - ggml_tensor* t = ggml_get_first_tensor(dst_ctx); - for (int i = 0, conv2d_idx = 0; t; ++i) { - tensor data_tensor = ggml_get_tensor(src_ctx, ggml_get_name(t)); + tensor src = ggml_get_first_tensor(src_ctx); + tensor dst = ggml_get_first_tensor(dst_ctx); + for (int i = 0, conv2d_idx = 0; src && dst;) { + if (strncmp(src->name, "GGUF", 4) == 0) { + src = ggml_get_next_tensor(src_ctx, src); + continue; // skip "GGUF tensor data binary blob" + } bool is_2d = conv2d_idx < int(conv2d_weights.size()) && conv2d_weights[conv2d_idx] == i; if (is_2d) { ++conv2d_idx; } - void const* data = convert(data_tensor, t, is_2d && to_cwhn); - ggml_backend_tensor_set(t, data, 0, ggml_nbytes(t)); - t = ggml_get_next_tensor(dst_ctx, t); + void const* data = convert(src, dst, is_2d && to_cwhn); + ggml_backend_tensor_set(dst, data, 0, ggml_nbytes(dst)); + src = ggml_get_next_tensor(src_ctx, src); + dst = ggml_get_next_tensor(dst_ctx, dst); + ++i; } } +void model_transfer( + model_file const& file, + model_weights& weights, + backend_device const& device, + ggml_type float_type, + tensor_data_layout layout) { + + weights.flags = model_get_build_flags(file); + model_transfer( + file.data.get(), weights, device, float_type, file.tensor_layout(), layout, + find_conv2d_weight_indices(file)); +} + ggml_type model_weights::float_type() const { for (ggml_tensor* t = ggml_get_first_tensor(context.get()); t != nullptr; t = ggml_get_next_tensor(context.get(), t)) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6d06f46..c3c8a5f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,6 +4,7 @@ add_executable(test-vision) target_sources(test-vision PRIVATE testing.cpp test-image.cpp + test-ml.cpp ) target_include_directories(test-vision PRIVATE . ../src) target_compile_definitions(test-vision PRIVATE ${VISP_ASSERT} ${VISP_DEFINITIONS}) diff --git a/tests/test-image.cpp b/tests/test-image.cpp index 25fc10b..85a94c6 100644 --- a/tests/test-image.cpp +++ b/tests/test-image.cpp @@ -252,6 +252,34 @@ VISP_TEST(image_blur) { CHECK_IMAGES_EQUAL(output, expected); } +VISP_TEST(image_erosion) { + constexpr i32x2 extent{6, 6}; + std::array input_data = { + 0.0f, 0.5f, 0.0f, 0.0f, 0.0f, 0.0f, // + 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, // + 0.8f, 0.8f, 1.0f, 0.5f, 0.0f, 1.0f, // + 0.8f, 0.8f, 1.0f, 0.5f, 0.5f, 0.0f, // + 0.8f, 1.0f, 1.0f, 0.5f, 0.5f, 0.0f, // + 0.0f, 1.0f, 1.0f, 0.2f, 0.5f, 0.0f // + }; + std::array expected_data = { + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, // + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, // + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, // + 0.8f, 0.8f, 0.5f, 0.0f, 0.0f, 0.0f, // + 0.0f, 0.0f, 0.2f, 0.2f, 0.0f, 0.0f, // + 0.0f, 0.0f, 0.2f, 0.2f, 0.0f, 0.0f // + }; + std::array output_data{}; + + auto input = image_view(extent, input_data); + auto output = image_span(extent, output_data); + image_erosion(input, output, 1); + + auto expected = image_view(extent, expected_data); + CHECK_IMAGES_EQUAL(output, expected); +} + VISP_TEST(tile_merge) { std::array, 4> tiles; for (int t = 0; t < 4; ++t) { diff --git a/tests/test-ml.cpp b/tests/test-ml.cpp new file mode 100644 index 0000000..949adcd --- /dev/null +++ b/tests/test-ml.cpp @@ -0,0 +1,95 @@ +#include "testing.h" +#include "visp/ml.h" + +#include + +namespace visp { + +VISP_TEST(model_transfer_type_conversion) { + model_weights src = model_init(2); + + tensor i = ggml_new_tensor_1d(src, GGML_TYPE_I32, 2); + ggml_set_name(i, "i32_tensor"); + auto i32_data = std::array{4, -1}; + i->data = i32_data.data(); + + tensor f = ggml_new_tensor_1d(src, GGML_TYPE_F16, 2); + ggml_set_name(f, "f16_tensor"); + auto f16_data = std::array{ggml_fp32_to_fp16(2.5f), ggml_fp32_to_fp16(-0.5f)}; + f->data = f16_data.data(); + + backend_device dev = backend_init(backend_type::cpu); + model_weights dst = model_init(2); + model_transfer(src, dst, dev, GGML_TYPE_F32); // f16 -> f32 conversion + + int32_t const* i32_result = (int32_t const*)ggml_get_tensor(dst, "i32_tensor")->data; + CHECK_EQUAL(i32_result[0], 4); + CHECK_EQUAL(i32_result[1], -1); + + tensor f_result = ggml_get_tensor(dst, "f16_tensor"); + CHECK(f_result->type == GGML_TYPE_F32); + float const* f32_result = (float const*)f_result->data; + CHECK_EQUAL(f32_result[0], 2.5f); + CHECK_EQUAL(f32_result[1], -0.5f); +} + +VISP_TEST(model_transfer_layout_conversion) { + model_weights src = model_init(3); + + tensor conv_dw = ggml_new_tensor_4d(src, GGML_TYPE_F32, 2, 2, 1, 3); // wh1c + ggml_set_name(conv_dw, "conv_dw"); + auto conv_dw_data = std::array{}; + std::iota(conv_dw_data.begin(), conv_dw_data.end(), 1.0f); + conv_dw->data = conv_dw_data.data(); + + tensor conv = ggml_new_tensor_4d(src, GGML_TYPE_F32, 2, 2, 4, 3); // whco + ggml_set_name(conv, "conv"); + auto conv_data = std::array{}; + std::iota(conv_data.begin(), conv_data.end(), 1.0f); + conv->data = conv_data.data(); + + tensor no_conv = ggml_new_tensor_1d(src, GGML_TYPE_F32, 2); + ggml_set_name(no_conv, "no_conv"); + auto no_conv_data = std::array{1.0f, 2.0f}; + no_conv->data = no_conv_data.data(); + + auto conv_weights = std::array{0, 1}; + auto src_layout = tensor_data_layout::whcn; + auto dst_layout = tensor_data_layout::cwhn; + + backend_device dev = backend_init(backend_type::cpu); + model_weights dst = model_init(3); + model_transfer(src, dst, dev, GGML_TYPE_COUNT, src_layout, dst_layout, conv_weights); + + auto conv_dw_expected = std::array{ + 1.0f, 5.0f, 9.0f, // + 2.0f, 6.0f, 10.0f, // + 3.0f, 7.0f, 11.0f, // + 4.0f, 8.0f, 12.0f // + }; + float const* conv_dw_result = (float const*)ggml_get_tensor(dst, "conv_dw")->data; + for (int i = 0; i < int(conv_dw_expected.size()); ++i) { + CHECK_EQUAL(conv_dw_result[i], conv_dw_expected[i]); + } + + auto conv_expected = std::array{ + 1.0f, 5.0f, 9.0f, 13.0f, 2.0f, 6.0f, 10.0f, 14.0f, // + 3.0f, 7.0f, 11.0f, 15.0f, 4.0f, 8.0f, 12.0f, 16.0f, // + + 17.0f, 21.0f, 25.0f, 29.0f, 18.0f, 22.0f, 26.0f, 30.0f, // + 19.0f, 23.0f, 27.0f, 31.0f, 20.0f, 24.0f, 28.0f, 32.0f, // + + 33.0f, 37.0f, 41.0f, 45.0f, 34.0f, 38.0f, 42.0f, 46.0f, // + 35.0f, 39.0f, 43.0f, 47.0f, 36.0f, 40.0f, 44.0f, 48.0f // + }; + float const* conv_result = (float const*)ggml_get_tensor(dst, "conv")->data; + for (int i = 0; i < int(conv_expected.size()); ++i) { + CHECK_EQUAL(conv_result[i], conv_expected[i]); + } + + float const* no_conv_result = (float const*)ggml_get_tensor(dst, "no_conv")->data; + CHECK_EQUAL(no_conv_result[0], 1.0f); + CHECK_EQUAL(no_conv_result[1], 2.0f); +} + +} // namespace visp \ No newline at end of file diff --git a/tests/testing.cpp b/tests/testing.cpp index d92c327..2a9403f 100644 --- a/tests/testing.cpp +++ b/tests/testing.cpp @@ -18,6 +18,8 @@ std::string extra_info; int main(int argc, char** argv) { using namespace visp; + ggml_backend_load_all(); + auto& registry = test_registry_instance(); int passed = 0; @@ -81,6 +83,10 @@ int main(int argc, char** argv) { printf("%s %s\n", verbose ? "" : name, "\033[31mERROR\033[0m"); printf(" \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line); printf(" \033[93m%s\033[0m\n", e.what()); + } catch (...) { + ++errors; + printf("%s %s\n", verbose ? "" : name, "\033[31mERROR\033[0m"); + printf(" \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line); } visp::extra_info.clear(); };