diff --git a/include/visp/ml.h b/include/visp/ml.h
index 2a3826d..4b9af39 100644
--- a/include/visp/ml.h
+++ b/include/visp/ml.h
@@ -128,6 +128,15 @@ VISP_API void model_transfer(
     ggml_type float_type = GGML_TYPE_COUNT,
     tensor_data_layout = tensor_data_layout::unknown);
 
+VISP_API void model_transfer( // copies the tensors of a ggml context into backend memory
+    ggml_context* const& src_ctx, // source: its tensors are walked in context order
+    model_weights& weights, // destination: tensors are created in its context, then filled
+    backend_device const& device, // backend on which the destination buffer is allocated
+    ggml_type float_type = GGML_TYPE_COUNT, // passed to the converter that picks target types
+    tensor_data_layout src_layout = tensor_data_layout::unknown,
+    tensor_data_layout dst_layout = tensor_data_layout::unknown, // only whcn -> cwhn permutes
+    span<int32_t const> conv2d_weights = {}); // ascending tensor indices to permute
+
 //
 // Compute graph - wrapper for ggml_cgraph and its associated backend memory
 
diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp
index dbc6a1c..1ac564c 100644
--- a/src/visp/ml.cpp
+++ b/src/visp/ml.cpp
@@ -361,55 +361,74 @@ span<int32_t const> find_conv2d_weight_indices(model_file const& f) {
 } // namespace
 
 void model_transfer(
-    model_file const& file,
+    ggml_context* const& src_ctx, // source tensors, visited in context iteration order
     model_weights& weights,
     backend_device const& device,
     ggml_type float_type,
-    tensor_data_layout layout) {
+    tensor_data_layout src_layout,
+    tensor_data_layout dst_layout, // permutation happens only for whcn -> cwhn
+    span<int32_t const> conv2d_weights) { // ascending indices; "GGUF" entries are not counted
 
-    gguf_context* gguf = file.gguf.get();
-    ggml_context* src_ctx = file.data.get();
     ggml_context* dst_ctx = weights.context.get();
-
-    tensor_data_layout file_layout = file.tensor_layout();
-    bool to_cwhn = file_layout == tensor_data_layout::whcn && layout == tensor_data_layout::cwhn;
+    bool to_cwhn = src_layout == tensor_data_layout::whcn && dst_layout == tensor_data_layout::cwhn;
     tensor_converter convert(src_ctx, float_type, to_cwhn);
-    // Try to find a list of tensor indices which are weights of 2D operations
-    span<int32_t const> conv2d_weights = find_conv2d_weight_indices(file);
 
-    for (int64_t i = 0, conv2d_idx = 0; i < gguf_get_n_tensors(gguf); ++i) {
-        auto name = gguf_get_tensor_name(gguf, i);
-        tensor orig = ggml_get_tensor(src_ctx, name); // TODO: don't use name lookup
+    tensor orig = ggml_get_first_tensor(src_ctx); // pass 1: declare destination tensors
+    for (int64_t i = 0, conv2d_idx = 0; orig;) { // i counts only tensors that are not skipped
+        if (strncmp(orig->name, "GGUF", 4) == 0) { // prefix match on the tensor name
+            orig = ggml_get_next_tensor(src_ctx, orig); // skip "GGUF tensor data binary blob"
+            continue; // (why is there no way to iterate over GGUF tensors directly?)
+        }
         auto ne = nelements(orig);
         if (to_cwhn && conv2d_idx < ssize(conv2d_weights) && conv2d_weights[conv2d_idx] == i) {
             permute_whcn_to_cwhn(ne.data(), ne[2] == 1);
             ++conv2d_idx;
         }
         tensor dup = ggml_new_tensor(dst_ctx, convert.target_type(orig), GGML_MAX_DIMS, ne.data());
-        ggml_set_name(dup, name);
+        ggml_set_name(dup, ggml_get_name(orig)); // destination keeps the source tensor's name
+        orig = ggml_get_next_tensor(src_ctx, orig);
+        ++i; // advanced only here, so skipped entries do not consume an index
     }
 
     ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(dst_ctx, device);
     weights.weights_buffer = ggml_backend_buffer_ptr(buffer);
     weights.buffer_type = device.type();
-    weights.flags = model_get_build_flags(file);
     if (to_cwhn) {
         weights.flags |= model_build_flag::cwhn;
     }
 
-    ggml_tensor* t = ggml_get_first_tensor(dst_ctx);
-    for (int i = 0, conv2d_idx = 0; t; ++i) {
-        tensor data_tensor = ggml_get_tensor(src_ctx, ggml_get_name(t));
+    tensor src = ggml_get_first_tensor(src_ctx); // pass 2: convert and upload tensor data
+    tensor dst = ggml_get_first_tensor(dst_ctx); // paired by position: assumes dst_ctx was empty
+    for (int i = 0, conv2d_idx = 0; src && dst;) { // i is counted the same way as in pass 1
+        if (strncmp(src->name, "GGUF", 4) == 0) {
+            src = ggml_get_next_tensor(src_ctx, src); // dst is not advanced for skipped entries
+            continue; // skip "GGUF tensor data binary blob"
+        }
         bool is_2d = conv2d_idx < int(conv2d_weights.size()) && conv2d_weights[conv2d_idx] == i;
         if (is_2d) {
             ++conv2d_idx;
         }
-        void const* data = convert(data_tensor, t, is_2d && to_cwhn);
-        ggml_backend_tensor_set(t, data, 0, ggml_nbytes(t));
-        t = ggml_get_next_tensor(dst_ctx, t);
+        void const* data = convert(src, dst, is_2d && to_cwhn); // permute only listed tensors
+        ggml_backend_tensor_set(dst, data, 0, ggml_nbytes(dst));
+        src = ggml_get_next_tensor(src_ctx, src);
+        dst = ggml_get_next_tensor(dst_ctx, dst);
+        ++i;
     }
 }
 
+void model_transfer( // model_file overload: forwards to the ggml_context overload
+    model_file const& file,
+    model_weights& weights,
+    backend_device const& device,
+    ggml_type float_type,
+    tensor_data_layout layout) { // requested destination layout
+
+    weights.flags = model_get_build_flags(file); // set first: the call below may OR in cwhn
+    model_transfer(
+        file.data.get(), weights, device, float_type, file.tensor_layout(), layout,
+        find_conv2d_weight_indices(file)); // source layout and conv2d indices come from the file
+}
+
 ggml_type model_weights::float_type() const {
     for (ggml_tensor* t = ggml_get_first_tensor(context.get()); t != nullptr;
          t = ggml_get_next_tensor(context.get(), t)) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6d06f46..c3c8a5f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -4,6 +4,7 @@ add_executable(test-vision)
 target_sources(test-vision PRIVATE
   testing.cpp
   test-image.cpp
+  test-ml.cpp # model_transfer tests
 )
 target_include_directories(test-vision PRIVATE . ../src)
 target_compile_definitions(test-vision PRIVATE ${VISP_ASSERT} ${VISP_DEFINITIONS})
diff --git a/tests/test-image.cpp b/tests/test-image.cpp
index 25fc10b..85a94c6 100644
--- a/tests/test-image.cpp
+++ b/tests/test-image.cpp
@@ -252,6 +252,34 @@ VISP_TEST(image_blur) {
     CHECK_IMAGES_EQUAL(output, expected);
 }
 
+VISP_TEST(image_erosion) { // runs image_erosion on a 6x6 image with one float per pixel
+    constexpr i32x2 extent{6, 6};
+    std::array<float, extent[0] * extent[1]> input_data = {
+        0.0f, 0.5f, 0.0f, 0.0f, 0.0f, 0.0f, //
+        0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, //
+        0.8f, 0.8f, 1.0f, 0.5f, 0.0f, 1.0f, //
+        0.8f, 0.8f, 1.0f, 0.5f, 0.5f, 0.0f, //
+        0.8f, 1.0f, 1.0f, 0.5f, 0.5f, 0.0f, //
+        0.0f, 1.0f, 1.0f, 0.2f, 0.5f, 0.0f  //
+    };
+    std::array<float, extent[0] * extent[1]> expected_data = { // = 3x3 minimum, in-bounds only
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, //
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, //
+        0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, //
+        0.8f, 0.8f, 0.5f, 0.0f, 0.0f, 0.0f, //
+        0.0f, 0.0f, 0.2f, 0.2f, 0.0f, 0.0f, //
+        0.0f, 0.0f, 0.2f, 0.2f, 0.0f, 0.0f  //
+    };
+    std::array<float, extent[0] * extent[1]> output_data{};
+
+    auto input = image_view(extent, input_data);
+    auto output = image_span(extent, output_data);
+    image_erosion(input, output, 1); // presumably 1 is the radius (3x3 window) - TODO confirm
+
+    auto expected = image_view(extent, expected_data);
+    CHECK_IMAGES_EQUAL(output, expected);
+}
+
 VISP_TEST(tile_merge) {
     std::array<std::array<f32x3, 5 * 5>, 4> tiles;
     for (int t = 0; t < 4; ++t) {
diff --git a/tests/test-ml.cpp b/tests/test-ml.cpp
new file mode 100644
index 0000000..949adcd
--- /dev/null
+++ b/tests/test-ml.cpp
@@ -0,0 +1,95 @@
+#include "testing.h"
+#include "visp/ml.h"
+
+#include <numeric>
+
+namespace visp {
+
+VISP_TEST(model_transfer_type_conversion) { // f16 input must arrive as f32; i32 values kept
+    model_weights src = model_init(2); // presumably sized for 2 tensors - TODO confirm
+
+    tensor i = ggml_new_tensor_1d(src, GGML_TYPE_I32, 2);
+    ggml_set_name(i, "i32_tensor");
+    auto i32_data = std::array{4, -1};
+    i->data = i32_data.data(); // source tensors point directly at local arrays
+
+    tensor f = ggml_new_tensor_1d(src, GGML_TYPE_F16, 2);
+    ggml_set_name(f, "f16_tensor");
+    auto f16_data = std::array{ggml_fp32_to_fp16(2.5f), ggml_fp32_to_fp16(-0.5f)};
+    f->data = f16_data.data();
+
+    backend_device dev = backend_init(backend_type::cpu); // results are read via ->data below
+    model_weights dst = model_init(2);
+    model_transfer(src, dst, dev, GGML_TYPE_F32); // f16 -> f32 conversion
+
+    int32_t const* i32_result = (int32_t const*)ggml_get_tensor(dst, "i32_tensor")->data;
+    CHECK_EQUAL(i32_result[0], 4);
+    CHECK_EQUAL(i32_result[1], -1);
+
+    tensor f_result = ggml_get_tensor(dst, "f16_tensor"); // name is carried over unchanged
+    CHECK(f_result->type == GGML_TYPE_F32);
+    float const* f32_result = (float const*)f_result->data;
+    CHECK_EQUAL(f32_result[0], 2.5f);
+    CHECK_EQUAL(f32_result[1], -0.5f);
+}
+
+VISP_TEST(model_transfer_layout_conversion) { // whcn -> cwhn: listed tensors are permuted
+    model_weights src = model_init(3); // presumably sized for 3 tensors - TODO confirm
+
+    tensor conv_dw = ggml_new_tensor_4d(src, GGML_TYPE_F32, 2, 2, 1, 3); // wh1c
+    ggml_set_name(conv_dw, "conv_dw");
+    auto conv_dw_data = std::array<float, 2 * 2 * 1 * 3>{};
+    std::iota(conv_dw_data.begin(), conv_dw_data.end(), 1.0f); // values 1..12 in source order
+    conv_dw->data = conv_dw_data.data();
+
+    tensor conv = ggml_new_tensor_4d(src, GGML_TYPE_F32, 2, 2, 4, 3); // whco
+    ggml_set_name(conv, "conv");
+    auto conv_data = std::array<float, 2 * 2 * 3 * 4>{};
+    std::iota(conv_data.begin(), conv_data.end(), 1.0f); // values 1..48 in source order
+    conv->data = conv_data.data();
+
+    tensor no_conv = ggml_new_tensor_1d(src, GGML_TYPE_F32, 2);
+    ggml_set_name(no_conv, "no_conv");
+    auto no_conv_data = std::array<float, 2>{1.0f, 2.0f};
+    no_conv->data = no_conv_data.data();
+
+    auto conv_weights = std::array{0, 1}; // tensors 0 and 1 (conv_dw, conv) get permuted
+    auto src_layout = tensor_data_layout::whcn;
+    auto dst_layout = tensor_data_layout::cwhn;
+
+    backend_device dev = backend_init(backend_type::cpu);
+    model_weights dst = model_init(3);
+    model_transfer(src, dst, dev, GGML_TYPE_COUNT, src_layout, dst_layout, conv_weights);
+
+    auto conv_dw_expected = std::array{ // the size-3 dimension becomes the innermost one
+        1.0f, 5.0f, 9.0f,  //
+        2.0f, 6.0f, 10.0f, //
+        3.0f, 7.0f, 11.0f, //
+        4.0f, 8.0f, 12.0f  //
+    };
+    float const* conv_dw_result = (float const*)ggml_get_tensor(dst, "conv_dw")->data;
+    for (int i = 0; i < int(conv_dw_expected.size()); ++i) {
+        CHECK_EQUAL(conv_dw_result[i], conv_dw_expected[i]);
+    }
+
+    auto conv_expected = std::array{ // size-4 dimension innermost, size-3 dimension outermost
+        1.0f,  5.0f,  9.0f,  13.0f, 2.0f, 6.0f, 10.0f, 14.0f, //
+        3.0f,  7.0f,  11.0f, 15.0f, 4.0f, 8.0f, 12.0f, 16.0f, //
+
+        17.0f,  21.0f,  25.0f, 29.0f, 18.0f, 22.0f, 26.0f, 30.0f, //
+        19.0f, 23.0f, 27.0f, 31.0f, 20.0f, 24.0f, 28.0f, 32.0f, //
+
+        33.0f, 37.0f, 41.0f, 45.0f, 34.0f, 38.0f, 42.0f, 46.0f, //
+        35.0f, 39.0f, 43.0f, 47.0f, 36.0f, 40.0f, 44.0f, 48.0f  //
+    };
+    float const* conv_result = (float const*)ggml_get_tensor(dst, "conv")->data;
+    for (int i = 0; i < int(conv_expected.size()); ++i) {
+        CHECK_EQUAL(conv_result[i], conv_expected[i]);
+    }
+
+    float const* no_conv_result = (float const*)ggml_get_tensor(dst, "no_conv")->data;
+    CHECK_EQUAL(no_conv_result[0], 1.0f); // not listed in conv_weights, so copied unchanged
+    CHECK_EQUAL(no_conv_result[1], 2.0f);
+}
+
+} // namespace visp
diff --git a/tests/testing.cpp b/tests/testing.cpp
index d92c327..2a9403f 100644
--- a/tests/testing.cpp
+++ b/tests/testing.cpp
@@ -18,6 +18,8 @@ std::string extra_info;
 int main(int argc, char** argv) {
     using namespace visp;
 
+    ggml_backend_load_all();
+
     auto& registry = test_registry_instance();
 
     int passed = 0;
@@ -81,6 +83,10 @@ int main(int argc, char** argv) {
             printf("%s %s\n", verbose ? "" : name, "\033[31mERROR\033[0m");
             printf("  \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line);
             printf("  \033[93m%s\033[0m\n", e.what());
+        } catch (...) {
+            ++errors;
+            printf("%s %s\n", verbose ? "" : name, "\033[31mERROR\033[0m");
+            printf("  \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line);
         }
         visp::extra_info.clear();
     };