From 653248a51e85e98f43aee06ca92e089e1003d5b3 Mon Sep 17 00:00:00 2001
From: Acly
Date: Fri, 17 Oct 2025 10:30:03 +0200
Subject: [PATCH 1/2] ml: add model_file::float_type() which reads type from GGUF metadata

---
 include/visp/ml.h | 1 +
 src/cli/cli.cpp   | 8 ++++++--
 src/visp/ml.cpp   | 9 +++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/visp/ml.h b/include/visp/ml.h
index 93a0af1..98e60c0 100644
--- a/include/visp/ml.h
+++ b/include/visp/ml.h
@@ -83,6 +83,7 @@ struct model_file {
 
     VISP_API int64_t n_tensors() const;
     VISP_API std::string_view arch() const;
+    VISP_API ggml_type float_type() const;
     VISP_API tensor_data_layout tensor_layout() const;
 
     VISP_API int64_t key(char const* name) const;
diff --git a/src/cli/cli.cpp b/src/cli/cli.cpp
index fc7f2a1..81360de 100644
--- a/src/cli/cli.cpp
+++ b/src/cli/cli.cpp
@@ -262,9 +262,13 @@ std::tuple load_model_weights(
         preferred_layout = file.tensor_layout();
     }
     model_transfer(file, weights, dev, dev.preferred_float_type(), preferred_layout);
-
     printf("done (%s)\n", t.elapsed_str());
-    printf("- float type: %s\n", ggml_type_name(weights.float_type()));
+
+    ggml_type ftype = file.float_type();
+    if (ftype == GGML_TYPE_COUNT) {
+        ftype = weights.float_type();
+    }
+    printf("- float type: %s\n", ggml_type_name(ftype));
     if (preferred_layout != tensor_data_layout::unknown) {
         printf("- tensor layout: %s\n", to_string(preferred_layout));
     }
diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp
index ad5ae9e..65bed3f 100644
--- a/src/visp/ml.cpp
+++ b/src/visp/ml.cpp
@@ -227,6 +227,15 @@ std::string_view model_file::arch() const {
     return get_string("general.architecture");
 }
 
+ggml_type model_file::float_type() const {
+    if (int64_t key_id = gguf_find_key(gguf.get(), "general.file_type"); key_id != -1) {
+        if (gguf_get_kv_type(gguf.get(), key_id) == GGUF_TYPE_UINT32) {
+            return (ggml_type)gguf_get_val_u32(gguf.get(), key_id);
+        }
+    }
+    return GGML_TYPE_COUNT;
+}
+
 tensor_data_layout model_file::tensor_layout() const {
     fixed_string<64> str;
     int64_t key = gguf_find_key(gguf.get(), format(str, "{}.tensor_data_layout", arch()));
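
(Illustration only, not part of the patch.) A minimal sketch of how a caller
can combine the new model_file::float_type() with the fallback that cli.cpp
uses above; all identifiers are taken from this patch and from ggml:

    #include <visp/ml.h>
    #include <cstdio>

    using namespace visp;

    void print_float_type(model_file const& file, model_weights const& weights) {
        // Prefer the type recorded in GGUF metadata ("general.file_type").
        // float_type() returns GGML_TYPE_COUNT when the key is absent or is
        // not a u32, so fall back to the type of the loaded weights then.
        ggml_type ftype = file.float_type();
        if (ftype == GGML_TYPE_COUNT) {
            ftype = weights.float_type();
        }
        printf("- float type: %s\n", ggml_type_name(ftype));
    }

Returning GGML_TYPE_COUNT instead of asserting lets callers detect missing
metadata and choose their own fallback, as the cli change does.
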
From 25dc41818237b8cb16f2bb8743a2db5bec1a10fe Mon Sep 17 00:00:00 2001
From: Acly
Date: Fri, 17 Oct 2025 11:32:20 +0200
Subject: [PATCH 2/2] ml: extend backend_type to allow selecting specific backends in the future

---
 include/visp/ml.h   | 13 ++++++++++++-
 src/visp/ml.cpp     | 34 +++++++++++++++++++++++++++++-----
 tests/benchmark.cpp |  8 +++++---
 tests/test-ml.cpp   |  7 +++++++
 4 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/include/visp/ml.h b/include/visp/ml.h
index 98e60c0..cb9e2a7 100644
--- a/include/visp/ml.h
+++ b/include/visp/ml.h
@@ -29,7 +29,14 @@ enum tensor_data_layout { unknown, whcn, cwhn };
 
 //
 // Backend device - represents the compute hardware
-enum class backend_type { cpu = 1, gpu = 2 };
+enum class backend_type {
+    cpu = 1,
+    gpu = 2,
+    vulkan = gpu | 1 << 8,
+};
+
+constexpr bool operator&(backend_type a, backend_type b);
+VISP_API std::string_view to_string(backend_type);
 
 // True if the backend library is loaded and has at least one supported device.
 VISP_API bool backend_is_available(backend_type);
@@ -283,6 +290,10 @@ VISP_API tensor interpolate(model_ref const&, tensor x, i64x2 target, int32_t mo
 //
 // implementation
 
+constexpr bool operator&(backend_type a, backend_type b) {
+    return (int(a) & int(b)) != 0;
+}
+
 constexpr model_build_flags operator|(model_build_flag lhs, model_build_flag rhs) {
     return model_build_flags(uint32_t(lhs) | uint32_t(rhs));
 }
diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp
index 65bed3f..107cbc6 100644
--- a/src/visp/ml.cpp
+++ b/src/visp/ml.cpp
@@ -12,6 +12,15 @@ namespace visp {
 //
 // backend
 
+std::string_view to_string(backend_type type) {
+    switch (type) {
+    case backend_type::cpu: return "cpu";
+    case backend_type::gpu: return "gpu";
+    case backend_type::vulkan: return "vulkan";
+    default: return "unknown";
+    }
+}
+
 bool load_ggml_backends() {
     static const bool loaded = []() {
         if (ggml_backend_reg_count() > 0) {
@@ -37,6 +46,10 @@ bool backend_is_available(backend_type type) {
     case backend_type::gpu:
         return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr || ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr;
+    case backend_type::vulkan: {
+        ggml_backend_reg_t reg = ggml_backend_reg_by_name("Vulkan");
+        return reg && ggml_backend_reg_dev_count(reg) > 0;
+    }
     default: ASSERT(false, "Invalid backend type");
     }
     return false;
 }
@@ -60,6 +73,7 @@ backend_device backend_init(backend_type type) {
         b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr));
         break;
     case backend_type::gpu:
+    case backend_type::vulkan:
        b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr));
        if (!b.handle) {
            b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU, nullptr));
@@ -82,7 +96,13 @@ backend_type backend_device::type() const {
     switch (ggml_backend_dev_type(dev)) {
     case GGML_BACKEND_DEVICE_TYPE_CPU: return backend_type::cpu;
     case GGML_BACKEND_DEVICE_TYPE_GPU:
-    case GGML_BACKEND_DEVICE_TYPE_IGPU: return backend_type::gpu;
+    case GGML_BACKEND_DEVICE_TYPE_IGPU: {
+        std::string_view dev_name = ggml_backend_dev_name(dev);
+        if (dev_name.find("Vulkan") != std::string_view::npos) {
+            return backend_type::vulkan;
+        }
+        return backend_type::gpu;
+    }
     default: ASSERT(false, "Unsupported backend device type"); return backend_type::cpu;
     }
 }
@@ -90,7 +110,7 @@ backend_type backend_device::type() const {
 typedef bool (*ggml_backend_dev_supports_f16_t)(ggml_backend_dev_t);
 
 ggml_type backend_device::preferred_float_type() const {
-    if (type() == backend_type::cpu) {
+    if (type() & backend_type::cpu) {
         return GGML_TYPE_F32; // not all operations support F16
     } else {
         ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(device);
@@ -105,7 +125,7 @@ ggml_type backend_device::preferred_float_type() const {
 }
 
 tensor_data_layout backend_device::preferred_layout() const {
-    if (type() == backend_type::cpu) {
+    if (type() & backend_type::cpu) {
         return tensor_data_layout::cwhn;
     }
     return tensor_data_layout::unknown; // no preference, keep model weight layout
@@ -120,7 +140,10 @@ size_t backend_device::total_memory() const {
 
 size_t backend_device::max_alloc() const {
     const size_t vulkan_max = 4 * 1024 * 1024 * 1024ULL; // TODO: query from backend
-    return type() == backend_type::cpu ? SIZE_MAX : vulkan_max;
+    switch (type()) {
+    case backend_type::vulkan: return vulkan_max;
+    default: return SIZE_MAX;
+    }
 }
 
 void backend_set_n_threads(backend_device& b, int n_threads) {
@@ -154,7 +177,8 @@ model_build_flags backend_default_flags(backend_type type) {
     case backend_type::cpu:
         return conv_2d_direct_cwhn | concat_n | f16_conv_transpose | window_partition |
               flash_attn_flag(false);
-    case backend_type::gpu: return flash_attn_flag(true);
+    case backend_type::gpu:
+    case backend_type::vulkan: return flash_attn_flag(true);
     }
     return {};
 }
diff --git a/tests/benchmark.cpp b/tests/benchmark.cpp
index d10bcfb..57123b0 100644
--- a/tests/benchmark.cpp
+++ b/tests/benchmark.cpp
@@ -33,7 +33,7 @@ bench_timings run_benchmark(
     int iterations,
     std::vector const& transfers = {}) {
 
-    if (backend.type() == backend_type::gpu) {
+    if (backend.type() & backend_type::gpu) {
         iterations *= 4;
     }
 
@@ -139,10 +139,12 @@ backend_device initialize_backend(std::string_view backend_type) {
         backend_device cpu = backend_init(backend_type::cpu);
         backend_set_n_threads(cpu, (int)std::thread::hardware_concurrency());
         return cpu;
+    } else if (backend_type == "vulkan") {
+        return backend_init(backend_type::vulkan);
     } else if (backend_type == "gpu") {
         return backend_init(backend_type::gpu);
     } else {
-        throw std::invalid_argument("Invalid backend type. Use 'cpu' or 'gpu'.");
+        throw std::invalid_argument("Invalid backend type. Use 'cpu', 'gpu' or 'vulkan'.");
     }
 }
 
@@ -159,7 +161,7 @@ bench_result benchmark_model(
     bench_result result;
     result.arch = arch;
     result.model = model;
-    result.backend = backend.type() == backend_type::cpu ? "cpu" : "gpu";
+    result.backend = to_string(backend.type());
 
     auto select_model = [&](std::string_view model, std::string_view fallback) {
         if (model.empty()) {
diff --git a/tests/test-ml.cpp b/tests/test-ml.cpp
index 949adcd..6649ae3 100644
--- a/tests/test-ml.cpp
+++ b/tests/test-ml.cpp
@@ -5,6 +5,13 @@
 
 namespace visp {
 
+VISP_TEST(backend_available) {
+    CHECK(backend_is_available(backend_type::cpu));
+    if (backend_is_available(backend_type::gpu)) {
+        CHECK(backend_is_available(backend_type::vulkan));
+    }
+}
+
 VISP_TEST(model_transfer_type_conversion) {
     model_weights src = model_init(2);
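
(Illustration only, not part of the series.) A combined usage sketch:
selecting the most specific backend that is available and reporting it. All
identifiers below come from these patches; only the fallback order is an
assumption:

    #include <visp/ml.h>
    #include <cstdio>
    #include <string_view>

    using namespace visp;

    int main() {
        // Assumed policy: prefer the specific Vulkan backend, fall back to
        // the generic GPU type (which may still resolve to a Vulkan device),
        // then to the CPU.
        backend_type want = backend_type::cpu;
        if (backend_is_available(backend_type::vulkan)) {
            want = backend_type::vulkan;
        } else if (backend_is_available(backend_type::gpu)) {
            want = backend_type::gpu;
        }
        backend_device dev = backend_init(want);

        // Since vulkan = gpu | 1 << 8, the new operator& tests type bits:
        // (dev.type() & backend_type::gpu) is true for both gpu and vulkan.
        std::string_view name = to_string(dev.type());
        printf("backend: %.*s (is gpu: %d)\n", int(name.size()), name.data(),
               (dev.type() & backend_type::gpu) ? 1 : 0);
        return 0;
    }

Note that backend_is_available(vulkan) queries the backend registry by name,
while the generic gpu path goes through ggml_backend_dev_by_type, which is why
the new test only expects vulkan to be available when a gpu device is present.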