From 653248a51e85e98f43aee06ca92e089e1003d5b3 Mon Sep 17 00:00:00 2001
From: Acly
Date: Fri, 17 Oct 2025 10:30:03 +0200
Subject: [PATCH 1/2] ml: add model_file::float_type() which reads type from GGUF metadata

---
 include/visp/ml.h | 1 +
 src/cli/cli.cpp   | 8 ++++++--
 src/visp/ml.cpp   | 9 +++++++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/visp/ml.h b/include/visp/ml.h
index 93a0af1..98e60c0 100644
--- a/include/visp/ml.h
+++ b/include/visp/ml.h
@@ -83,6 +83,7 @@ struct model_file {
 
     VISP_API int64_t n_tensors() const;
     VISP_API std::string_view arch() const;
+    VISP_API ggml_type float_type() const;
     VISP_API tensor_data_layout tensor_layout() const;
 
     VISP_API int64_t key(char const* name) const;
diff --git a/src/cli/cli.cpp b/src/cli/cli.cpp
index fc7f2a1..81360de 100644
--- a/src/cli/cli.cpp
+++ b/src/cli/cli.cpp
@@ -262,9 +262,13 @@ std::tuple load_model_weights(
         preferred_layout = file.tensor_layout();
     }
     model_transfer(file, weights, dev, dev.preferred_float_type(), preferred_layout);
-
     printf("done (%s)\n", t.elapsed_str());
-    printf("- float type: %s\n", ggml_type_name(weights.float_type()));
+
+    ggml_type ftype = file.float_type();
+    if (ftype == GGML_TYPE_COUNT) {
+        ftype = weights.float_type();
+    }
+    printf("- float type: %s\n", ggml_type_name(ftype));
     if (preferred_layout != tensor_data_layout::unknown) {
         printf("- tensor layout: %s\n", to_string(preferred_layout));
     }
diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp
index ad5ae9e..65bed3f 100644
--- a/src/visp/ml.cpp
+++ b/src/visp/ml.cpp
@@ -227,6 +227,15 @@ std::string_view model_file::arch() const {
     return get_string("general.architecture");
 }
 
+ggml_type model_file::float_type() const {
+    if (int64_t key_id = gguf_find_key(gguf.get(), "general.file_type"); key_id != -1) {
+        if (gguf_get_kv_type(gguf.get(), key_id) == GGUF_TYPE_UINT32) {
+            return (ggml_type)gguf_get_val_u32(gguf.get(), key_id);
+        }
+    }
+    return GGML_TYPE_COUNT;
+}
+
 tensor_data_layout model_file::tensor_layout() const {
     fixed_string<64> str;
     int64_t key = gguf_find_key(gguf.get(), format(str, "{}.tensor_data_layout", arch()));
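
(Illustration only, not part of the patch.) A minimal sketch of how a caller
can combine the new model_file::float_type() with the fallback that cli.cpp
uses above; all identifiers are taken from this patch and from ggml:

    #include <visp/ml.h>
    #include <cstdio>

    using namespace visp;

    void print_float_type(model_file const& file, model_weights const& weights) {
        // Prefer the type recorded in GGUF metadata ("general.file_type").
        // float_type() returns GGML_TYPE_COUNT when the key is absent or is
        // not a u32, so fall back to the type of the loaded weights then.
        ggml_type ftype = file.float_type();
        if (ftype == GGML_TYPE_COUNT) {
            ftype = weights.float_type();
        }
        printf("- float type: %s\n", ggml_type_name(ftype));
    }

Returning GGML_TYPE_COUNT instead of asserting lets callers detect missing
metadata and choose their own fallback, as the cli change does.
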
From 25dc41818237b8cb16f2bb8743a2db5bec1a10fe Mon Sep 17 00:00:00 2001
From: Acly
Date: Fri, 17 Oct 2025 11:32:20 +0200
Subject: [PATCH 2/2] ml: extend backend_type to allow selecting specific backends in the future

---
 include/visp/ml.h   | 13 ++++++++++++-
 src/visp/ml.cpp     | 34 +++++++++++++++++++++++++++++-----
 tests/benchmark.cpp |  8 +++++---
 tests/test-ml.cpp   |  7 +++++++
 4 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/include/visp/ml.h b/include/visp/ml.h
index 98e60c0..cb9e2a7 100644
--- a/include/visp/ml.h
+++ b/include/visp/ml.h
@@ -29,7 +29,14 @@ enum tensor_data_layout { unknown, whcn, cwhn };
 
 //
 // Backend device - represents the compute hardware
-enum class backend_type { cpu = 1, gpu = 2 };
+enum class backend_type {
+    cpu = 1,
+    gpu = 2,
+    vulkan = gpu | 1 << 8,
+};
+
+constexpr bool operator&(backend_type a, backend_type b);
+VISP_API std::string_view to_string(backend_type);
 
 // True if the backend library is loaded and has at least one supported device.
 VISP_API bool backend_is_available(backend_type);
@@ -283,6 +290,10 @@ VISP_API tensor interpolate(model_ref const&, tensor x, i64x2 target, int32_t mo
 //
 // implementation
 
+constexpr bool operator&(backend_type a, backend_type b) {
+    return (int(a) & int(b)) != 0;
+}
+
 constexpr model_build_flags operator|(model_build_flag lhs, model_build_flag rhs) {
     return model_build_flags(uint32_t(lhs) | uint32_t(rhs));
 }
diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp
index 65bed3f..107cbc6 100644
--- a/src/visp/ml.cpp
+++ b/src/visp/ml.cpp
@@ -12,6 +12,15 @@ namespace visp {
 //
 // backend
 
+std::string_view to_string(backend_type type) {
+    switch (type) {
+    case backend_type::cpu: return "cpu";
+    case backend_type::gpu: return "gpu";
+    case backend_type::vulkan: return "vulkan";
+    default: return "unknown";
+    }
+}
+
 bool load_ggml_backends() {
     static const bool loaded = []() {
         if (ggml_backend_reg_count() > 0) {
@@ -37,6 +46,10 @@ bool backend_is_available(backend_type type) {
     case backend_type::gpu:
         return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr || ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr;
+    case backend_type::vulkan: {
+        ggml_backend_reg_t reg = ggml_backend_reg_by_name("Vulkan");
+        return reg && ggml_backend_reg_dev_count(reg) > 0;
+    }
     default: ASSERT(false, "Invalid backend type");
     }
     return false;
 }
@@ -60,6 +73,7 @@ backend_device backend_init(backend_type type) {
         b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr));
         break;
     case backend_type::gpu:
+    case backend_type::vulkan:
        b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr));
        if (!b.handle) {
            b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU, nullptr));
@@ -82,7 +96,13 @@ backend_type backend_device::type() const {
     switch (ggml_backend_dev_type(dev)) {
     case GGML_BACKEND_DEVICE_TYPE_CPU: return backend_type::cpu;
     case GGML_BACKEND_DEVICE_TYPE_GPU:
-    case GGML_BACKEND_DEVICE_TYPE_IGPU: return backend_type::gpu;
+    case GGML_BACKEND_DEVICE_TYPE_IGPU: {
+        std::string_view dev_name = ggml_backend_dev_name(dev);
+        if (dev_name.find("Vulkan") != std::string_view::npos) {
+            return backend_type::vulkan;
+        }
+        return backend_type::gpu;
+    }
     default: ASSERT(false, "Unsupported backend device type"); return backend_type::cpu;
     }
 }
@@ -90,7 +110,7 @@ backend_type backend_device::type() const {
 typedef bool (*ggml_backend_dev_supports_f16_t)(ggml_backend_dev_t);
 
 ggml_type backend_device::preferred_float_type() const {
-    if (type() == backend_type::cpu) {
+    if (type() & backend_type::cpu) {
         return GGML_TYPE_F32; // not all operations support F16
     } else {
         ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(device);
@@ -105,7 +125,7 @@ ggml_type backend_device::preferred_float_type() const {
 }
 
 tensor_data_layout backend_device::preferred_layout() const {
-    if (type() == backend_type::cpu) {
+    if (type() & backend_type::cpu) {
         return tensor_data_layout::cwhn;
     }
     return tensor_data_layout::unknown; // no preference, keep model weight layout
@@ -120,7 +140,10 @@ size_t backend_device::total_memory() const {
 
 size_t backend_device::max_alloc() const {
     const size_t vulkan_max = 4 * 1024 * 1024 * 1024ULL; // TODO: query from backend
-    return type() == backend_type::cpu ? SIZE_MAX : vulkan_max;
+    switch (type()) {
+    case backend_type::vulkan: return vulkan_max;
+    default: return SIZE_MAX;
+    }
 }
 
 void backend_set_n_threads(backend_device& b, int n_threads) {
@@ -154,7 +177,8 @@ model_build_flags backend_default_flags(backend_type type) {
     case backend_type::cpu:
         return conv_2d_direct_cwhn | concat_n | f16_conv_transpose | window_partition |
               flash_attn_flag(false);
-    case backend_type::gpu: return flash_attn_flag(true);
+    case backend_type::gpu:
+    case backend_type::vulkan: return flash_attn_flag(true);
     }
     return {};
 }
diff --git a/tests/benchmark.cpp b/tests/benchmark.cpp
index d10bcfb..57123b0 100644
--- a/tests/benchmark.cpp
+++ b/tests/benchmark.cpp
@@ -33,7 +33,7 @@ bench_timings run_benchmark(
     int iterations,
     std::vector const& transfers = {}) {
 
-    if (backend.type() == backend_type::gpu) {
+    if (backend.type() & backend_type::gpu) {
         iterations *= 4;
     }
 
@@ -139,10 +139,12 @@ backend_device initialize_backend(std::string_view backend_type) {
         backend_device cpu = backend_init(backend_type::cpu);
         backend_set_n_threads(cpu, (int)std::thread::hardware_concurrency());
         return cpu;
+    } else if (backend_type == "vulkan") {
+        return backend_init(backend_type::vulkan);
     } else if (backend_type == "gpu") {
         return backend_init(backend_type::gpu);
     } else {
-        throw std::invalid_argument("Invalid backend type. Use 'cpu' or 'gpu'.");
+        throw std::invalid_argument("Invalid backend type. Use 'cpu', 'gpu' or 'vulkan'.");
     }
 }
 
@@ -159,7 +161,7 @@ bench_result benchmark_model(
     bench_result result;
     result.arch = arch;
     result.model = model;
-    result.backend = backend.type() == backend_type::cpu ? "cpu" : "gpu";
+    result.backend = to_string(backend.type());
 
     auto select_model = [&](std::string_view model, std::string_view fallback) {
         if (model.empty()) {
diff --git a/tests/test-ml.cpp b/tests/test-ml.cpp
index 949adcd..6649ae3 100644
--- a/tests/test-ml.cpp
+++ b/tests/test-ml.cpp
@@ -5,6 +5,13 @@
 
 namespace visp {
 
+VISP_TEST(backend_available) {
+    CHECK(backend_is_available(backend_type::cpu));
+    if (backend_is_available(backend_type::gpu)) {
+        CHECK(backend_is_available(backend_type::vulkan));
+    }
+}
+
 VISP_TEST(model_transfer_type_conversion) {
     model_weights src = model_init(2);
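
(Illustration only, not part of the series.) A combined usage sketch:
selecting the most specific backend that is available and reporting it. All
identifiers below come from these patches; only the fallback order is an
assumption:

    #include <visp/ml.h>
    #include <cstdio>
    #include <string_view>

    using namespace visp;

    int main() {
        // Assumed policy: prefer the specific Vulkan backend, fall back to
        // the generic GPU type (which may still resolve to a Vulkan device),
        // then to the CPU.
        backend_type want = backend_type::cpu;
        if (backend_is_available(backend_type::vulkan)) {
            want = backend_type::vulkan;
        } else if (backend_is_available(backend_type::gpu)) {
            want = backend_type::gpu;
        }
        backend_device dev = backend_init(want);

        // Since vulkan = gpu | 1 << 8, the new operator& tests type bits:
        // (dev.type() & backend_type::gpu) is true for both gpu and vulkan.
        std::string_view name = to_string(dev.type());
        printf("backend: %.*s (is gpu: %d)\n", int(name.size()), name.data(),
               (dev.type() & backend_type::gpu) ? 1 : 0);
        return 0;
    }

Note that backend_is_available(vulkan) queries the backend registry by name,
while the generic gpu path goes through ggml_backend_dev_by_type, which is why
the new test only expects vulkan to be available when a gpu device is present.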