14 changes: 13 additions & 1 deletion include/visp/ml.h
@@ -29,7 +29,14 @@ enum tensor_data_layout { unknown, whcn, cwhn };
 //
 // Backend device - represents the compute hardware
 
-enum class backend_type { cpu = 1, gpu = 2 };
+enum class backend_type {
+    cpu = 1,
+    gpu = 2,
+    vulkan = gpu | 1 << 8,
+};
+
+constexpr bool operator&(backend_type a, backend_type b);
+VISP_API std::string_view to_string(backend_type);
 
 // True if the backend library is loaded and has at least one supported device.
 VISP_API bool backend_is_available(backend_type);
@@ -83,6 +90,7 @@ struct model_file {
 
     VISP_API int64_t n_tensors() const;
     VISP_API std::string_view arch() const;
+    VISP_API ggml_type float_type() const;
     VISP_API tensor_data_layout tensor_layout() const;
 
     VISP_API int64_t key(char const* name) const;
@@ -282,6 +290,10 @@ VISP_API tensor interpolate(model_ref const&, tensor x, i64x2 target, int32_t mo
 //
 // implementation
 
+constexpr bool operator&(backend_type a, backend_type b) {
+    return (int(a) & int(b)) != 0;
+}
+
 constexpr model_build_flags operator|(model_build_flag lhs, model_build_flag rhs) {
     return model_build_flags(uint32_t(lhs) | uint32_t(rhs));
 }
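The enum now doubles as a small bit set: the low byte encodes the device class (cpu, gpu) and the higher bits distinguish concrete backends, so vulkan (gpu | 1 << 8) still matches any gpu-class check. A minimal sketch of what the new constexpr operator& allows, not part of the diff:

    // Sketch, assuming #include <visp/ml.h> and using namespace visp:
    static_assert(backend_type::vulkan & backend_type::gpu);   // vulkan is gpu-class
    static_assert(!(backend_type::vulkan & backend_type::cpu)); // but not cpu-class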
8 changes: 6 additions & 2 deletions src/cli/cli.cpp
@@ -262,9 +262,13 @@ std::tuple<model_file, model_weights> load_model_weights(
         preferred_layout = file.tensor_layout();
     }
     model_transfer(file, weights, dev, dev.preferred_float_type(), preferred_layout);
-
     printf("done (%s)\n", t.elapsed_str());
-    printf("- float type: %s\n", ggml_type_name(weights.float_type()));
+
+    ggml_type ftype = file.float_type();
+    if (ftype == GGML_TYPE_COUNT) {
+        ftype = weights.float_type();
+    }
+    printf("- float type: %s\n", ggml_type_name(ftype));
     if (preferred_layout != tensor_data_layout::unknown) {
         printf("- tensor layout: %s\n", to_string(preferred_layout));
     }
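The CLI now prefers the float type recorded in the GGUF file and only falls back to the type of the transferred weights, with GGML_TYPE_COUNT acting as the "key not present" sentinel returned by model_file::float_type(). A hypothetical helper showing the same fallback in isolation (report_float_type is illustrative, not in the patch):

    // Sketch: resolve the reported float type, assuming GGML_TYPE_COUNT
    // never names a real tensor type and so can mean "not stored in file".
    ggml_type report_float_type(model_file const& file, model_weights const& weights) {
        ggml_type t = file.float_type(); // GGML_TYPE_COUNT if the key is missing
        return t == GGML_TYPE_COUNT ? weights.float_type() : t;
    }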
43 changes: 38 additions & 5 deletions src/visp/ml.cpp
@@ -12,6 +12,15 @@ namespace visp {
 //
 // backend
 
+std::string_view to_string(backend_type type) {
+    switch (type) {
+    case backend_type::cpu: return "cpu";
+    case backend_type::gpu: return "gpu";
+    case backend_type::vulkan: return "vulkan";
+    default: return "unknown";
+    }
+}
+
 bool load_ggml_backends() {
     static const bool loaded = []() {
         if (ggml_backend_reg_count() > 0) {
@@ -37,6 +46,10 @@ bool backend_is_available(backend_type type) {
     case backend_type::gpu:
         return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
                ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr;
+    case backend_type::vulkan: {
+        ggml_backend_reg_t reg = ggml_backend_reg_by_name("Vulkan");
+        return reg && ggml_backend_reg_dev_count(reg) > 0;
+    }
     default: ASSERT(false, "Invalid backend type");
     }
     return false;
@@ -60,6 +73,7 @@ backend_device backend_init(backend_type type) {
         b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr));
         break;
     case backend_type::gpu:
+    case backend_type::vulkan:
         b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr));
         if (!b.handle) {
             b.handle.reset(ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU, nullptr));
@@ -82,15 +96,21 @@ backend_type backend_device::type() const {
     switch (ggml_backend_dev_type(dev)) {
     case GGML_BACKEND_DEVICE_TYPE_CPU: return backend_type::cpu;
     case GGML_BACKEND_DEVICE_TYPE_GPU:
-    case GGML_BACKEND_DEVICE_TYPE_IGPU: return backend_type::gpu;
+    case GGML_BACKEND_DEVICE_TYPE_IGPU: {
+        std::string_view dev_name = ggml_backend_dev_name(dev);
+        if (dev_name.find("Vulkan") != std::string_view::npos) {
+            return backend_type::vulkan;
+        }
+        return backend_type::gpu;
+    }
     default: ASSERT(false, "Unsupported backend device type"); return backend_type::cpu;
     }
 }
 
 typedef bool (*ggml_backend_dev_supports_f16_t)(ggml_backend_dev_t);
 
 ggml_type backend_device::preferred_float_type() const {
-    if (type() == backend_type::cpu) {
+    if (type() & backend_type::cpu) {
         return GGML_TYPE_F32; // not all operations support F16
     } else {
         ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(device);
@@ -105,7 +125,7 @@ ggml_type backend_device::preferred_float_type() const {
 }
 
 tensor_data_layout backend_device::preferred_layout() const {
-    if (type() == backend_type::cpu) {
+    if (type() & backend_type::cpu) {
         return tensor_data_layout::cwhn;
     }
     return tensor_data_layout::unknown; // no preference, keep model weight layout
@@ -120,7 +140,10 @@ size_t backend_device::total_memory() const {
 
 size_t backend_device::max_alloc() const {
     const size_t vulkan_max = 4 * 1024 * 1024 * 1024ULL; // TODO: query from backend
-    return type() == backend_type::cpu ? SIZE_MAX : vulkan_max;
+    switch (type()) {
+    case backend_type::vulkan: return vulkan_max;
+    default: return SIZE_MAX;
+    }
 }
 
 void backend_set_n_threads(backend_device& b, int n_threads) {
@@ -154,7 +177,8 @@ model_build_flags backend_default_flags(backend_type type) {
     case backend_type::cpu:
         return conv_2d_direct_cwhn | concat_n | f16_conv_transpose | window_partition |
                flash_attn_flag(false);
-    case backend_type::gpu: return flash_attn_flag(true);
+    case backend_type::gpu:
+    case backend_type::vulkan: return flash_attn_flag(true);
     }
     return {};
 }
@@ -227,6 +251,15 @@ std::string_view model_file::arch() const {
     return get_string("general.architecture");
 }
 
+ggml_type model_file::float_type() const {
+    if (int64_t key_id = gguf_find_key(gguf.get(), "general.file_type"); key_id != -1) {
+        if (gguf_get_kv_type(gguf.get(), key_id) == GGUF_TYPE_UINT32) {
+            return (ggml_type)gguf_get_val_u32(gguf.get(), key_id);
+        }
+    }
+    return GGML_TYPE_COUNT;
+}
+
 tensor_data_layout model_file::tensor_layout() const {
     fixed_string<64> str;
     int64_t key = gguf_find_key(gguf.get(), format(str, "{}.tensor_data_layout", arch()));
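Together these changes let callers request Vulkan by name while existing gpu-keyed logic keeps working, since backend_device::type() reports vulkan only when the underlying ggml device name contains "Vulkan". A hypothetical selection helper built on the new API (pick_device is illustrative, not in the patch):

    // Sketch: prefer Vulkan explicitly, then any GPU, then CPU.
    // backend_init already falls back from GPU to IGPU internally.
    backend_device pick_device() {
        if (backend_is_available(backend_type::vulkan)) {
            return backend_init(backend_type::vulkan);
        }
        if (backend_is_available(backend_type::gpu)) {
            return backend_init(backend_type::gpu);
        }
        return backend_init(backend_type::cpu);
    }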
8 changes: 5 additions & 3 deletions tests/benchmark.cpp
@@ -33,7 +33,7 @@ bench_timings run_benchmark(
     int iterations,
     std::vector<input_transfer> const& transfers = {}) {
 
-    if (backend.type() == backend_type::gpu) {
+    if (backend.type() & backend_type::gpu) {
         iterations *= 4;
     }
 
@@ -139,10 +139,12 @@ backend_device initialize_backend(std::string_view backend_type) {
         backend_device cpu = backend_init(backend_type::cpu);
         backend_set_n_threads(cpu, (int)std::thread::hardware_concurrency());
        return cpu;
+    } else if (backend_type == "vulkan") {
+        return backend_init(backend_type::vulkan);
     } else if (backend_type == "gpu") {
         return backend_init(backend_type::gpu);
     } else {
-        throw std::invalid_argument("Invalid backend type. Use 'cpu' or 'gpu'.");
+        throw std::invalid_argument("Invalid backend type. Use 'cpu', 'gpu' or 'vulkan'.");
     }
 }
 
@@ -159,7 +161,7 @@ bench_result benchmark_model(
     bench_result result;
     result.arch = arch;
     result.model = model;
-    result.backend = backend.type() == backend_type::cpu ? "cpu" : "gpu";
+    result.backend = to_string(backend.type());
 
     auto select_model = [&](std::string_view model, std::string_view fallback) {
         if (model.empty()) {
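Because vulkan carries the gpu bit, the iteration scaling in run_benchmark now applies to Vulkan devices as well, and result.backend picks up the precise backend name via to_string instead of collapsing everything non-cpu to "gpu". A hypothetical invocation of the updated selector (the argument values are those the error message documents):

    backend_device dev = initialize_backend("vulkan"); // or "cpu", "gpu"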
7 changes: 7 additions & 0 deletions tests/test-ml.cpp
@@ -5,6 +5,13 @@
 
 namespace visp {
 
+VISP_TEST(backend_available) {
+    CHECK(backend_is_available(backend_type::cpu));
+    if (backend_is_available(backend_type::gpu)) {
+        CHECK(backend_is_available(backend_type::vulkan));
+    }
+}
+
 VISP_TEST(model_transfer_type_conversion) {
     model_weights src = model_init(2);
 