diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..b713a40
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,97 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    strategy:
+      matrix:
+        os: [ubuntu-22.04, windows-latest, macos-14]
+
+    runs-on: ${{ matrix.os }}
+
+    name: Build & Test on ${{ matrix.os }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4  # v3 runs on the retired Node 16 runtime
+        with:
+          submodules: recursive
+
+      - name: Dependencies (Linux)
+        if: matrix.os == 'ubuntu-22.04'
+        run: |  # apt-key is deprecated; the LunarG key is installed as a trusted keyring file instead
+          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc > /dev/null
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo apt-get update -y
+          sudo apt-get install -y build-essential cmake g++ ninja-build mesa-vulkan-drivers vulkan-sdk
+
+      - name: Dependencies (Windows)
+        if: matrix.os == 'windows-latest'
+        uses: microsoft/setup-msbuild@v2
+
+      - name: Configure (Linux)
+        if: matrix.os == 'ubuntu-22.04'
+        run: >
+          cmake . -B build -G Ninja
+          -D CMAKE_BUILD_TYPE=Release
+          -D VISP_CI=ON
+          -D VISP_VULKAN=ON
+          -D VISP_FMT_LIB=ON
+
+      - name: Configure (Windows)
+        if: matrix.os == 'windows-latest'
+        run: >
+          cmake . -B build -A x64
+          -D CMAKE_BUILD_TYPE=Release
+          -D VISP_CI=ON
+
+      - name: Configure (MacOS)
+        if: matrix.os == 'macos-14'
+        run: >
+          cmake . -B build -G Ninja
+          -D CMAKE_BUILD_TYPE=Release
+          -D VISP_CI=ON
+          -D GGML_METAL=OFF
+          -D GGML_RPC=ON
+          -D CMAKE_BUILD_RPATH="@loader_path"
+
+      - name: Build
+        run: cmake --build build --config Release
+
+      # tests fail with vulkan/llvmpipe (runs out of memory or just wrong results)
+      # - name: Test Vulkan
+      #   if: matrix.os == 'ubuntu-22.04'
+      #   working-directory: ./build
+      #   run: |
+      #     export GGML_VK_VISIBLE_DEVICES=0
+      #     ctest --verbose
+
+      - name: Test CPU
+        if: matrix.os != 'ubuntu-22.04'
+        working-directory: ./build
+        run: ctest --verbose -C Release
+
+      - name: Install
+        run: cmake --install build --prefix install --config Release
+
+      - name: Package
+        working-directory: ./build
+        run: cpack
+
+      - name: Upload artifacts
+        if: success() || failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: visioncpp-${{ matrix.os }}
+          path: |
+            ./build/*.tar.gz
+            ./build/*.zip
+            ./tests/results/*.png
+          compression-level: 0
diff --git a/.gitmodules b/.gitmodules
index b3cd91e..4bf732a 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "depend/ggml"]
 	path = depend/ggml
-	url = git@github.com:Acly/ggml.git
+	url = https://github.com/Acly/ggml.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a665252..8e462d1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.22)
+cmake_minimum_required(VERSION 3.28)
 
 project(vision.cpp VERSION 0.1.0 LANGUAGES CXX)
 
@@ -16,11 +16,16 @@ if(PROJECT_IS_TOP_LEVEL)
   set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 endif()
 
-if(VISP_DEV OR ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+if(VISP_DEV)
   set(VISP_ASSERT "VISP_ASSERT_BREAK")
-endif()
-if(${CMAKE_BUILD_TYPE} STREQUAL "Release")
-  set(VISP_ASSERT "VISP_ASSERT_DISABLE")
+elseif(VISP_CI)
+  set(VISP_ASSERT "VISP_ASSERT_THROW")
+elseif(CMAKE_BUILD_TYPE) # may be empty, e.g. when no build type was given
+  if(CMAKE_BUILD_TYPE STREQUAL "Debug") # variable name, not ${...}: avoids double expansion
+    set(VISP_ASSERT "VISP_ASSERT_BREAK")
+  elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
+    set(VISP_ASSERT "VISP_ASSERT_DISABLE")
+  endif()
 endif()
 
 if(VISP_ASAN)
@@ -60,11 +65,15 @@ endif()
 set(GGML_VULKAN ${VISP_VULKAN})
 set(GGML_LLAMAFILE ON)
 if(VISP_CI)
-  set(GGML_NATIVE OFF)
   set(GGML_BACKEND_DL ON)
-  foreach (feat SSE42 AVX AVX2 F16C BMI2 FMA) # ~haswell and newer
-    set(GGML_${feat} ON)
-  endforeach()
+  if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")
+    # ARM: nothing is overridden here, the x86 feature flags below do not apply
+  else()
+    set(GGML_NATIVE OFF)
+    foreach (feat SSE42 AVX AVX2 F16C BMI2 FMA) # ~haswell and newer
+      set(GGML_${feat} ON)
+    endforeach()
+  endif()
 endif()
 add_subdirectory(depend/ggml)
 
@@ -81,7 +90,7 @@ if(VISP_TESTS)
   add_subdirectory(models)
 endif()
 
-# Installation and packaging
+# Installation
 
 install(TARGETS visioncpp
   RUNTIME DESTINATION bin
@@ -92,6 +101,8 @@ if(PROJECT_IS_TOP_LEVEL)
   install(FILES README.md LICENSE DESTINATION .)
 endif()
 
+install(TARGETS vision-cli RUNTIME DESTINATION bin)
+
 include(CMakePackageConfigHelpers)
 
 set(VISP_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
@@ -115,12 +126,26 @@ install(
   DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/visioncpp
 )
 
-# if(WIN32)
-#   set(CPACK_GENERATOR "ZIP")
-#   set(CPACK_PACKAGE_FILE_NAME visioncpp-windows-x64-${PROJECT_VERSION})
-# else()
-#   set(CPACK_GENERATOR "TGZ")
-#   set(CPACK_PACKAGE_FILE_NAME visioncpp-linux-x64-${PROJECT_VERSION})
-# endif()
-# set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
-# include(CPack)
+# Packaging
+
+if(PROJECT_IS_TOP_LEVEL)
+  # Derive the architecture tag from the target CPU instead of hard-coding x64:
+  # the macos-14 CI runner, for example, produces arm64 binaries.
+  if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")
+    set(VISP_PACKAGE_ARCH arm64)
+  else()
+    set(VISP_PACKAGE_ARCH x64)
+  endif()
+  if(WIN32)
+    set(CPACK_GENERATOR "ZIP")
+    set(CPACK_PACKAGE_FILE_NAME visioncpp-windows-${VISP_PACKAGE_ARCH}-${PROJECT_VERSION})
+  elseif(APPLE)
+    set(CPACK_GENERATOR "TGZ")
+    set(CPACK_PACKAGE_FILE_NAME visioncpp-macos-${VISP_PACKAGE_ARCH}-${PROJECT_VERSION})
+  else()
+    set(CPACK_GENERATOR "TGZ")
+    set(CPACK_PACKAGE_FILE_NAME visioncpp-linux-${VISP_PACKAGE_ARCH}-${PROJECT_VERSION})
+  endif()
+  set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
+  include(CPack)
+endif()
diff --git a/README.md b/README.md
index 98abe58..bcab4e2 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ See [Building](#building) to build from source. Binaries can be found in `build/
 Let's use MobileSAM to generate a segmentation mask for the <object>
 at pixel position (320, 240).
 
-You can download the required model from huggingface: [MobileSAM-F16.gguf]().
+You can download the required model from huggingface: [MobileSAM-F16.gguf](https://huggingface.co/Acly/MobileSAM-GGUF/resolve/main/MobileSAM-F16.gguf).
 
 #### CLI
 
@@ -93,14 +93,14 @@ vision-cli esrgan -m models/4x_foolhardy_Remacrih-F16.gguf -i input.png -o outpu
 
 ### Converting models
 
-Models need to be converted to GGUF before they can be used. This can also
+Models need to be converted to GGUF before they can be used. This will also
 rearrange or precompute tensors for more optimal inference.
 
-To convert eg. an ESRGAN model, install [uv](https://docs.astral.sh/uv/) and run:
+To convert a model, install [uv](https://docs.astral.sh/uv/) and run:
 ```sh
-uv run scripts/convert.py esrgan 4x_NMKD-Superscale-SP_178000_G.pth -q f16
+uv run scripts/convert.py <arch> MyModel.pth -q f16
 ```
-This will create `models/4x_NMKD-Superscale-SP_178000_G-F16.gguf`.
+where `<arch>` is one of `sam, birefnet, esrgan, ...`. This will create `models/MyModel-F16.gguf`.
 
 See `convert.py --help` for more options.
 
@@ -110,35 +110,36 @@ Building requires CMake and a compiler with C++20 support.
 
 **Get the sources**
 ```sh
-git clone --recursive
+git clone https://github.com/Acly/vision.cpp.git --recursive
 cd vision.cpp
 ```
 
 **Configure and build**
 ```sh
-cmake . -B build
-cmake --build build --config Release
+cmake . -B build -D CMAKE_BUILD_TYPE=Release
+cmake --build build --config Release
 ```
 
-### Vulkan
+### _(Optional)_ Vulkan
 
 Vulkan GPU support requires the [Vulkan SDK](https://www.lunarg.com/vulkan-sdk/) to be installed.
 
 ```sh
-cmake . -B build -DVISP_VULKAN=ON
-cmake --build build --config Release
+cmake . -B build -D CMAKE_BUILD_TYPE=Release -D VISP_VULKAN=ON
+cmake --build build --config Release
 ```
 
-### Tests
+### _(Optional)_ Tests
 
-Run all tests with the following command:
+Run all C++ tests with the following command:
 ```sh
-ctest build -C Release
+cd build
+ctest -C Release
 ```
 
 Some tests require a Python environment. It can be set up with [uv](https://docs.astral.sh/uv/):
 ```sh
-# Setup venv and install dependencies
+# Setup venv and install dependencies (once only)
 uv sync
 
 # Run only python tests
diff --git a/depend/fmt/CMakeLists.txt b/depend/fmt/CMakeLists.txt
index 53334f8..4d6b9d8 100644
--- a/depend/fmt/CMakeLists.txt
+++ b/depend/fmt/CMakeLists.txt
@@ -10,7 +10,9 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 FetchContent_Declare(
   fmt
   GIT_REPOSITORY https://github.com/fmtlib/fmt
-  GIT_TAG        40626af88bd7df9a5fb80be7b25ac85b122d6c21) # 11.2.0
+  GIT_TAG        40626af88bd7df9a5fb80be7b25ac85b122d6c21 # 11.2.0
+  EXCLUDE_FROM_ALL
+)
 FetchContent_MakeAvailable(fmt)
 
 set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD})
diff --git a/depend/ggml b/depend/ggml
index 095096a..29f4567 160000
--- a/depend/ggml
+++ b/depend/ggml
@@ -1 +1 @@
-Subproject commit 095096a551c2ad11dc3524da0b0cf2a9ab143f13
+Subproject commit 29f456798cfc420ccac7479310f9444a0f8ced9f
diff --git a/include/visp/util.hpp b/include/visp/util.hpp
index c1c1766..99c008f 100644
--- a/include/visp/util.hpp
+++ b/include/visp/util.hpp
@@ -146,9 +146,17 @@ struct flags {
         return (lhs.value & uint32_t(rhs)) != 0;
     }
 
+    friend constexpr bool operator&(flags<E> lhs, flags<E> rhs) {
+        return (lhs.value & rhs.value) != 0; // true if lhs and rhs share at least one set bit
+    }
+
     friend constexpr flags<E> operator|(flags<E> lhs, E rhs) {
         return flags<E>(lhs.value | uint32_t(rhs));
     }
+
+    friend constexpr flags<E> operator|(flags<E> lhs, flags<E> rhs) {
+        return flags<E>(lhs.value | rhs.value); // union: every bit set in lhs or in rhs
+    }
 };
 
 } // namespace visp
diff --git a/include/visp/vision.hpp b/include/visp/vision.hpp
index fc99e10..787d03d 100644
--- a/include/visp/vision.hpp
+++ b/include/visp/vision.hpp
@@ -206,6 +206,7 @@ struct esrgan_params {
 };
 
 VISP_API esrgan_params esrgan_detect_params(model_ref);
+VISP_API int esrgan_estimate_graph_size(esrgan_params const&);
 
 VISP_API tensor esrgan_generate(model_ref, tensor image, esrgan_params const&);
 
diff --git a/models/CMakeLists.txt b/models/CMakeLists.txt
index ca94db5..137ec43 100644
--- a/models/CMakeLists.txt
+++ b/models/CMakeLists.txt
@@ -1,5 +1,4 @@
-# Download models used in tests
-# (this is disabled unless VISP_TESTS is enabled)
+# Download models used in tests (happens only if VISP_TESTS is enabled)
 
 message(STATUS "Checking for models/MobileSAM-F16.gguf")
 file(DOWNLOAD
@@ -22,4 +21,10 @@ file(DOWNLOAD
   EXPECTED_HASH "SHA256=c9f241e96fb5a791f9494fc7d4c2dd793297ae95f05b8423f547d19bea465b81"
   SHOW_PROGRESS
 )
-# TODO: ESRGAN
+message(STATUS "Checking for models/RealESRGAN-x4plus_anime-6B-F16.gguf")
+file(DOWNLOAD
+  "https://huggingface.co/Acly/Real-ESRGAN-GGUF/resolve/main/RealESRGAN-x4plus_anime-6B-F16.gguf"
+  ${CMAKE_CURRENT_LIST_DIR}/RealESRGAN-x4plus_anime-6B-F16.gguf
+  EXPECTED_HASH "SHA256=b741e68720d7ad6251dee2120bf7579ef816ea16da18299b39f6cbcb0e13ecf0"
+  SHOW_PROGRESS
+)
diff --git a/scripts/cmake/visioncpp-config.cmake.in b/scripts/cmake/visioncpp-config.cmake.in
index 6bd6b5e..68f4239 100644
--- a/scripts/cmake/visioncpp-config.cmake.in
+++ b/scripts/cmake/visioncpp-config.cmake.in
@@ -3,7 +3,8 @@
 set_and_check(VISP_INCLUDE_DIR "@PACKAGE_VISP_INCLUDE_INSTALL_DIR@")
 set_and_check(VISP_LIB_DIR "@PACKAGE_VISP_LIB_INSTALL_DIR@")
 
-find_package(ggml REQUIRED)
+include(CMakeFindDependencyMacro) # defines find_dependency(); harmless if already included
+find_dependency(ggml)
 
 find_library(VISP_LIBRARY visioncpp REQUIRED HINTS ${VISP_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH)
 
@@ -14,7 +15,6 @@ set_target_properties(visioncpp PROPERTIES
     INTERFACE_COMPILE_FEATURES cxx_std_20
     IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
     IMPORTED_LOCATION "${VISP_LIBRARY}"
-    POSITION_INDEPENDENT_CODE ON
 )
 
 check_required_components(visioncpp)
diff --git a/src/cli/cli.cpp b/src/cli/cli.cpp
index 7d77b54..2b86b01 100644
--- a/src/cli/cli.cpp
+++ b/src/cli/cli.cpp
@@ -35,7 +35,7 @@ char const* next_arg(int argc, char** argv, int& i) {
     if (++i < argc) {
         return argv[i];
     } else {
-        throw error("Missing argument after {}", argv[i - 1]);
+        throw except("Missing argument after {}", argv[i - 1]);
     }
 }
 
@@ -45,7 +45,7 @@ std::vector<char const*> collect_args(int argc, char** argv, int& i, char delim
         r.push_back(next_arg(argc, argv, i));
     } while (i + 1 < argc && argv[i + 1][0] != delim);
     if (r.empty()) {
-        throw error("Missing argument after {}", argv[i - 1]);
+        throw except("Missing argument after {}", argv[i - 1]);
     }
     return r;
 }
@@ -54,21 +54,21 @@ int parse_int(std::string_view arg) {
     int value = 0;
     auto [ptr, ec] = std::from_chars(arg.data(), arg.data() + arg.size(), value);
     if (ec != std::errc()) {
-        throw error("Invalid integer argument: {}", arg);
+        throw except("Invalid integer argument: {}", arg);
     }
     return value;
 }
 
 char const* validate_path(char const* arg) {
     if (!exists(path(arg))) {
-        throw error("File not found: {}", arg);
+        throw except("File not found: {}", arg);
     }
     return arg;
 }
 
 void require_inputs(std::span<char const* const> inputs, int n_required, char const* names) {
     if (inputs.size() != size_t(n_required)) {
-        throw error(
+        throw except(
             "Expected -i to be followed by {} inputs: {} - but found {}.", n_required, names,
             inputs.size());
     }
@@ -77,7 +77,7 @@ void require_inputs(std::span<char const* const> inputs, int n_required, char co
 cli_args cli_parse(int argc, char** argv) {
     cli_args r;
     if (argc < 2) {
-        throw error("Missing command.\nUsage: {} <command> [options]", argv[0]);
+        throw except("Missing command.\nUsage: {} <command> [options]", argv[0]);
     }
 
     std::string_view arg1 = argv[1];
@@ -90,7 +90,7 @@ cli_args cli_parse(int argc, char** argv) {
     } else if (arg1 == "esrgan") {
         r.command = cli_command::esrgan;
     } else {
-        throw error("Unknown command: {}", arg1);
+        throw except("Unknown command: {}", arg1);
     }
 
     for (int i = 2; i < argc; ++i) {
@@ -111,14 +111,14 @@ cli_args cli_parse(int argc, char** argv) {
             } else if (backend_arg == "gpu") {
                 r.bknd_type = backend_type::gpu;
             } else {
-                throw error("Unknown backend type '{}', must be one of: cpu, gpu", backend_arg);
+                throw except("Unknown backend type '{}', must be one of: cpu, gpu", backend_arg);
             }
         } else if (arg == "--composite") {
             r.composite = next_arg(argc, argv, i);
         } else if (arg == "--tile") {
             r.tile_size = parse_int(next_arg(argc, argv, i));
         } else if (arg.starts_with("-")) {
-            throw error("Unknown argument: {}", arg);
+            throw except("Unknown argument: {}", arg);
         }
     }
     return r;
@@ -255,12 +255,12 @@ struct sam_prompt {
 
 sam_prompt sam_parse_prompt(std::span<char const* const> args, i32x2 extent) {
     if (args.empty()) {
-        throw error(
+        throw except(
             "SAM requires a prompt with coordinates for a point or box"
             "eg. '--prompt 100 200' to pick the point at pixel (x=100, y=200)");
     }
     if (args.size() < 2 || args.size() > 4) {
-        throw error(
+        throw except(
             "Invalid number of arguments for SAM prompt. Expected 2 (point) or 4 (box) numbers, "
             "got {}",
             args.size());
@@ -269,17 +269,17 @@ sam_prompt sam_parse_prompt(std::span<char const* const> args, i32x2 extent) {
     if (args.size() >= 2) {
         a = {parse_int(args[0]), parse_int(args[1])};
         if (a[0] < 0 || a[1] < 0 || a[0] >= extent[0] || a[1] >= extent[1]) {
-            throw error("Invalid image coordinates: ({}, {})", a[0], a[1]);
+            throw except("Invalid image coordinates: ({}, {})", a[0], a[1]);
         }
     }
     i32x2 b{-1, -1};
     if (args.size() == 4) {
         b = {parse_int(args[2]), parse_int(args[3])};
         if (b[0] < 0 || b[1] < 0 || b[0] >= extent[0] || b[1] >= extent[1]) {
-            throw error("Invalid image coordinates: ({}, {})", b[0], b[1]);
+            throw except("Invalid image coordinates: ({}, {})", b[0], b[1]);
         }
         if (a[0] >= b[0] || a[1] >= b[1]) {
-            throw error("Invalid box coordinates: ({}, {}) to ({}, {})", a[0], a[1], b[0], b[1]);
+            throw except("Invalid box coordinates: ({}, {}) to ({}, {})", a[0], a[1], b[0], b[1]);
         }
     }
     return sam_prompt{a, b};
@@ -287,7 +287,7 @@ sam_prompt sam_parse_prompt(std::span<char const* const> args, i32x2 extent) {
 
 void run_sam(cli_args const& args) {
     backend_device backend = backend_init(args);
-    model_weights weights = load_model_weights(args, backend, "models/mobile-sam.gguf");
+    model_weights weights = load_model_weights(args, backend, "models/MobileSAM-F16.gguf");
     sam_params params{};
 
     require_inputs(args.inputs, 1, "<image>");
@@ -340,7 +340,7 @@ void run_sam(cli_args const& args) {
 
 void run_birefnet(cli_args const& args) {
     backend_device backend = backend_init(args);
-    model_weights weights = load_model_weights(args, backend, "models/birefnet.gguf", 6);
+    model_weights weights = load_model_weights(args, backend, "models/BiRefNet-F16.gguf", 6);
     birefnet_params params = birefnet_detect_params(weights);
     int img_size = params.image_size;
 
@@ -380,7 +380,7 @@ void run_birefnet(cli_args const& args) {
 
 void run_migan(cli_args const& args) {
     backend_device backend = backend_init(args);
-    model_weights weights = load_model_weights(args, backend, "models/migan_512_places2-f16.gguf");
+    model_weights weights = load_model_weights(args, backend, "models/MIGAN-512-places2-F16.gguf");
     migan_params params = migan_detect_params(weights);
     params.invert_mask = true; // -> inpaint opaque areas
 
@@ -417,7 +417,7 @@ void run_migan(cli_args const& args) {
 
 void run_esrgan(cli_args const& args) {
     backend_device backend = backend_init(args);
-    model_weights weights = load_model_weights(args, backend, "models/RealESRGAN_x4.gguf");
+    model_weights weights = load_model_weights(args, backend, "models/RealESRGAN-x4.gguf");
     esrgan_params params = esrgan_detect_params(weights);
 
     require_inputs(args.inputs, 1, "<image>");
@@ -430,7 +430,7 @@ void run_esrgan(cli_args const& args) {
     image_data output_tile = image_alloc(tiles_out.tile_size, image_format::rgb_f32);
     image_data output_image = image_alloc(image.extent * params.scale, image_format::rgb_f32);
 
-    compute_graph graph = compute_graph_init();
+    compute_graph graph = compute_graph_init(esrgan_estimate_graph_size(params));
     model_ref m(weights, graph);
 
     i64x4 input_shape = {3, tiles.tile_size[0], tiles.tile_size[1], 1};
diff --git a/src/util/string.hpp b/src/util/string.hpp
index 92c26d4..a0b61a9 100644
--- a/src/util/string.hpp
+++ b/src/util/string.hpp
@@ -66,8 +66,8 @@ template <size_t N, typename... Args>
 char const* format(fixed_string<N>& dst, char const* fmt, Args&&... args) {
     auto it = truncating_iterator(dst.data, N);
     auto out = fmt::vformat_to(it, fmt, fmt::make_format_args(args...));
-    dst.data[N - 1] = 0;
     dst.length = std::min(size_t(out - it), N - 1);
+    dst.data[dst.length] = 0;
     return dst.c_str();
 }
 
@@ -79,7 +79,7 @@ String format(char const* fmt, Args&&... args) {
 }
 
 template <typename... Args>
-exception error(char const* fmt, Args&&... args) {
+exception except(char const* fmt, Args&&... args) {
     return exception(format<fixed_string<128>>(fmt, std::forward<Args>(args)...));
 }
 
@@ -87,12 +87,14 @@ inline void assertion_failure(char const* file, int line, char const* expr) {
     auto msg = format<fixed_string<256>>("Assertion failed at {}:{}: {}\n", file, line, expr);
     fwrite(msg.data, 1, msg.length, stderr);
 
-#ifdef VISP_ASSERT_BREAK
+#if defined(VISP_ASSERT_BREAK)
 #    ifdef _MSC_VER
     __debugbreak();
 #    else
     __builtin_trap();
 #    endif
+#elif defined(VISP_ASSERT_THROW)
+    throw exception(msg.c_str());
 #else
     std::abort();
 #endif
diff --git a/src/visp/birefnet.cpp b/src/visp/birefnet.cpp
index 9750d3e..125034f 100644
--- a/src/visp/birefnet.cpp
+++ b/src/visp/birefnet.cpp
@@ -588,14 +588,14 @@ const swin_params swin_l_params = {
 swin_params swin_detect_params(model_ref m) {
     tensor t = m.find("bb.layers.0.blocks.0.attn.proj.bias");
     if (t == nullptr) {
-        throw error("Failed to detect model parameters");
+        throw except("Failed to detect model parameters");
     }
     if (t->ne[0] == 96) {
         return swin_t_params;
     } else if (t->ne[0] == 192) {
         return swin_l_params;
     } else {
-        throw error("Unsupported Swin Transformer embed dim: {}", t->ne[0]);
+        throw except("Unsupported Swin Transformer embed dim: {}", t->ne[0]);
     }
 }
 
diff --git a/src/visp/esrgan.cpp b/src/visp/esrgan.cpp
index 36d3783..89b3aae 100644
--- a/src/visp/esrgan.cpp
+++ b/src/visp/esrgan.cpp
@@ -102,12 +102,17 @@ esrgan_params esrgan_detect_params(model_ref m) {
     // 3 layers per upscale block, each upscales x2, 5 blocks for the rest of the model
     p.scale = 1 << ((model_len - 5) / 3);
     if (p.scale < 2 || p.scale > 4) {
-        throw error("Unsupported scale: {}", p.scale);
+        throw except("Unsupported scale: {}", p.scale);
     }
     if (p.n_blocks < 1 || p.n_blocks > 23) {
-        throw error("Invalid number of blocks: {}", p.n_blocks);
+        throw except("Invalid number of blocks: {}", p.n_blocks);
     }
     return p;
 }
 
+int esrgan_estimate_graph_size(esrgan_params const& p) {
+    // worst-case estimate, exact number depends on how conv-2d is implemented for the backend
+    return 512 + p.n_blocks * 192; // constant base plus a fixed budget per block (n_blocks)
+}
+
 } // namespace visp
\ No newline at end of file
diff --git a/src/visp/image.cpp b/src/visp/image.cpp
index 97ed5af..235c97d 100644
--- a/src/visp/image.cpp
+++ b/src/visp/image.cpp
@@ -184,10 +184,10 @@ image_data image_load(char const* filepath) {
     int channels = 0;
     uint8_t* pixels = stbi_load(filepath, &extent[0], &extent[1], &channels, 0);
     if (!pixels) {
-        throw error("Failed to load image {}: {}", filepath, stbi_failure_reason());
+        throw except("Failed to load image {}: {}", filepath, stbi_failure_reason());
     }
     image_format format = image_format_from_channels(channels);
-    return image_data(extent, format, std::unique_ptr<uint8_t[]>(pixels));
+    return image_data{extent, format, std::unique_ptr<uint8_t[]>(pixels)};
 }
 
 void image_save(image_view const& img, char const* filepath) {
@@ -195,12 +195,12 @@ void image_save(image_view const& img, char const* filepath) {
     
     if (!(img.format == image_format::alpha_u8 || img.format == image_format::rgb_u8 ||
           img.format == image_format::rgba_u8)) {
-        throw error("Unsupported image format [{}]", int(img.format));
+        throw except("Unsupported image format [{}]", int(img.format));
     }
     int comp = n_channels(img.format);
     if (!stbi_write_png(
             filepath, img.extent[0], img.extent[1], comp, img.data, img.extent[0] * comp)) {
-        throw error("Failed to save image {}", filepath);
+        throw except("Failed to save image {}", filepath);
     }
 }
 
@@ -338,7 +338,7 @@ void image_scale(image_view const& img, i32x2 target, image_span const& dst) {
             STBIR_COLORSPACE_SRGB, nullptr);
     }
     if (result == 0) {
-        throw error(
+        throw except(
             "Failed to resize image {}x{} to {}x{}", img.extent[0], img.extent[1], target[0],
             target[1]);
     }
diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp
index e9712c6..3fe3ac0 100644
--- a/src/visp/ml.cpp
+++ b/src/visp/ml.cpp
@@ -52,7 +52,7 @@ backend_device backend_init(backend_type type) {
     backend_device b;
     b.handle.reset(ggml_backend_init_by_type(convert(type), nullptr));
     if (!b.handle) {
-        throw error("Failed to initialize backend, no suitable device available");
+        throw except("Failed to initialize backend, no suitable device available");
     }
     b.device = ggml_backend_get_device(b.handle.get());
 
@@ -305,7 +305,7 @@ tensor model_ref::weights(char const* name) const {
     if (tensor result = find(name)) {
         return result;
     }
-    throw error("tensor not found: {}.{}", prefix.view(), name);
+    throw except("tensor not found: {}.{}", prefix.view(), name);
 }
 
 model_ref model_ref::with_prefix(tensor_name new_prefix) const {
@@ -360,13 +360,13 @@ tensor_data tensor_alloc(tensor x) {
 tensor_data tensor_load(tensor x, char const* filepath) {
     FILE* file = fopen(filepath, "rb");
     if (!file) {
-        throw error("Failed to open file: {}", filepath);
+        throw except("Failed to open file: {}", filepath);
     }
     tensor_data result = tensor_alloc(x);
     size_t read = fread(result.data.get(), 1, ggml_nbytes(x), file);
     fclose(file);
     if (read != ggml_nbytes(x)) {
-        throw error("Failed to read data from file: {}", filepath);
+        throw except("Failed to read data from file: {}", filepath);
     }
     return result;
 }
diff --git a/src/visp/vision.cpp b/src/visp/vision.cpp
index b816556..4d335ef 100644
--- a/src/visp/vision.cpp
+++ b/src/visp/vision.cpp
@@ -171,7 +171,7 @@ image_data esrgan_compute(esrgan_model& model, image_view image) {
     tile_layout tiles(image.extent, esrgan_default_tile_size, 16);
     if (!model.graph || model.tile_size != tiles.tile_size) {
         model.tile_size = tiles.tile_size;
-        model.graph = compute_graph_init();
+        model.graph = compute_graph_init(esrgan_estimate_graph_size(model.params));
 
         model_ref m(model.weights, model.graph);
         i64x4 input_shape = {3, tiles.tile_size[0], tiles.tile_size[1], 1};
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 8c40209..a757c45 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -6,10 +6,10 @@ target_sources(test-vision PRIVATE
   test-image.cpp
 )
 target_include_directories(test-vision PRIVATE . ../src)
-target_compile_options(visioncpp PRIVATE ${VISP_WARNINGS})
-target_compile_definitions(visioncpp PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS})
+target_compile_options(test-vision PRIVATE ${VISP_WARNINGS})
+target_compile_definitions(test-vision PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS})
 target_link_libraries(test-vision PRIVATE visioncpp ${VISP_FMT_LINK})
-add_test(NAME vision COMMAND test-vision)
+add_test(NAME vision COMMAND test-vision -v)
 
 #
 # Model tests (image comparisons)
@@ -20,10 +20,15 @@ target_sources(test-models PRIVATE
   test-models.cpp
 )
 target_include_directories(test-models PRIVATE . ../src)
-target_compile_options(visioncpp PRIVATE ${VISP_WARNINGS})
-target_compile_definitions(visioncpp PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS})
+target_compile_options(test-models PRIVATE ${VISP_WARNINGS})
+target_compile_definitions(test-models PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS})
 target_link_libraries(test-models PRIVATE visioncpp ${VISP_FMT_LINK})
-add_test(NAME models COMMAND test-models)
+if(VISP_VULKAN AND NOT VISP_CI)
+  add_test(NAME models COMMAND test-models -v)
+else()
+  # GPU tests currently don't pass on GitHub runners with Vulkan/llvmpipe
+  add_test(NAME models COMMAND test-models -v --no-gpu)
+endif()
 
 include(reference-images.cmake)
 
diff --git a/tests/test-image.cpp b/tests/test-image.cpp
index 58ae717..b3eaa18 100644
--- a/tests/test-image.cpp
+++ b/tests/test-image.cpp
@@ -8,7 +8,7 @@
 
 namespace visp {
 
-TEST_CASE(image_formats) {
+VISP_TEST(image_formats) {
     auto formats = std::array{image_format::rgba_u8, image_format::bgra_u8,  image_format::argb_u8,
                               image_format::rgb_u8,  image_format::alpha_u8, image_format::rgba_f32,
                               image_format::rgb_f32, image_format::alpha_f32};
@@ -36,14 +36,14 @@ TEST_CASE(image_formats) {
     }
 }
 
-TEST_CASE(image_load) {
+VISP_TEST(image_load) {
     image_data img = image_load((test_dir().input / "cat-and-hat.jpg").string().c_str());
     CHECK(img.extent == i32x2{512, 512});
     CHECK(img.format == image_format::rgb_u8);
     CHECK(n_bytes(img) == 512 * 512 * 3);
 }
 
-TEST_CASE(image_save) {
+VISP_TEST(image_save) {
     image_data img = image_alloc(i32x2{16, 16}, image_format::rgba_u8);
     for (int i = 0; i < 16 * 16; ++i) {
         img.data.get()[i * 4 + 0] = 255;
@@ -74,20 +74,20 @@ void test_image_u8_to_f32(
     CHECK_IMAGES_EQUAL(output, expected);
 }
 
-TEST_CASE(image_alpha_u8_to_alpha_f32) {
+VISP_TEST(image_alpha_u8_to_alpha_f32) {
     test_image_u8_to_f32(
         image_format::alpha_u8, image_format::alpha_f32, //
         std::array<uint8_t, 4>{0, 128, 190, 255},        //
         std::array<float, 4>{0.05f, 0.3f, 0.4225f, 0.55f});
 }
-TEST_CASE(image_rgb_u8_to_rgb_f32) {
+VISP_TEST(image_rgb_u8_to_rgb_f32) {
     test_image_u8_to_f32(
         image_format::rgb_u8, image_format::rgb_f32,                                  //
         std::array<uint8_t, 12>{0, 128, 192, 255, 0, 128, 128, 255, 0, 128, 64, 255}, //
         std::array<float, 12>{
             0.05f, 0.7f, -1.05f, 0.55f, 0.2f, -0.8f, 0.3f, 1.2f, -0.3f, 0.3f, 0.45f, -1.3f});
 }
-TEST_CASE(image_rgba_u8_to_rgb_f32) {
+VISP_TEST(image_rgba_u8_to_rgb_f32) {
     test_image_u8_to_f32(
         image_format::rgba_u8, image_format::rgb_f32, //
         std::array<uint8_t, 16>{
@@ -101,7 +101,7 @@ TEST_CASE(image_rgba_u8_to_rgb_f32) {
             0.3f, 1.2f, -0.3f,   //
             0.3f, 0.45f, -1.3f});
 }
-TEST_CASE(image_rgba_u8_to_rgba_f32) {
+VISP_TEST(image_rgba_u8_to_rgba_f32) {
     test_image_u8_to_f32(
         image_format::rgba_u8, image_format::rgba_f32, //
         std::array<uint8_t, 16>{
@@ -115,7 +115,7 @@ TEST_CASE(image_rgba_u8_to_rgba_f32) {
             0.3f, 1.2f, -0.3f, 0.9f, 0.3f, //
             0.45f, -1.3f, 1.4f});
 }
-TEST_CASE(image_bgra_u8_to_rgb_f32) {
+VISP_TEST(image_bgra_u8_to_rgb_f32) {
     test_image_u8_to_f32(
         image_format::bgra_u8, image_format::rgb_f32, //
         std::array<uint8_t, 16>{
@@ -129,7 +129,7 @@ TEST_CASE(image_bgra_u8_to_rgb_f32) {
             0.3f, 1.2f, -0.3f,   //
             0.3f, 0.45f, -1.3f});
 }
-TEST_CASE(image_argb_u8_to_rgb_f32) {
+VISP_TEST(image_argb_u8_to_rgb_f32) {
     test_image_u8_to_f32(
         image_format::argb_u8, image_format::rgb_f32, //
         std::array<uint8_t, 16>{
@@ -144,7 +144,7 @@ TEST_CASE(image_argb_u8_to_rgb_f32) {
             0.3f, 0.45f, -1.3f});
 }
 
-TEST_CASE(image_u8_to_f32_tiled_pad) {
+VISP_TEST(image_u8_to_f32_tiled_pad) {
     std::array<uint8_t, 9> input_data = {0, 0, 102, 0, 0, 255, 0, 0, 102};
     std::array<float, 4> expected_data = {1.0f, 1.0f, 0.4f, 0.4f};
     image_view input(i32x2{3, 3}, image_format::alpha_u8, input_data);
@@ -159,7 +159,7 @@ TEST_CASE(image_u8_to_f32_tiled_pad) {
     CHECK_IMAGES_EQUAL(output, expected);
 }
 
-TEST_CASE(image_alpha_f32_to_alpha_u8) {
+VISP_TEST(image_alpha_f32_to_alpha_u8) {
     std::array<float, 4> input_data{0.0f, 0.3f, 0.4225f, 1.1f};
     std::array<uint8_t, 4> expected_data = {0, 76, 107, 255};
     image_view input(i32x2{2, 2}, image_format::alpha_f32, input_data.data());
@@ -171,7 +171,7 @@ TEST_CASE(image_alpha_f32_to_alpha_u8) {
     CHECK_IMAGES_EQUAL(output, expected);
 }
 
-TEST_CASE(image_rgb_f32_to_rgba_u8) {
+VISP_TEST(image_rgb_f32_to_rgba_u8) {
     std::array<float, 6> input_data{0.0f, 0.31f, -0.51f, 1.0f, 0.2f, 1.8f};
     std::array<uint8_t, 8> expected_data = {0, 79, 0, 255, 255, 51, 255, 255};
     image_view input(i32x2{2, 1}, image_format::rgb_f32, input_data.data());
@@ -183,7 +183,7 @@ TEST_CASE(image_rgb_f32_to_rgba_u8) {
     CHECK_IMAGES_EQUAL(output, expected);
 }
 
-TEST_CASE(image_scale) {
+VISP_TEST(image_scale) {
     image_data img = image_alloc(i32x2{8, 8}, image_format::rgba_u8);
     for (int i = 0; i < 8 * 8; ++i) {
         img.data[i * 4 + 0] = uint8_t(255);
@@ -202,7 +202,7 @@ TEST_CASE(image_scale) {
     }
 }
 
-TEST_CASE(image_alpha_composite) {
+VISP_TEST(image_alpha_composite) {
     std::array<uint8_t, 2 * 2 * 4> fg_data = {255, 0, 0,   255, 0,   255, 0, 255, //
                                               0,   0, 255, 255, 255, 255, 0, 255};
     image_view fg{i32x2{2, 2}, image_format::rgba_u8, fg_data};
@@ -222,7 +222,7 @@ TEST_CASE(image_alpha_composite) {
     CHECK_IMAGES_EQUAL(output, expected);
 }
 
-TEST_CASE(image_blur) {
+VISP_TEST(image_blur) {
     constexpr i32x2 extent{6, 6};
     // clang-format off
     std::array<float, extent[0] * extent[1]> input_data = {
@@ -252,7 +252,7 @@ TEST_CASE(image_blur) {
     CHECK_IMAGES_EQUAL(output, expected);
 }
 
-TEST_CASE(tile_merge) {
+VISP_TEST(tile_merge) {
     std::array<std::array<f32x3, 5 * 5>, 4> tiles;
     for (int t = 0; t < 4; ++t) {
         float v = float(t);
@@ -290,7 +290,7 @@ TEST_CASE(tile_merge) {
     CHECK_IMAGES_EQUAL(dst_span, expected);
 }
 
-TEST_CASE(tile_merge_blending) {
+VISP_TEST(tile_merge_blending) {
     std::array<f32x3, 22 * 19> dst{};
     auto dst_span = image_span({22, 19}, dst);
 
diff --git a/tests/test-models.cpp b/tests/test-models.cpp
index bb45dc1..531c5ac 100644
--- a/tests/test-models.cpp
+++ b/tests/test-models.cpp
@@ -1,5 +1,5 @@
-#include "visp/vision.hpp"
 #include "util/string.hpp"
+#include "visp/vision.hpp"
 
 #include "testing.hpp"
 
@@ -12,15 +12,14 @@ void compare_images(std::string_view name, image_view result, float tolerance =
     image_save(result, result_path.string().c_str());
     image_data reference = image_load(reference_path.string().c_str());
 
-    test_set_info(
-        format(
-            "while comparing images {} and {}", relative(result_path).string(),
-            relative(reference_path).string()));
+    test_set_info(format(
+        "while comparing images {} and {}", relative(result_path).string(),
+        relative(reference_path).string()));
     test_with_tolerance with(tolerance);
     CHECK_IMAGES_EQUAL(result, reference);
 }
 
-void test_mobile_sam(backend_type bt) {
+VISP_BACKEND_TEST(test_mobile_sam)(backend_type bt) {
     path model_path = test_dir().models / "MobileSAM-F16.gguf";
     path input_path = test_dir().input / "cat-and-hat.jpg";
 
@@ -29,7 +28,7 @@ void test_mobile_sam(backend_type bt) {
     image_data input = image_load(input_path.string().c_str());
     sam_encode(model, input);
     image_data mask_box = sam_compute(model, box_2d{{180, 110}, {505, 330}});
-    image_data mask_point =  sam_compute(model, i32x2{200, 300});
+    image_data mask_point = sam_compute(model, i32x2{200, 300});
 
     char const* suffix = bt == backend_type::cpu ? "-cpu.png" : "-gpu.png";
     float tolerance = bt == backend_type::cpu ? 0.01f : 0.015f;
@@ -37,14 +36,7 @@ void test_mobile_sam(backend_type bt) {
     compare_images(format("mobile_sam-point{}", suffix), mask_point, tolerance);
 }
 
-TEST_CASE(test_mobile_sam_cpu) {
-    test_mobile_sam(backend_type::cpu);
-}
-TEST_CASE(test_mobile_sam_gpu) {
-    test_mobile_sam(backend_type::gpu);
-}
-
-void test_birefnet(backend_type bt) {
+VISP_BACKEND_TEST(test_birefnet)(backend_type bt) {
     path model_path = test_dir().models / "BiRefNet-lite-F16.gguf";
     path input_path = test_dir().input / "wardrobe.jpg";
     std::string name = "birefnet";
@@ -55,17 +47,11 @@ void test_birefnet(backend_type bt) {
     image_data input = image_load(input_path.string().c_str());
     image_data output = birefnet_compute(model, input);
 
-    compare_images(name, output);
+    float tolerance = bt == backend_type::cpu ? 0.01f : 0.3f; // TODO: GPU is non-deterministic
+    compare_images(name, output, tolerance);
 }
 
-TEST_CASE(test_birefnet_cpu) {
-    test_birefnet(backend_type::cpu);
-}
-TEST_CASE(test_birefnet_gpu) {
-    test_birefnet(backend_type::gpu);
-}
-
-void test_migan(backend_type bt) {
+VISP_BACKEND_TEST(test_migan)(backend_type bt) {
     path model_path = test_dir().models / "MIGAN-512-places2-F16.gguf";
     path image_path = test_dir().input / "bench-image.jpg";
     path mask_path = test_dir().input / "bench-mask.png";
@@ -82,15 +68,8 @@ void test_migan(backend_type bt) {
     compare_images(name, composited);
 }
 
-TEST_CASE(test_migan_cpu) {
-    test_migan(backend_type::cpu);
-}
-TEST_CASE(test_migan_gpu) {
-    test_migan(backend_type::gpu);
-}
-
-void test_esrgan(backend_type bt) {
-    path model_path = test_dir().models / "RealESRGAN_x4plus_anime_6Bh.gguf";
+VISP_BACKEND_TEST(test_esrgan)(backend_type bt) {
+    path model_path = test_dir().models / "RealESRGAN-x4plus_anime-6B-F16.gguf";
     path input_path = test_dir().input / "vase-and-bowl.jpg";
     std::string name = "esrgan";
     name += bt == backend_type::cpu ? "-cpu.png" : "-gpu.png";
@@ -103,11 +82,4 @@ void test_esrgan(backend_type bt) {
     compare_images(name, output);
 }
 
-TEST_CASE(test_esrgan_cpu) {
-    test_esrgan(backend_type::cpu);
-}
-TEST_CASE(test_esrgan_gpu) {
-    test_esrgan(backend_type::gpu);
-}
-
 } // namespace visp
\ No newline at end of file
diff --git a/tests/testing.cpp b/tests/testing.cpp
index fd73e00..1e596bc 100644
--- a/tests/testing.cpp
+++ b/tests/testing.cpp
@@ -1,4 +1,5 @@
 #include "testing.hpp"
+#include "visp/ml.hpp"
 
 #include <chrono>
 #include <filesystem>
@@ -10,45 +11,55 @@ namespace visp {
 // Globals
 float tolerance = 1e-5f;
 std::string extra_info;
-}
+} // namespace visp
 
 int main(int argc, char** argv) {
-    auto& registry = visp::test_registry_instance();
+    using namespace visp;
+
+    auto& registry = test_registry_instance();
 
     int passed = 0;
     int failed = 0;
     int errors = 0;
 
     std::string_view filter;
+    bool exclude_gpu = false;
     bool verbose = false;
+
     for (int i = 1; i < argc; ++i) {
         std::string_view arg(argv[i]);
         if (arg == "-v" || arg == "--verbose") {
             verbose = true;
+        } else if (arg == "--no-gpu") {
+            exclude_gpu = true;
         } else {
             filter = arg;
         }
     }
 
-    auto time_start = steady_clock::now();
-
-    for (auto& test : registry.tests) {
-        if (!filter.empty() && test.name != filter) {
-            continue;
-        }
+    auto run = [&](test_case const& test, char const* name, backend_type backend) {
         try {
-            printf(test.name);
-            fflush(stdout);
+            if (!filter.empty() && name != filter && test.name != filter) {
+                return; // test not selected
+            }
+            if (verbose) { // otherwise the name is printed only if the test fails
+                printf("%s", name);
+                fflush(stdout);
+            }
+            // Call whichever union member the registration marked as active.
+            if (test.is_backend_test) {
+                test.backend_func(backend);
+            } else {
+                test.func();
+            }
 
-            test.func();
-            
             ++passed;
             if (verbose) {
-                printf(" \033[32mPASSED\033[0m\n", test.name);
+                printf(" %s\n", "\033[32mPASSED\033[0m");
             }
         } catch (const visp::test_failure& e) {
             ++failed;
-            printf(" \033[31mFAILED\033[0m\n", test.name);
+            printf("%s %s\n", verbose ? "" : name, "\033[31mFAILED\033[0m");
             printf("  \033[90m%s:%d:\033[0m Assertion failed\n", e.file, e.line);
             printf("  \033[93m%s\033[0m\n", e.condition);
             if (e.eval) {
@@ -59,11 +70,25 @@ int main(int argc, char** argv) {
             }
         } catch (const std::exception& e) {
             ++errors;
-            printf(" \033[31mERROR\033[0m\n", test.name);
+            printf("%s %s\n", verbose ? "" : name, "\033[31mERROR\033[0m");
             printf("  \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line);
             printf("  \033[93m%s\033[0m\n", e.what());
         }
         visp::extra_info.clear();
+    };
+
+    auto time_start = steady_clock::now();
+    fixed_string<128> name;
+    // Backend tests run once per backend, reported as "<name>[cpu]" / "<name>[gpu]".
+    for (auto& test : registry.tests) {
+        if (test.is_backend_test) {
+            run(test, format(name, "{}[cpu]", test.name), backend_type::cpu);
+            if (!exclude_gpu) {
+                run(test, format(name, "{}[gpu]", test.name), backend_type::gpu);
+            }
+        } else {
+            run(test, test.name, backend_type::cpu);
+        }
     }
 
     auto time_end = steady_clock::now();
@@ -71,14 +96,16 @@ int main(int argc, char** argv) {
         std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
 
     char const* color = (failed > 0 || errors > 0) ? "\033[31m" : "\033[32m";
-    printf("%s----------------------------------------------------------------------\n", color);
+    if (verbose || failed > 0 || errors > 0) {
+        printf("%s----------------------------------------------------------------------\n", color);
+    }
     if (failed > 0) {
         printf("\033[31m%d failed, ", failed);
     }
     if (errors > 0) {
         printf("\033[31m%d errors, ", errors);
     }
-    printf("\033[92m%d passed %sin %lldms\033[0m\n", passed, color, duration);
+    printf("\033[92m%d passed %sin %lldms\033[0m\n", passed, color, (long long)duration);
 
     return (failed > 0 || errors > 0) ? 1 : 0;
 }
@@ -92,7 +119,24 @@ test_registry& test_registry_instance() {
 
 test_registration::test_registration(
     char const* name, test_function f, char const* file, int line) {
-    test_registry_instance().tests.push_back({name, f, file, line});
+    test_case entry;
+    entry.is_backend_test = false; // runner will call func(), not backend_func()
+    entry.func = f;
+    entry.name = name;
+    entry.file = file;
+    entry.line = line;
+    test_registry_instance().tests.push_back(entry);
+}
+
+test_registration::test_registration(
+    char const* name, test_backend_function f, char const* file, int line) {
+    test_case entry;
+    entry.is_backend_test = true; // runner will call backend_func(backend)
+    entry.backend_func = f;
+    entry.name = name;
+    entry.file = file;
+    entry.line = line;
+    test_registry_instance().tests.push_back(entry);
 }
 
 test_directories const& test_dir() {
@@ -130,9 +174,7 @@ float& test_tolerance_value() {
 test_failure test_failure_image_mismatch(
     char const* file, int line, char const* condition, float rms) {
     test_failure result(file, line, condition);
-    format(
-        result.eval, "-> rmse {:.5f} > {:.5f} tolerance", rms,
-        test_tolerance_value());
+    format(result.eval, "-> rmse {:.5f} > {:.5f} tolerance", rms, test_tolerance_value());
     return result;
 }
 
diff --git a/tests/testing.hpp b/tests/testing.hpp
index 03f6b44..b074cd6 100644
--- a/tests/testing.hpp
+++ b/tests/testing.hpp
@@ -6,6 +6,7 @@
 #include <vector>
 
 namespace visp {
+enum class backend_type;
 
 struct test_failure {
     char const* file;
@@ -18,12 +19,17 @@ struct test_failure {
 };
 
 using test_function = void (*)();
+using test_backend_function = void (*)(backend_type);
 
 struct test_case {
     char const* name;
-    test_function func;
     char const* file;
     int line;
+    bool is_backend_test; // true: backend_func is the active member, false: func
+    union { // exactly one member is set by the matching test_registration constructor
+        test_function func;
+        test_backend_function backend_func;
+    };
 };
 
 struct test_registry {
@@ -34,6 +40,7 @@ test_registry& test_registry_instance();
 
 struct test_registration {
     test_registration(char const* name, test_function f, char const* file, int line);
+    test_registration(char const* name, test_backend_function f, char const* file, int line);
 };
 
 using std::filesystem::path;
@@ -66,8 +73,9 @@ template <typename T>
 bool test_is_equal(T const& a, T const& b) {
     if constexpr (std::is_floating_point_v<T>) {
         return std::abs(a - b) <= test_tolerance_value();
+    } else {
+        return a == b;
     }
-    return a == b;
 }
 
 template <typename LHS, typename RHS>
@@ -87,11 +95,16 @@ test_failure test_failure_image_mismatch(char const* file, int line, char const*
 
 } // namespace visp
 
-#define TEST_CASE(name)                                                                            \
+#define VISP_TEST(name)                                                                            \
     void test_func_##name();                                                                       \
     const visp::test_registration test_reg_##name(#name, test_func_##name, __FILE__, __LINE__);    \
     void test_func_##name()
 
+#define VISP_BACKEND_TEST(name)                                                                    \
+    void test_func_##name(visp::backend_type);                                                     \
+    const visp::test_registration test_reg_##name(#name, test_func_##name, __FILE__, __LINE__);    \
+    void test_func_##name // usage: VISP_BACKEND_TEST(foo)(backend_type bt) { ... }
+
 #define CHECK(...)                                                                                 \
     if (!(__VA_ARGS__)) {                                                                          \
         throw visp::test_failure(__FILE__, __LINE__, #__VA_ARGS__);                                \
diff --git a/tests/workbench.cpp b/tests/workbench.cpp
index 22f4149..bd2daf8 100644
--- a/tests/workbench.cpp
+++ b/tests/workbench.cpp
@@ -348,7 +348,7 @@ DEF(biref_decode)(model_ref m, span<tensor> input, param_dict const& p) {
 // MI-GAN
 
 DEF(migan_lrelu_agc)(model_ref m, span<tensor> input, param_dict const& p) {
-    return {migan::lrelu_agc(m, input[0], 0.2f, std::sqrtf(2), 1.0f)};
+    return {migan::lrelu_agc(m, input[0], 0.2f, std::sqrt(2.0f), 1.0f)}; // float overload
 }
 
 DEF(migan_downsample_2d)(model_ref m, span<tensor> input, param_dict const& p) {
@@ -427,7 +427,7 @@ param_dict build_dict(span<raw_param const> raw_params) {
             param.type = param_type::string;
             param.value.s = raw.value;
             break;
-        default: throw error("Unknown parameter type");
+        default: throw except("Unknown parameter type");
         }
         dict.params.push_back(param);
     }
@@ -509,7 +509,7 @@ test_case const& workbench_find_test(std::string_view name) {
     if (it != w.tests.end()) {
         return *it;
     }
-    throw error("Test case not found: {}", name);
+    throw except("Test case not found: {}", name);
 }
 
 void workbench_run(