diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b713a40 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,97 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build: + strategy: + matrix: + os: [ubuntu-22.04, windows-latest, macos-14] + + runs-on: ${{ matrix.os }} + + name: Build & Test on ${{ matrix.os }} + + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Dependencies (Linux) + if: matrix.os == 'ubuntu-22.04' + run: | + wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + sudo apt-get update -y + sudo apt-get install -y build-essential cmake g++ ninja-build mesa-vulkan-drivers vulkan-sdk + + - name: Dependencies (Windows) + if: matrix.os == 'windows-latest' + uses: microsoft/setup-msbuild@v2 + + - name: Configure (Linux) + if: matrix.os == 'ubuntu-22.04' + run: > + cmake . -B build -G Ninja + -D CMAKE_BUILD_TYPE=Release + -D VISP_CI=ON + -D VISP_VULKAN=ON + -D VISP_FMT_LIB=ON + + - name: Configure (Windows) + if: matrix.os == 'windows-latest' + run: > + cmake . -B build -A x64 + -D CMAKE_BUILD_TYPE=Release + -D VISP_CI=ON + + - name: Configure (MacOS) + if: matrix.os == 'macos-14' + run: > + cmake . -B build -G Ninja + -D CMAKE_BUILD_TYPE=Release + -D VISP_CI=ON + -D GGML_METAL=OFF + -D GGML_RPC=ON + -D CMAKE_BUILD_RPATH="@loader_path" + + - name: Build + run: cmake --build build --config Release + + # tests fail with vulkan/llvmpipe (runs out of memory or just wrong results) + # - name: Test Vulkan + # if: matrix.os == 'ubuntu-22.04' + # working-directory: ./build + # run: | + # export GGML_VK_VISIBLE_DEVICES=0 + # ctest --verbose + + - name: Test CPU + if: matrix.os != 'ubuntu-22.04' + working-directory: ./build + run: ctest --verbose -C Release + + - name: Install + run: cmake --install build --prefix install --config Release + + - name: Package + working-directory: ./build + run: cpack + + - name: Upload artifacts + if: success() || failure() + uses: actions/upload-artifact@v4 + with: + name: visioncpp-${{ matrix.os }} + path: | + ./build/*.tar.gz + ./build/*.zip + ./tests/results/*.png + compression-level: 0 diff --git a/.gitmodules b/.gitmodules index b3cd91e..4bf732a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "depend/ggml"] path = depend/ggml - url = git@github.com:Acly/ggml.git + url = https://github.com/Acly/ggml.git diff --git a/CMakeLists.txt b/CMakeLists.txt index a665252..8e462d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.22) +cmake_minimum_required(VERSION 3.28) project(vision.cpp VERSION 0.1.0 LANGUAGES CXX) @@ -16,11 +16,16 @@ if(PROJECT_IS_TOP_LEVEL) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) endif() -if(VISP_DEV OR ${CMAKE_BUILD_TYPE} STREQUAL "Debug") +if(VISP_DEV) set(VISP_ASSERT "VISP_ASSERT_BREAK") -endif() -if(${CMAKE_BUILD_TYPE} STREQUAL "Release") - set(VISP_ASSERT "VISP_ASSERT_DISABLE") +elseif(VISP_CI) + set(VISP_ASSERT "VISP_ASSERT_THROW") +elseif(CMAKE_BUILD_TYPE) + if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") + set(VISP_ASSERT "VISP_ASSERT_BREAK") + elseif(${CMAKE_BUILD_TYPE} STREQUAL "Release") + set(VISP_ASSERT "VISP_ASSERT_DISABLE") + endif() endif() if(VISP_ASAN) @@ -60,11 +65,15 @@ endif() set(GGML_VULKAN ${VISP_VULKAN}) set(GGML_LLAMAFILE ON) if(VISP_CI) - set(GGML_NATIVE OFF) set(GGML_BACKEND_DL ON) - foreach (feat SSE42 AVX AVX2 F16C BMI2 FMA) # ~haswell and newer - set(GGML_${feat} ON) - endforeach() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(aarch64|arm.*|ARM64)$") + # set default for ARM + else() + set(GGML_NATIVE OFF) + foreach (feat SSE42 AVX AVX2 F16C BMI2 FMA) # ~haswell and newer + set(GGML_${feat} ON) + endforeach() + endif() endif() add_subdirectory(depend/ggml) @@ -81,7 +90,7 @@ if(VISP_TESTS) add_subdirectory(models) endif() -# Installation and packaging +# Installation install(TARGETS visioncpp RUNTIME DESTINATION bin @@ -92,6 +101,8 @@ if(PROJECT_IS_TOP_LEVEL) install(FILES README.md LICENSE DESTINATION .) endif() +install(TARGETS vision-cli RUNTIME DESTINATION bin) + include(CMakePackageConfigHelpers) set(VISP_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") @@ -115,12 +126,19 @@ install( DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/visioncpp ) -# if(WIN32) -# set(CPACK_GENERATOR "ZIP") -# set(CPACK_PACKAGE_FILE_NAME visioncpp-windows-x64-${PROJECT_VERSION}) -# else() -# set(CPACK_GENERATOR "TGZ") -# set(CPACK_PACKAGE_FILE_NAME visioncpp-linux-x64-${PROJECT_VERSION}) -# endif() -# set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) -# include(CPack) +# Packaging + +if(PROJECT_IS_TOP_LEVEL) + if(WIN32) + set(CPACK_GENERATOR "ZIP") + set(CPACK_PACKAGE_FILE_NAME visioncpp-windows-x64-${PROJECT_VERSION}) + elseif(APPLE) + set(CPACK_GENERATOR "TGZ") + set(CPACK_PACKAGE_FILE_NAME visioncpp-macos-x64-${PROJECT_VERSION}) + else() + set(CPACK_GENERATOR "TGZ") + set(CPACK_PACKAGE_FILE_NAME visioncpp-linux-x64-${PROJECT_VERSION}) + endif() + set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) + include(CPack) +endif() diff --git a/README.md b/README.md index 98abe58..bcab4e2 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ See [Building](#building) to build from source. Binaries can be found in `build/ Let's use MobileSAM to generate a segmentation mask for the at pixel position (320, 240). -You can download the required model from huggingface: [MobileSAM-F16.gguf](). +You can download the required model from huggingface: [MobileSAM-F16.gguf](https://huggingface.co/Acly/MobileSAM-GGUF/resolve/main/MobileSAM-F16.gguf). #### CLI @@ -93,14 +93,14 @@ vision-cli esrgan -m models/4x_foolhardy_Remacrih-F16.gguf -i input.png -o outpu ### Converting models -Models need to be converted to GGUF before they can be used. This can also +Models need to be converted to GGUF before they can be used. This will also rearrange or precompute tensors for more optimal inference. -To convert eg. an ESRGAN model, install [uv](https://docs.astral.sh/uv/) and run: +To convert a model, install [uv](https://docs.astral.sh/uv/) and run: ```sh -uv run scripts/convert.py esrgan 4x_NMKD-Superscale-SP_178000_G.pth -q f16 +uv run scripts/convert.py MyModel.pth -q f16 ``` -This will create `models/4x_NMKD-Superscale-SP_178000_G-F16.gguf`. +where `` is one of `sam, birefnet, esrgan, ...`. This will create `models/MyModel-F16.gguf`. See `convert.py --help` for more options. @@ -110,35 +110,36 @@ Building requires CMake and a compiler with C++20 support. **Get the sources** ```sh -git clone --recursive +git clone https://github.com/Acly/vision.cpp.git --recursive cd vision.cpp ``` **Configure and build** ```sh -cmake . -B build -cmake --build build --config Release +cmake . -B build -D CMAKE_BUILD_TYPE=Release +cmake --build build ``` -### Vulkan +### _(Optional)_ Vulkan Vulkan GPU support requires the [Vulkan SDK](https://www.lunarg.com/vulkan-sdk/) to be installed. ```sh -cmake . -B build -DVISP_VULKAN=ON -cmake --build build --config Release +cmake . -B build -D CMAKE_BUILD_TYPE=Release -D VISP_VULKAN=ON +cmake --build build ``` -### Tests +### _(Optional)_ Tests -Run all tests with the following command: +Run all C++ tests with the following command: ```sh -ctest build -C Release +cd build +ctest ``` Some tests require a Python environment. It can be set up with [uv](https://docs.astral.sh/uv/): ```sh -# Setup venv and install dependencies +# Setup venv and install dependencies (once only) uv sync # Run only python tests diff --git a/depend/fmt/CMakeLists.txt b/depend/fmt/CMakeLists.txt index 53334f8..4d6b9d8 100644 --- a/depend/fmt/CMakeLists.txt +++ b/depend/fmt/CMakeLists.txt @@ -10,7 +10,9 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) FetchContent_Declare( fmt GIT_REPOSITORY https://github.com/fmtlib/fmt - GIT_TAG 40626af88bd7df9a5fb80be7b25ac85b122d6c21) # 11.2.0 + GIT_TAG 40626af88bd7df9a5fb80be7b25ac85b122d6c21 # 11.2.0 + EXCLUDE_FROM_ALL +) FetchContent_MakeAvailable(fmt) set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_OLD}) diff --git a/depend/ggml b/depend/ggml index 095096a..29f4567 160000 --- a/depend/ggml +++ b/depend/ggml @@ -1 +1 @@ -Subproject commit 095096a551c2ad11dc3524da0b0cf2a9ab143f13 +Subproject commit 29f456798cfc420ccac7479310f9444a0f8ced9f diff --git a/include/visp/util.hpp b/include/visp/util.hpp index c1c1766..99c008f 100644 --- a/include/visp/util.hpp +++ b/include/visp/util.hpp @@ -146,9 +146,17 @@ struct flags { return (lhs.value & uint32_t(rhs)) != 0; } + friend constexpr bool operator&(flags lhs, flags rhs) { + return (lhs.value & rhs.value) != 0; + } + friend constexpr flags operator|(flags lhs, E rhs) { return flags(lhs.value | uint32_t(rhs)); } + + friend constexpr flags operator|(flags lhs, flags rhs) { + return flags(lhs.value | rhs.value); + } }; } // namespace visp diff --git a/include/visp/vision.hpp b/include/visp/vision.hpp index fc99e10..787d03d 100644 --- a/include/visp/vision.hpp +++ b/include/visp/vision.hpp @@ -206,6 +206,7 @@ struct esrgan_params { }; VISP_API esrgan_params esrgan_detect_params(model_ref); +VISP_API int esrgan_estimate_graph_size(esrgan_params const&); VISP_API tensor esrgan_generate(model_ref, tensor image, esrgan_params const&); diff --git a/models/CMakeLists.txt b/models/CMakeLists.txt index ca94db5..137ec43 100644 --- a/models/CMakeLists.txt +++ b/models/CMakeLists.txt @@ -1,5 +1,4 @@ -# Download models used in tests -# (this is disabled unless VISP_TESTS is enabled) +# Download models used in tests (happens only if VISP_TESTS is enabled) message(STATUS "Checking for models/MobileSAM-F16.gguf") file(DOWNLOAD @@ -22,4 +21,10 @@ file(DOWNLOAD EXPECTED_HASH "SHA256=c9f241e96fb5a791f9494fc7d4c2dd793297ae95f05b8423f547d19bea465b81" SHOW_PROGRESS ) -# TODO: ESRGAN +message(STATUS "Checking for models/RealESRGAN-x4plus_anime-6B-F16.gguf") +file(DOWNLOAD + "https://huggingface.co/Acly/Real-ESRGAN-GGUF/resolve/main/RealESRGAN-x4plus_anime-6B-F16.gguf" + ${CMAKE_CURRENT_LIST_DIR}/RealESRGAN-x4plus_anime-6B-F16.gguf + EXPECTED_HASH "SHA256=b741e68720d7ad6251dee2120bf7579ef816ea16da18299b39f6cbcb0e13ecf0" + SHOW_PROGRESS +) \ No newline at end of file diff --git a/scripts/cmake/visioncpp-config.cmake.in b/scripts/cmake/visioncpp-config.cmake.in index 6bd6b5e..68f4239 100644 --- a/scripts/cmake/visioncpp-config.cmake.in +++ b/scripts/cmake/visioncpp-config.cmake.in @@ -3,7 +3,7 @@ set_and_check(VISP_INCLUDE_DIR "@PACKAGE_VISP_INCLUDE_INSTALL_DIR@") set_and_check(VISP_LIB_DIR "@PACKAGE_VISP_LIB_INSTALL_DIR@") -find_package(ggml REQUIRED) +find_dependency(ggml) find_library(VISP_LIBRARY visioncpp REQUIRED HINTS ${VISP_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH) @@ -14,7 +14,6 @@ set_target_properties(visioncpp PROPERTIES INTERFACE_COMPILE_FEATURES cxx_std_20 IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" IMPORTED_LOCATION "${VISP_LIBRARY}" - POSITION_INDEPENDENT_CODE ON ) check_required_components(visioncpp) diff --git a/src/cli/cli.cpp b/src/cli/cli.cpp index 7d77b54..2b86b01 100644 --- a/src/cli/cli.cpp +++ b/src/cli/cli.cpp @@ -35,7 +35,7 @@ char const* next_arg(int argc, char** argv, int& i) { if (++i < argc) { return argv[i]; } else { - throw error("Missing argument after {}", argv[i - 1]); + throw except("Missing argument after {}", argv[i - 1]); } } @@ -45,7 +45,7 @@ std::vector collect_args(int argc, char** argv, int& i, char delim r.push_back(next_arg(argc, argv, i)); } while (i + 1 < argc && argv[i + 1][0] != delim); if (r.empty()) { - throw error("Missing argument after {}", argv[i - 1]); + throw except("Missing argument after {}", argv[i - 1]); } return r; } @@ -54,21 +54,21 @@ int parse_int(std::string_view arg) { int value = 0; auto [ptr, ec] = std::from_chars(arg.data(), arg.data() + arg.size(), value); if (ec != std::errc()) { - throw error("Invalid integer argument: {}", arg); + throw except("Invalid integer argument: {}", arg); } return value; } char const* validate_path(char const* arg) { if (!exists(path(arg))) { - throw error("File not found: {}", arg); + throw except("File not found: {}", arg); } return arg; } void require_inputs(std::span inputs, int n_required, char const* names) { if (inputs.size() != size_t(n_required)) { - throw error( + throw except( "Expected -i to be followed by {} inputs: {} - but found {}.", n_required, names, inputs.size()); } @@ -77,7 +77,7 @@ void require_inputs(std::span inputs, int n_required, char co cli_args cli_parse(int argc, char** argv) { cli_args r; if (argc < 2) { - throw error("Missing command.\nUsage: {} [options]", argv[0]); + throw except("Missing command.\nUsage: {} [options]", argv[0]); } std::string_view arg1 = argv[1]; @@ -90,7 +90,7 @@ cli_args cli_parse(int argc, char** argv) { } else if (arg1 == "esrgan") { r.command = cli_command::esrgan; } else { - throw error("Unknown command: {}", arg1); + throw except("Unknown command: {}", arg1); } for (int i = 2; i < argc; ++i) { @@ -111,14 +111,14 @@ cli_args cli_parse(int argc, char** argv) { } else if (backend_arg == "gpu") { r.bknd_type = backend_type::gpu; } else { - throw error("Unknown backend type '{}', must be one of: cpu, gpu", backend_arg); + throw except("Unknown backend type '{}', must be one of: cpu, gpu", backend_arg); } } else if (arg == "--composite") { r.composite = next_arg(argc, argv, i); } else if (arg == "--tile") { r.tile_size = parse_int(next_arg(argc, argv, i)); } else if (arg.starts_with("-")) { - throw error("Unknown argument: {}", arg); + throw except("Unknown argument: {}", arg); } } return r; @@ -255,12 +255,12 @@ struct sam_prompt { sam_prompt sam_parse_prompt(std::span args, i32x2 extent) { if (args.empty()) { - throw error( + throw except( "SAM requires a prompt with coordinates for a point or box" "eg. '--prompt 100 200' to pick the point at pixel (x=100, y=200)"); } if (args.size() < 2 || args.size() > 4) { - throw error( + throw except( "Invalid number of arguments for SAM prompt. Expected 2 (point) or 4 (box) numbers, " "got {}", args.size()); @@ -269,17 +269,17 @@ sam_prompt sam_parse_prompt(std::span args, i32x2 extent) { if (args.size() >= 2) { a = {parse_int(args[0]), parse_int(args[1])}; if (a[0] < 0 || a[1] < 0 || a[0] >= extent[0] || a[1] >= extent[1]) { - throw error("Invalid image coordinates: ({}, {})", a[0], a[1]); + throw except("Invalid image coordinates: ({}, {})", a[0], a[1]); } } i32x2 b{-1, -1}; if (args.size() == 4) { b = {parse_int(args[2]), parse_int(args[3])}; if (b[0] < 0 || b[1] < 0 || b[0] >= extent[0] || b[1] >= extent[1]) { - throw error("Invalid image coordinates: ({}, {})", b[0], b[1]); + throw except("Invalid image coordinates: ({}, {})", b[0], b[1]); } if (a[0] >= b[0] || a[1] >= b[1]) { - throw error("Invalid box coordinates: ({}, {}) to ({}, {})", a[0], a[1], b[0], b[1]); + throw except("Invalid box coordinates: ({}, {}) to ({}, {})", a[0], a[1], b[0], b[1]); } } return sam_prompt{a, b}; @@ -287,7 +287,7 @@ sam_prompt sam_parse_prompt(std::span args, i32x2 extent) { void run_sam(cli_args const& args) { backend_device backend = backend_init(args); - model_weights weights = load_model_weights(args, backend, "models/mobile-sam.gguf"); + model_weights weights = load_model_weights(args, backend, "models/MobileSAM-F16.gguf"); sam_params params{}; require_inputs(args.inputs, 1, ""); @@ -340,7 +340,7 @@ void run_sam(cli_args const& args) { void run_birefnet(cli_args const& args) { backend_device backend = backend_init(args); - model_weights weights = load_model_weights(args, backend, "models/birefnet.gguf", 6); + model_weights weights = load_model_weights(args, backend, "models/BiRefNet-F16.gguf", 6); birefnet_params params = birefnet_detect_params(weights); int img_size = params.image_size; @@ -380,7 +380,7 @@ void run_birefnet(cli_args const& args) { void run_migan(cli_args const& args) { backend_device backend = backend_init(args); - model_weights weights = load_model_weights(args, backend, "models/migan_512_places2-f16.gguf"); + model_weights weights = load_model_weights(args, backend, "models/MIGAN-512-places2-F16.gguf"); migan_params params = migan_detect_params(weights); params.invert_mask = true; // -> inpaint opaque areas @@ -417,7 +417,7 @@ void run_migan(cli_args const& args) { void run_esrgan(cli_args const& args) { backend_device backend = backend_init(args); - model_weights weights = load_model_weights(args, backend, "models/RealESRGAN_x4.gguf"); + model_weights weights = load_model_weights(args, backend, "models/RealESRGAN-x4.gguf"); esrgan_params params = esrgan_detect_params(weights); require_inputs(args.inputs, 1, ""); @@ -430,7 +430,7 @@ void run_esrgan(cli_args const& args) { image_data output_tile = image_alloc(tiles_out.tile_size, image_format::rgb_f32); image_data output_image = image_alloc(image.extent * params.scale, image_format::rgb_f32); - compute_graph graph = compute_graph_init(); + compute_graph graph = compute_graph_init(esrgan_estimate_graph_size(params)); model_ref m(weights, graph); i64x4 input_shape = {3, tiles.tile_size[0], tiles.tile_size[1], 1}; diff --git a/src/util/string.hpp b/src/util/string.hpp index 92c26d4..a0b61a9 100644 --- a/src/util/string.hpp +++ b/src/util/string.hpp @@ -66,8 +66,8 @@ template char const* format(fixed_string& dst, char const* fmt, Args&&... args) { auto it = truncating_iterator(dst.data, N); auto out = fmt::vformat_to(it, fmt, fmt::make_format_args(args...)); - dst.data[N - 1] = 0; dst.length = std::min(size_t(out - it), N - 1); + dst.data[dst.length] = 0; return dst.c_str(); } @@ -79,7 +79,7 @@ String format(char const* fmt, Args&&... args) { } template -exception error(char const* fmt, Args&&... args) { +exception except(char const* fmt, Args&&... args) { return exception(format>(fmt, std::forward(args)...)); } @@ -87,12 +87,14 @@ inline void assertion_failure(char const* file, int line, char const* expr) { auto msg = format>("Assertion failed at {}:{}: {}\n", file, line, expr); fwrite(msg.data, 1, msg.length, stderr); -#ifdef VISP_ASSERT_BREAK +#if defined(VISP_ASSERT_BREAK) # ifdef _MSC_VER __debugbreak(); # else __builtin_trap(); # endif +#elif defined(VISP_ASSERT_THROW) + throw exception(msg.c_str()); #else std::abort(); #endif diff --git a/src/visp/birefnet.cpp b/src/visp/birefnet.cpp index 9750d3e..125034f 100644 --- a/src/visp/birefnet.cpp +++ b/src/visp/birefnet.cpp @@ -588,14 +588,14 @@ const swin_params swin_l_params = { swin_params swin_detect_params(model_ref m) { tensor t = m.find("bb.layers.0.blocks.0.attn.proj.bias"); if (t == nullptr) { - throw error("Failed to detect model parameters"); + throw except("Failed to detect model parameters"); } if (t->ne[0] == 96) { return swin_t_params; } else if (t->ne[0] == 192) { return swin_l_params; } else { - throw error("Unsupported Swin Transformer embed dim: {}", t->ne[0]); + throw except("Unsupported Swin Transformer embed dim: {}", t->ne[0]); } } diff --git a/src/visp/esrgan.cpp b/src/visp/esrgan.cpp index 36d3783..89b3aae 100644 --- a/src/visp/esrgan.cpp +++ b/src/visp/esrgan.cpp @@ -102,12 +102,17 @@ esrgan_params esrgan_detect_params(model_ref m) { // 3 layers per upscale block, each upscales x2, 5 blocks for the rest of the model p.scale = 1 << ((model_len - 5) / 3); if (p.scale < 2 || p.scale > 4) { - throw error("Unsupported scale: {}", p.scale); + throw except("Unsupported scale: {}", p.scale); } if (p.n_blocks < 1 || p.n_blocks > 23) { - throw error("Invalid number of blocks: {}", p.n_blocks); + throw except("Invalid number of blocks: {}", p.n_blocks); } return p; } +int esrgan_estimate_graph_size(esrgan_params const& p) { + // worst-case estimate, exact number depends on how conv-2d is implemented for the backend + return 512 + p.n_blocks * 192; +} + } // namespace visp \ No newline at end of file diff --git a/src/visp/image.cpp b/src/visp/image.cpp index 97ed5af..235c97d 100644 --- a/src/visp/image.cpp +++ b/src/visp/image.cpp @@ -184,10 +184,10 @@ image_data image_load(char const* filepath) { int channels = 0; uint8_t* pixels = stbi_load(filepath, &extent[0], &extent[1], &channels, 0); if (!pixels) { - throw error("Failed to load image {}: {}", filepath, stbi_failure_reason()); + throw except("Failed to load image {}: {}", filepath, stbi_failure_reason()); } image_format format = image_format_from_channels(channels); - return image_data(extent, format, std::unique_ptr(pixels)); + return image_data{extent, format, std::unique_ptr(pixels)}; } void image_save(image_view const& img, char const* filepath) { @@ -195,12 +195,12 @@ void image_save(image_view const& img, char const* filepath) { if (!(img.format == image_format::alpha_u8 || img.format == image_format::rgb_u8 || img.format == image_format::rgba_u8)) { - throw error("Unsupported image format [{}]", int(img.format)); + throw except("Unsupported image format [{}]", int(img.format)); } int comp = n_channels(img.format); if (!stbi_write_png( filepath, img.extent[0], img.extent[1], comp, img.data, img.extent[0] * comp)) { - throw error("Failed to save image {}", filepath); + throw except("Failed to save image {}", filepath); } } @@ -338,7 +338,7 @@ void image_scale(image_view const& img, i32x2 target, image_span const& dst) { STBIR_COLORSPACE_SRGB, nullptr); } if (result == 0) { - throw error( + throw except( "Failed to resize image {}x{} to {}x{}", img.extent[0], img.extent[1], target[0], target[1]); } diff --git a/src/visp/ml.cpp b/src/visp/ml.cpp index e9712c6..3fe3ac0 100644 --- a/src/visp/ml.cpp +++ b/src/visp/ml.cpp @@ -52,7 +52,7 @@ backend_device backend_init(backend_type type) { backend_device b; b.handle.reset(ggml_backend_init_by_type(convert(type), nullptr)); if (!b.handle) { - throw error("Failed to initialize backend, no suitable device available"); + throw except("Failed to initialize backend, no suitable device available"); } b.device = ggml_backend_get_device(b.handle.get()); @@ -305,7 +305,7 @@ tensor model_ref::weights(char const* name) const { if (tensor result = find(name)) { return result; } - throw error("tensor not found: {}.{}", prefix.view(), name); + throw except("tensor not found: {}.{}", prefix.view(), name); } model_ref model_ref::with_prefix(tensor_name new_prefix) const { @@ -360,13 +360,13 @@ tensor_data tensor_alloc(tensor x) { tensor_data tensor_load(tensor x, char const* filepath) { FILE* file = fopen(filepath, "rb"); if (!file) { - throw error("Failed to open file: {}", filepath); + throw except("Failed to open file: {}", filepath); } tensor_data result = tensor_alloc(x); size_t read = fread(result.data.get(), 1, ggml_nbytes(x), file); fclose(file); if (read != ggml_nbytes(x)) { - throw error("Failed to read data from file: {}", filepath); + throw except("Failed to read data from file: {}", filepath); } return result; } diff --git a/src/visp/vision.cpp b/src/visp/vision.cpp index b816556..4d335ef 100644 --- a/src/visp/vision.cpp +++ b/src/visp/vision.cpp @@ -171,7 +171,7 @@ image_data esrgan_compute(esrgan_model& model, image_view image) { tile_layout tiles(image.extent, esrgan_default_tile_size, 16); if (!model.graph || model.tile_size != tiles.tile_size) { model.tile_size = tiles.tile_size; - model.graph = compute_graph_init(); + model.graph = compute_graph_init(esrgan_estimate_graph_size(model.params)); model_ref m(model.weights, model.graph); i64x4 input_shape = {3, tiles.tile_size[0], tiles.tile_size[1], 1}; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8c40209..a757c45 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,10 +6,10 @@ target_sources(test-vision PRIVATE test-image.cpp ) target_include_directories(test-vision PRIVATE . ../src) -target_compile_options(visioncpp PRIVATE ${VISP_WARNINGS}) -target_compile_definitions(visioncpp PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS}) +target_compile_options(test-vision PRIVATE ${VISP_WARNINGS}) +target_compile_definitions(test-vision PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS}) target_link_libraries(test-vision PRIVATE visioncpp ${VISP_FMT_LINK}) -add_test(NAME vision COMMAND test-vision) +add_test(NAME vision COMMAND test-vision -v) # # Model tests (image comparisons) @@ -20,10 +20,15 @@ target_sources(test-models PRIVATE test-models.cpp ) target_include_directories(test-models PRIVATE . ../src) -target_compile_options(visioncpp PRIVATE ${VISP_WARNINGS}) -target_compile_definitions(visioncpp PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS}) +target_compile_options(test-models PRIVATE ${VISP_WARNINGS}) +target_compile_definitions(test-models PRIVATE ${VISP_ASSERT} ${VISP_FMT_DEFS}) target_link_libraries(test-models PRIVATE visioncpp ${VISP_FMT_LINK}) -add_test(NAME models COMMAND test-models) +if(VISP_VULKAN AND NOT VISP_CI) + add_test(NAME models COMMAND test-models -v) +else() + # GPU tests currently don't pass on GitHub runners with Vulkan/llvmpipe + add_test(NAME models COMMAND test-models -v --no-gpu) +endif() include(reference-images.cmake) diff --git a/tests/test-image.cpp b/tests/test-image.cpp index 58ae717..b3eaa18 100644 --- a/tests/test-image.cpp +++ b/tests/test-image.cpp @@ -8,7 +8,7 @@ namespace visp { -TEST_CASE(image_formats) { +VISP_TEST(image_formats) { auto formats = std::array{image_format::rgba_u8, image_format::bgra_u8, image_format::argb_u8, image_format::rgb_u8, image_format::alpha_u8, image_format::rgba_f32, image_format::rgb_f32, image_format::alpha_f32}; @@ -36,14 +36,14 @@ TEST_CASE(image_formats) { } } -TEST_CASE(image_load) { +VISP_TEST(image_load) { image_data img = image_load((test_dir().input / "cat-and-hat.jpg").string().c_str()); CHECK(img.extent == i32x2{512, 512}); CHECK(img.format == image_format::rgb_u8); CHECK(n_bytes(img) == 512 * 512 * 3); } -TEST_CASE(image_save) { +VISP_TEST(image_save) { image_data img = image_alloc(i32x2{16, 16}, image_format::rgba_u8); for (int i = 0; i < 16 * 16; ++i) { img.data.get()[i * 4 + 0] = 255; @@ -74,20 +74,20 @@ void test_image_u8_to_f32( CHECK_IMAGES_EQUAL(output, expected); } -TEST_CASE(image_alpha_u8_to_alpha_f32) { +VISP_TEST(image_alpha_u8_to_alpha_f32) { test_image_u8_to_f32( image_format::alpha_u8, image_format::alpha_f32, // std::array{0, 128, 190, 255}, // std::array{0.05f, 0.3f, 0.4225f, 0.55f}); } -TEST_CASE(image_rgb_u8_to_rgb_f32) { +VISP_TEST(image_rgb_u8_to_rgb_f32) { test_image_u8_to_f32( image_format::rgb_u8, image_format::rgb_f32, // std::array{0, 128, 192, 255, 0, 128, 128, 255, 0, 128, 64, 255}, // std::array{ 0.05f, 0.7f, -1.05f, 0.55f, 0.2f, -0.8f, 0.3f, 1.2f, -0.3f, 0.3f, 0.45f, -1.3f}); } -TEST_CASE(image_rgba_u8_to_rgb_f32) { +VISP_TEST(image_rgba_u8_to_rgb_f32) { test_image_u8_to_f32( image_format::rgba_u8, image_format::rgb_f32, // std::array{ @@ -101,7 +101,7 @@ TEST_CASE(image_rgba_u8_to_rgb_f32) { 0.3f, 1.2f, -0.3f, // 0.3f, 0.45f, -1.3f}); } -TEST_CASE(image_rgba_u8_to_rgba_f32) { +VISP_TEST(image_rgba_u8_to_rgba_f32) { test_image_u8_to_f32( image_format::rgba_u8, image_format::rgba_f32, // std::array{ @@ -115,7 +115,7 @@ TEST_CASE(image_rgba_u8_to_rgba_f32) { 0.3f, 1.2f, -0.3f, 0.9f, 0.3f, // 0.45f, -1.3f, 1.4f}); } -TEST_CASE(image_bgra_u8_to_rgb_f32) { +VISP_TEST(image_bgra_u8_to_rgb_f32) { test_image_u8_to_f32( image_format::bgra_u8, image_format::rgb_f32, // std::array{ @@ -129,7 +129,7 @@ TEST_CASE(image_bgra_u8_to_rgb_f32) { 0.3f, 1.2f, -0.3f, // 0.3f, 0.45f, -1.3f}); } -TEST_CASE(image_argb_u8_to_rgb_f32) { +VISP_TEST(image_argb_u8_to_rgb_f32) { test_image_u8_to_f32( image_format::argb_u8, image_format::rgb_f32, // std::array{ @@ -144,7 +144,7 @@ TEST_CASE(image_argb_u8_to_rgb_f32) { 0.3f, 0.45f, -1.3f}); } -TEST_CASE(image_u8_to_f32_tiled_pad) { +VISP_TEST(image_u8_to_f32_tiled_pad) { std::array input_data = {0, 0, 102, 0, 0, 255, 0, 0, 102}; std::array expected_data = {1.0f, 1.0f, 0.4f, 0.4f}; image_view input(i32x2{3, 3}, image_format::alpha_u8, input_data); @@ -159,7 +159,7 @@ TEST_CASE(image_u8_to_f32_tiled_pad) { CHECK_IMAGES_EQUAL(output, expected); } -TEST_CASE(image_alpha_f32_to_alpha_u8) { +VISP_TEST(image_alpha_f32_to_alpha_u8) { std::array input_data{0.0f, 0.3f, 0.4225f, 1.1f}; std::array expected_data = {0, 76, 107, 255}; image_view input(i32x2{2, 2}, image_format::alpha_f32, input_data.data()); @@ -171,7 +171,7 @@ TEST_CASE(image_alpha_f32_to_alpha_u8) { CHECK_IMAGES_EQUAL(output, expected); } -TEST_CASE(image_rgb_f32_to_rgba_u8) { +VISP_TEST(image_rgb_f32_to_rgba_u8) { std::array input_data{0.0f, 0.31f, -0.51f, 1.0f, 0.2f, 1.8f}; std::array expected_data = {0, 79, 0, 255, 255, 51, 255, 255}; image_view input(i32x2{2, 1}, image_format::rgb_f32, input_data.data()); @@ -183,7 +183,7 @@ TEST_CASE(image_rgb_f32_to_rgba_u8) { CHECK_IMAGES_EQUAL(output, expected); } -TEST_CASE(image_scale) { +VISP_TEST(image_scale) { image_data img = image_alloc(i32x2{8, 8}, image_format::rgba_u8); for (int i = 0; i < 8 * 8; ++i) { img.data[i * 4 + 0] = uint8_t(255); @@ -202,7 +202,7 @@ TEST_CASE(image_scale) { } } -TEST_CASE(image_alpha_composite) { +VISP_TEST(image_alpha_composite) { std::array fg_data = {255, 0, 0, 255, 0, 255, 0, 255, // 0, 0, 255, 255, 255, 255, 0, 255}; image_view fg{i32x2{2, 2}, image_format::rgba_u8, fg_data}; @@ -222,7 +222,7 @@ TEST_CASE(image_alpha_composite) { CHECK_IMAGES_EQUAL(output, expected); } -TEST_CASE(image_blur) { +VISP_TEST(image_blur) { constexpr i32x2 extent{6, 6}; // clang-format off std::array input_data = { @@ -252,7 +252,7 @@ TEST_CASE(image_blur) { CHECK_IMAGES_EQUAL(output, expected); } -TEST_CASE(tile_merge) { +VISP_TEST(tile_merge) { std::array, 4> tiles; for (int t = 0; t < 4; ++t) { float v = float(t); @@ -290,7 +290,7 @@ TEST_CASE(tile_merge) { CHECK_IMAGES_EQUAL(dst_span, expected); } -TEST_CASE(tile_merge_blending) { +VISP_TEST(tile_merge_blending) { std::array dst{}; auto dst_span = image_span({22, 19}, dst); diff --git a/tests/test-models.cpp b/tests/test-models.cpp index bb45dc1..531c5ac 100644 --- a/tests/test-models.cpp +++ b/tests/test-models.cpp @@ -1,5 +1,5 @@ -#include "visp/vision.hpp" #include "util/string.hpp" +#include "visp/vision.hpp" #include "testing.hpp" @@ -12,15 +12,14 @@ void compare_images(std::string_view name, image_view result, float tolerance = image_save(result, result_path.string().c_str()); image_data reference = image_load(reference_path.string().c_str()); - test_set_info( - format( - "while comparing images {} and {}", relative(result_path).string(), - relative(reference_path).string())); + test_set_info(format( + "while comparing images {} and {}", relative(result_path).string(), + relative(reference_path).string())); test_with_tolerance with(tolerance); CHECK_IMAGES_EQUAL(result, reference); } -void test_mobile_sam(backend_type bt) { +VISP_BACKEND_TEST(test_mobile_sam)(backend_type bt) { path model_path = test_dir().models / "MobileSAM-F16.gguf"; path input_path = test_dir().input / "cat-and-hat.jpg"; @@ -29,7 +28,7 @@ void test_mobile_sam(backend_type bt) { image_data input = image_load(input_path.string().c_str()); sam_encode(model, input); image_data mask_box = sam_compute(model, box_2d{{180, 110}, {505, 330}}); - image_data mask_point = sam_compute(model, i32x2{200, 300}); + image_data mask_point = sam_compute(model, i32x2{200, 300}); char const* suffix = bt == backend_type::cpu ? "-cpu.png" : "-gpu.png"; float tolerance = bt == backend_type::cpu ? 0.01f : 0.015f; @@ -37,14 +36,7 @@ void test_mobile_sam(backend_type bt) { compare_images(format("mobile_sam-point{}", suffix), mask_point, tolerance); } -TEST_CASE(test_mobile_sam_cpu) { - test_mobile_sam(backend_type::cpu); -} -TEST_CASE(test_mobile_sam_gpu) { - test_mobile_sam(backend_type::gpu); -} - -void test_birefnet(backend_type bt) { +VISP_BACKEND_TEST(test_birefnet)(backend_type bt) { path model_path = test_dir().models / "BiRefNet-lite-F16.gguf"; path input_path = test_dir().input / "wardrobe.jpg"; std::string name = "birefnet"; @@ -55,17 +47,11 @@ void test_birefnet(backend_type bt) { image_data input = image_load(input_path.string().c_str()); image_data output = birefnet_compute(model, input); - compare_images(name, output); + float tolerance = bt == backend_type::cpu ? 0.01f : 0.3f; // TODO: GPU is non-deterministic + compare_images(name, output, tolerance); } -TEST_CASE(test_birefnet_cpu) { - test_birefnet(backend_type::cpu); -} -TEST_CASE(test_birefnet_gpu) { - test_birefnet(backend_type::gpu); -} - -void test_migan(backend_type bt) { +VISP_BACKEND_TEST(test_migan)(backend_type bt) { path model_path = test_dir().models / "MIGAN-512-places2-F16.gguf"; path image_path = test_dir().input / "bench-image.jpg"; path mask_path = test_dir().input / "bench-mask.png"; @@ -82,15 +68,8 @@ void test_migan(backend_type bt) { compare_images(name, composited); } -TEST_CASE(test_migan_cpu) { - test_migan(backend_type::cpu); -} -TEST_CASE(test_migan_gpu) { - test_migan(backend_type::gpu); -} - -void test_esrgan(backend_type bt) { - path model_path = test_dir().models / "RealESRGAN_x4plus_anime_6Bh.gguf"; +VISP_BACKEND_TEST(test_esrgan)(backend_type bt) { + path model_path = test_dir().models / "RealESRGAN-x4plus_anime-6B-F16.gguf"; path input_path = test_dir().input / "vase-and-bowl.jpg"; std::string name = "esrgan"; name += bt == backend_type::cpu ? "-cpu.png" : "-gpu.png"; @@ -103,11 +82,4 @@ void test_esrgan(backend_type bt) { compare_images(name, output); } -TEST_CASE(test_esrgan_cpu) { - test_esrgan(backend_type::cpu); -} -TEST_CASE(test_esrgan_gpu) { - test_esrgan(backend_type::gpu); -} - } // namespace visp \ No newline at end of file diff --git a/tests/testing.cpp b/tests/testing.cpp index fd73e00..1e596bc 100644 --- a/tests/testing.cpp +++ b/tests/testing.cpp @@ -1,4 +1,5 @@ #include "testing.hpp" +#include "visp/ml.hpp" #include #include @@ -10,45 +11,55 @@ namespace visp { // Globals float tolerance = 1e-5f; std::string extra_info; -} +} // namespace visp int main(int argc, char** argv) { - auto& registry = visp::test_registry_instance(); + using namespace visp; + + auto& registry = test_registry_instance(); int passed = 0; int failed = 0; int errors = 0; std::string_view filter; + bool exclude_gpu = false; bool verbose = false; + for (int i = 1; i < argc; ++i) { std::string_view arg(argv[i]); if (arg == "-v" || arg == "--verbose") { verbose = true; + } else if (arg == "--no-gpu") { + exclude_gpu = true; } else { filter = arg; } } - auto time_start = steady_clock::now(); - - for (auto& test : registry.tests) { - if (!filter.empty() && test.name != filter) { - continue; - } + auto run = [&](test_case const& test, char const* name, backend_type backend) { try { - printf(test.name); - fflush(stdout); + if (!filter.empty() && name != filter && test.name != filter) { + return; // test not selected + } + if (verbose) { + printf("%s", name); + fflush(stdout); + } + + if (test.is_backend_test) { + test.backend_func(backend); + } else { + test.func(); + } - test.func(); - ++passed; if (verbose) { - printf(" \033[32mPASSED\033[0m\n", test.name); + printf(" %s\n", "\033[32mPASSED\033[0m"); } } catch (const visp::test_failure& e) { ++failed; - printf(" \033[31mFAILED\033[0m\n", test.name); + printf(" %s\n", "\033[31mFAILED\033[0m"); printf(" \033[90m%s:%d:\033[0m Assertion failed\n", e.file, e.line); printf(" \033[93m%s\033[0m\n", e.condition); if (e.eval) { @@ -59,11 +70,25 @@ int main(int argc, char** argv) { } } catch (const std::exception& e) { ++errors; - printf(" \033[31mERROR\033[0m\n", test.name); + printf(" %s\n", "\033[31mERROR\033[0m"); printf(" \033[90m%s:%d:\033[0m Unhandled exception\n", test.file, test.line); printf(" \033[93m%s\033[0m\n", e.what()); } visp::extra_info.clear(); + }; + + auto time_start = steady_clock::now(); + fixed_string<128> name; + + for (auto& test : registry.tests) { + if (test.is_backend_test) { + run(test, format(name, "{}[cpu]", test.name), backend_type::cpu); + if (!exclude_gpu) { + run(test, format(name, "{}[gpu]", test.name), backend_type::gpu); + } + } else { + run(test, test.name, backend_type::cpu); + } } auto time_end = steady_clock::now(); @@ -71,14 +96,16 @@ int main(int argc, char** argv) { std::chrono::duration_cast(time_end - time_start).count(); char const* color = (failed > 0 || errors > 0) ? "\033[31m" : "\033[32m"; - printf("%s----------------------------------------------------------------------\n", color); + if (verbose || failed > 0 || errors > 0) { + printf("%s----------------------------------------------------------------------\n", color); + } if (failed > 0) { printf("\033[31m%d failed, ", failed); } if (errors > 0) { printf("\033[31m%d errors, ", errors); } - printf("\033[92m%d passed %sin %lldms\033[0m\n", passed, color, duration); + printf("\033[92m%d passed %sin %lldms\033[0m\n", passed, color, (long long)duration); return (failed > 0 || errors > 0) ? 1 : 0; } @@ -92,7 +119,24 @@ test_registry& test_registry_instance() { test_registration::test_registration( char const* name, test_function f, char const* file, int line) { - test_registry_instance().tests.push_back({name, f, file, line}); + test_case t; + t.name = name; + t.file = file; + t.line = line; + t.func = f; + t.is_backend_test = false; + test_registry_instance().tests.push_back(t); +} + +test_registration::test_registration( + char const* name, test_backend_function f, char const* file, int line) { + test_case t; + t.name = name; + t.file = file; + t.line = line; + t.backend_func = f; + t.is_backend_test = true; + test_registry_instance().tests.push_back(t); } test_directories const& test_dir() { @@ -130,9 +174,7 @@ float& test_tolerance_value() { test_failure test_failure_image_mismatch( char const* file, int line, char const* condition, float rms) { test_failure result(file, line, condition); - format( - result.eval, "-> rmse {:.5f} > {:.5f} tolerance", rms, - test_tolerance_value()); + format(result.eval, "-> rmse {:.5f} > {:.5f} tolerance", rms, test_tolerance_value()); return result; } diff --git a/tests/testing.hpp b/tests/testing.hpp index 03f6b44..b074cd6 100644 --- a/tests/testing.hpp +++ b/tests/testing.hpp @@ -6,6 +6,7 @@ #include namespace visp { +enum class backend_type; struct test_failure { char const* file; @@ -18,12 +19,17 @@ struct test_failure { }; using test_function = void (*)(); +using test_backend_function = void (*)(backend_type); struct test_case { char const* name; - test_function func; char const* file; int line; + bool is_backend_test; + union { + test_function func; + test_backend_function backend_func; + }; }; struct test_registry { @@ -34,6 +40,7 @@ test_registry& test_registry_instance(); struct test_registration { test_registration(char const* name, test_function f, char const* file, int line); + test_registration(char const* name, test_backend_function f, char const* file, int line); }; using std::filesystem::path; @@ -66,8 +73,9 @@ template bool test_is_equal(T const& a, T const& b) { if constexpr (std::is_floating_point_v) { return std::abs(a - b) <= test_tolerance_value(); + } else { + return a == b; } - return a == b; } template @@ -87,11 +95,16 @@ test_failure test_failure_image_mismatch(char const* file, int line, char const* } // namespace visp -#define TEST_CASE(name) \ +#define VISP_TEST(name) \ void test_func_##name(); \ const visp::test_registration test_reg_##name(#name, test_func_##name, __FILE__, __LINE__); \ void test_func_##name() +#define VISP_BACKEND_TEST(name) \ + void test_func_##name(visp::backend_type); \ + const visp::test_registration test_reg_##name(#name, test_func_##name, __FILE__, __LINE__); \ + void test_func_##name + #define CHECK(...) \ if (!(__VA_ARGS__)) { \ throw visp::test_failure(__FILE__, __LINE__, #__VA_ARGS__); \ diff --git a/tests/workbench.cpp b/tests/workbench.cpp index 22f4149..bd2daf8 100644 --- a/tests/workbench.cpp +++ b/tests/workbench.cpp @@ -348,7 +348,7 @@ DEF(biref_decode)(model_ref m, span input, param_dict const& p) { // MI-GAN DEF(migan_lrelu_agc)(model_ref m, span input, param_dict const& p) { - return {migan::lrelu_agc(m, input[0], 0.2f, std::sqrtf(2), 1.0f)}; + return {migan::lrelu_agc(m, input[0], 0.2f, std::sqrt(2), 1.0f)}; } DEF(migan_downsample_2d)(model_ref m, span input, param_dict const& p) { @@ -427,7 +427,7 @@ param_dict build_dict(span raw_params) { param.type = param_type::string; param.value.s = raw.value; break; - default: throw error("Unknown parameter type"); + default: throw except("Unknown parameter type"); } dict.params.push_back(param); } @@ -509,7 +509,7 @@ test_case const& workbench_find_test(std::string_view name) { if (it != w.tests.end()) { return *it; } - throw error("Test case not found: {}", name); + throw except("Test case not found: {}", name); } void workbench_run(