From bf23dfffad27a75c4f52d0b3343692be6adb3ae3 Mon Sep 17 00:00:00 2001 From: Alex Reinking Date: Mon, 16 Dec 2024 21:17:10 -0500 Subject: [PATCH 1/8] Fix release publishing workflow --- .github/workflows/pip.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index c83cff317555..4d49f010a4c0 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -8,7 +8,7 @@ on: push: branches: [ main, build/pip-packaging ] release: - types: [ created ] + types: [ published ] env: # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406 @@ -253,7 +253,7 @@ jobs: - uses: pypa/gh-action-pypi-publish@release/v1 if: github.event_name == 'push' && github.ref_name == 'main' with: - repository_url: https://test.pypi.org/legacy/ + repository-url: https://test.pypi.org/legacy/ - uses: pypa/gh-action-pypi-publish@release/v1 if: github.event_name == 'release' && github.event.action == 'published' From d1cbf812a04b8c785b306fe1dd872ac2c047a307 Mon Sep 17 00:00:00 2001 From: Alex Reinking Date: Mon, 16 Dec 2024 21:23:51 -0500 Subject: [PATCH 2/8] Try allowing manual workflow dispatch --- .github/workflows/pip.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 4d49f010a4c0..ab3abf6ec6bd 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -9,6 +9,7 @@ on: branches: [ main, build/pip-packaging ] release: types: [ published ] + workflow_dispatch: env: # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406 From 1bc9bad4a1dc39043fb161fb40751334b816d6a9 Mon Sep 17 00:00:00 2001 From: Alex Reinking Date: Mon, 16 Dec 2024 21:26:37 -0500 Subject: [PATCH 3/8] Try edited just for now --- .github/workflows/pip.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index ab3abf6ec6bd..1d02f94ab9fd 100644 --- 
a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -8,8 +8,7 @@ on: push: branches: [ main, build/pip-packaging ] release: - types: [ published ] - workflow_dispatch: + types: [ published, edited ] env: # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406 From af06d04a4d03dc928850e6b4e0251b829dc41b83 Mon Sep 17 00:00:00 2001 From: Alex Reinking Date: Mon, 16 Dec 2024 21:35:09 -0500 Subject: [PATCH 4/8] Try any sort of release scenario --- .github/workflows/pip.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 1d02f94ab9fd..6276cc426910 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -8,7 +8,6 @@ on: push: branches: [ main, build/pip-packaging ] release: - types: [ published, edited ] env: # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406 @@ -256,4 +255,4 @@ jobs: repository-url: https://test.pypi.org/legacy/ - uses: pypa/gh-action-pypi-publish@release/v1 - if: github.event_name == 'release' && github.event.action == 'published' + if: github.event_name == 'release' From 4c70f67e69182e7367c49497bba397337896c2c6 Mon Sep 17 00:00:00 2001 From: Alex Reinking Date: Mon, 16 Dec 2024 21:38:09 -0500 Subject: [PATCH 5/8] Try a push just for this branch? 
--- .github/workflows/pip.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 6276cc426910..f2a61bbe7855 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -6,8 +6,9 @@ name: Build PyPI package on: push: - branches: [ main, build/pip-packaging ] + branches: [ main, release/*.x ] release: + types: [ published ] env: # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406 @@ -255,4 +256,4 @@ jobs: repository-url: https://test.pypi.org/legacy/ - uses: pypa/gh-action-pypi-publish@release/v1 - if: github.event_name == 'release' + if: github.event_name == 'release' && github.event.action == 'published' From ac2fc94af3edf97a8aca6ef68a5e8c53c1be0844 Mon Sep 17 00:00:00 2001 From: Alex Reinking Date: Mon, 16 Dec 2024 21:45:24 -0500 Subject: [PATCH 6/8] Make sure the release actually runs? --- .github/workflows/pip.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index f2a61bbe7855..a930a82805b6 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -256,4 +256,6 @@ jobs: repository-url: https://test.pypi.org/legacy/ - uses: pypa/gh-action-pypi-publish@release/v1 - if: github.event_name == 'release' && github.event.action == 'published' + if: > + github.event_name == 'release' && github.event.action == 'published' || + github.event_name == 'push' && (github.ref_name == 'release/19.x' || github.ref_name == 'v19.0.0') From 1a6a9a7781c657fe8e1545d5f627e430afbafa95 Mon Sep 17 00:00:00 2001 From: Cesare Mercurio Date: Sun, 24 Aug 2025 10:14:07 -0700 Subject: [PATCH 7/8] Vulkan : Extend VulkanRuntime APIs to allow using internal halide memory allocator with custom Vulkan context Summary: In a situation where you'd want to set your own context (aka: you are integrating Halide in a system or app owning already Vulkan resources), the existing APIs expose 
the Halide memory allocator as an opaque handle, which does not allow easy plug-and-play with a different allocator (like VMA). The changes proposed in this patch allow you to pass your own VkDevice, VkInstance, VkQueue and VkPhysicalDevice directly to Halide while still using Halide's own memory allocator, allowing integration into existing apps/systems. The new usage is: 1. Use halide_vulkan_acquire_context with *allocator set to nullptr (you need to override the function). 2. Use halide_vulkan_export_memory_allocator to retrieve the instance of the allocator and save it in your app. 3. Use halide_vulkan_memory_allocator_release to properly release shader and allocator resources (need to override). Test Plan: A simple test app is provided in the apps folder: apps/external_vk_demo --- src/runtime/HalideRuntimeVulkan.h | 16 ++++++++++ src/runtime/vulkan.cpp | 27 ++++++++++++++++ src/runtime/vulkan_context.h | 51 +++++++++++++++++++++++++++++++ src/runtime/vulkan_internal.h | 6 ++++ 4 files changed, 100 insertions(+) diff --git a/src/runtime/HalideRuntimeVulkan.h b/src/runtime/HalideRuntimeVulkan.h index e150b7c6d00b..d49f559fc531 100644 --- a/src/runtime/HalideRuntimeVulkan.h +++ b/src/runtime/HalideRuntimeVulkan.h @@ -105,6 +105,22 @@ extern int halide_vulkan_release_context(void *user_context, VkDevice device, VkQueue queue, VkDebugUtilsMessengerEXT messenger); + +// - halide_vulkan_export_memory_allocator +// exports the internally allocated memory allocator in case the user wants to just set +// up their own context but use Halide's memory allocator. Must have overridden halide_vulkan_acquire_context +// and halide_vulkan_release_context. Must override also halide_vulkan_export_memory_allocator. Use same global spin +// lock to protect access to the allocator. This allows to save the allocator for future halide_vulkan_acquire_context calls +// halide will automatically issue to retrieve custom context. 
+extern int halide_vulkan_export_memory_allocator(void *user_context, + struct halide_vulkan_memory_allocator *allocator); +// - halide_vulkan_memory_allocator_release +// releases the internally allocated memory allocator, important for proper memory cleanup. Must have overridden halide_vulkan_acquire_context +// and halide_vulkan_release_context. Must also use the same global spin lock to protect access to the allocator. +extern int halide_vulkan_memory_allocator_release(void *user_context, + struct halide_vulkan_memory_allocator *allocator, + VkInstance instance, + VkDebugUtilsMessengerEXT messenger); // -- // Override the default allocation callbacks (default uses Vulkan runtime implementation) diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 776088448891..2da946c86673 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -86,6 +86,18 @@ WEAK bool halide_vulkan_is_initialized() { return is_initialized; } +WEAK int halide_vulkan_export_memory_allocator(void *user_context, halide_vulkan_memory_allocator *allocator) { + halide_mutex_lock(&thread_lock); + halide_error_code_t status = halide_error_code_success; + if (allocator == nullptr) { + halide_mutex_unlock(&thread_lock); + error(user_context) << "Vulkan: Memory allocator is null!\n"; + return halide_error_code_buffer_argument_is_null; // thread_lock was released above, so return here instead of reaching the unlock below + } + halide_mutex_unlock(&thread_lock); + return status; +} + WEAK int halide_vulkan_device_free(void *user_context, halide_buffer_t *halide_buffer) { debug(user_context) << "halide_vulkan_device_free (user_context: " << user_context @@ -253,6 +265,21 @@ WEAK int halide_vulkan_device_release(void *user_context) { return destroy_status; } +WEAK int halide_vulkan_memory_allocator_release(void *user_context, + struct halide_vulkan_memory_allocator *allocator, + VkInstance instance, + VkDebugUtilsMessengerEXT messenger) { + debug(user_context) << "halide_vulkan_memory_allocator_release (user_context: " << user_context << ")\n"; + // Destroy the 
context if we created it + if (allocator == nullptr) { + error(user_context) << "Vulkan: Memory allocator is null!\n"; + return halide_error_code_buffer_argument_is_null; + } + + return vk_release_memory_allocator(user_context, (VulkanMemoryAllocator *)allocator, + instance, messenger); +} + WEAK int halide_vulkan_device_malloc(void *user_context, halide_buffer_t *buf) { debug(user_context) << "halide_vulkan_device_malloc (user_context: " << user_context diff --git a/src/runtime/vulkan_context.h b/src/runtime/vulkan_context.h index 9e82b9fb5b64..1a45af8da0ea 100644 --- a/src/runtime/vulkan_context.h +++ b/src/runtime/vulkan_context.h @@ -58,6 +58,43 @@ class VulkanContext { error = halide_error_code_device_interface_no_device; halide_error_no_device_interface(user_context); } + // If user overrode halide_vulkan_acquire_context and returned nullptr for allocator, + // create Halide's allocator for the provided device. User must override `halide_vulkan_export_memory_allocator` + // and make sure to propagate it back at the next call of `halide_vulkan_acquire_context` as he overrides it. 
+ if (allocator == nullptr && + instance != VK_NULL_HANDLE && + device != VK_NULL_HANDLE && + physical_device != VK_NULL_HANDLE) { +#ifdef DEBUG_RUNTIME + // Initialize clock for debug timing - normally done in halide_vulkan_acquire_context + halide_start_clock(user_context); +#endif + // make sure halide vulkan is loaded BEFORE creating allocator + debug(user_context) << "VulkanContext: Loading Vulkan function pointers for context override...\n"; + + vk_load_vulkan_loader_functions(user_context); + if (vkGetInstanceProcAddr == nullptr) { + debug(user_context) << "VulkanContext: Failed to load vkGetInstanceProcAddr from loader!\n"; + } else { + debug(user_context) << "VulkanContext: vkGetInstanceProcAddr loaded successfully: " << (void*)vkGetInstanceProcAddr << "\n"; + vk_load_vulkan_instance_functions(user_context, instance); + vk_load_vulkan_device_functions(user_context, device); + } + + allocator = vk_create_memory_allocator(user_context, device, physical_device, + halide_vulkan_get_allocation_callbacks(user_context)); + if (allocator == nullptr) { + error = halide_error_code_out_of_memory; + debug(user_context) << "Vulkan: Failed to create memory allocator for device!\n"; + return; + } + int result = halide_vulkan_export_memory_allocator(user_context, reinterpret_cast(allocator)); + if (result != halide_error_code_success) { + error = static_cast(result); + debug(user_context) << "Vulkan: Failed to export memory allocator for device!\n"; + return; + } + } halide_debug_assert(user_context, allocator != nullptr); halide_debug_assert(user_context, instance != VK_NULL_HANDLE); halide_debug_assert(user_context, device != VK_NULL_HANDLE); @@ -560,6 +597,20 @@ int vk_destroy_context(void *user_context, VulkanMemoryAllocator *allocator, return halide_error_code_success; } +// Clean up only Halide's internal resources for external context (leaves device/instance alone) +int vk_release_memory_allocator(void *user_context, VulkanMemoryAllocator *allocator, + VkInstance 
instance, VkDebugUtilsMessengerEXT messenger) { + debug(user_context) << "vk_release_memory_allocator (user_context: " << user_context << ")\n"; + // Clean up only Halide's internal resources, not the device/instance we don't own + if (allocator != nullptr) { + vk_destroy_shader_modules(user_context, allocator); + vk_destroy_memory_allocator(user_context, allocator); + vk_destroy_debug_utils_messenger(user_context, instance, allocator, messenger); + } + + return halide_error_code_success; +} + // -------------------------------------------------------------------------- VKAPI_ATTR VkBool32 VKAPI_CALL vk_debug_utils_messenger_callback( diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h index aeef545385cc..af56fb136cfc 100644 --- a/src/runtime/vulkan_internal.h +++ b/src/runtime/vulkan_internal.h @@ -66,6 +66,12 @@ int vk_destroy_context( VkPhysicalDevice physical_device, VkQueue queue); +int vk_release_memory_allocator( + void *user_context, + VulkanMemoryAllocator *allocator, + VkInstance instance, + VkDebugUtilsMessengerEXT messenger); + int vk_find_compute_capability(void *user_context, int *major, int *minor); int vk_create_instance(void *user_context, const StringTable &requested_layers, VkInstance *instance, const VkAllocationCallbacks *alloc_callbacks); From 9041848759bc78e3cc712771b24c3cee35dc63ad Mon Sep 17 00:00:00 2001 From: Cesare Mercurio Date: Sun, 24 Aug 2025 15:08:07 -0700 Subject: [PATCH 8/8] Vulkan : wrap native vulkan demo app Summary: To test out the flow of the functions and verify correctness. Test Plan: Build Halide with patch shaid: 1a6a9a7781c657fe8e1545d5f627e430afbafa95 Instructions to build: cmake -G Ninja -S . 
-B build \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_OSX_ARCHITECTURES=arm64 \ -DCMAKE_OSX_SYSROOT="$(xcrun --sdk macosx --show-sdk-path)" \ -DLLVM_DIR="$(brew --prefix llvm@19)/lib/cmake/llvm" \ -DHalide_WASM_BACKEND=wabt \ -DWITH_PYTHON_BINDINGS=OFF \ -DWITH_TUTORIALS=OFF \ -DBUILD_SHARED_LIBS=OFF \ -DHalide_BUNDLE_STATIC=ON Then build: cmake --build build -j32 Then install: cmake --install build --prefix "$PWD/install" Then build the test app but first modify hardcoded paths in CMakeLists.txt cd apps/external_vk_demo mkdir build cd build cmake ../ make ./demo_main --- apps/external_vk_demo/CMakeLists.txt | 124 ++++ apps/external_vk_demo/convert_generator.cc | 36 + apps/external_vk_demo/demo_main.cpp | 83 +++ .../external_halide_override.cpp | 180 +++++ .../external_halide_override.h | 48 ++ .../vk_buffer_wrap_halide_defs.h | 62 ++ apps/external_vk_demo/vulkan_app.cpp | 661 ++++++++++++++++++ apps/external_vk_demo/vulkan_app.h | 74 ++ 8 files changed, 1268 insertions(+) create mode 100644 apps/external_vk_demo/CMakeLists.txt create mode 100644 apps/external_vk_demo/convert_generator.cc create mode 100644 apps/external_vk_demo/demo_main.cpp create mode 100644 apps/external_vk_demo/external_halide_override.cpp create mode 100644 apps/external_vk_demo/external_halide_override.h create mode 100644 apps/external_vk_demo/vk_buffer_wrap_halide_defs.h create mode 100644 apps/external_vk_demo/vulkan_app.cpp create mode 100644 apps/external_vk_demo/vulkan_app.h diff --git a/apps/external_vk_demo/CMakeLists.txt b/apps/external_vk_demo/CMakeLists.txt new file mode 100644 index 000000000000..8704fd543653 --- /dev/null +++ b/apps/external_vk_demo/CMakeLists.txt @@ -0,0 +1,124 @@ +cmake_minimum_required(VERSION 3.16) +project(external_vk_demo) + +enable_testing() + +# Set up language settings +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED YES) +set(CMAKE_CXX_EXTENSIONS NO) + +# Define Halide installation path +# I've compiled it with the changes like the following: +# 
cmake -G Ninja -S . -B build \ +# -DCMAKE_BUILD_TYPE=Debug \ +# -DCMAKE_OSX_ARCHITECTURES=arm64 \ +# -DCMAKE_OSX_SYSROOT="$(xcrun --sdk macosx --show-sdk-path)" \ +# -DLLVM_DIR="$(brew --prefix llvm@19)/lib/cmake/llvm" \ +# -DHalide_WASM_BACKEND=wabt \ +# -DWITH_PYTHON_BINDINGS=OFF \ +# -DWITH_TUTORIALS=OFF \ +# -DBUILD_SHARED_LIBS=OFF \ +# -DHalide_BUNDLE_STATIC=ON +# Then build: +# cmake --build build -j32 +# Then install: +# cmake --install build --prefix "$PWD/install" +set(HALIDE_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../install) + +# Create our own Halide targets exactly like the Bazel BUILD file + +# Halide runtime headers (equivalent to ":runtime") +add_library(Halide_Runtime INTERFACE) +target_include_directories(Halide_Runtime INTERFACE ${HALIDE_INSTALL_DIR}/include) + +# Halide static library (equivalent to ":lib_halide_static") +add_library(Halide_Static STATIC IMPORTED) +# FIX paths they are hardcoded to my machine +set_target_properties(Halide_Static PROPERTIES + IMPORTED_LOCATION ${HALIDE_INSTALL_DIR}/lib/libHalide.a + INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INSTALL_DIR}/include + INTERFACE_LINK_LIBRARIES "-L/opt/homebrew/opt/llvm@19/lib;-lLLVM-19;-L/opt/homebrew/Cellar/wabt/1.0.37/lib;-lwabt;-lwasm-rt-impl;-L/opt/homebrew/Cellar/lld@19/19.1.7/lib;-llldWasm;-llldCommon;-framework Foundation;-framework CoreFoundation;-framework CoreGraphics;-framework CoreVideo;-framework IOKit;-framework AppKit;-lz;-ldl;-lpthread;-lcurses;-lffi" +) +target_link_libraries(Halide_Static INTERFACE Halide_Runtime) + +# Halide GenGen library (equivalent to ":gengen") +add_library(Halide_GenGen STATIC IMPORTED) +set_target_properties(Halide_GenGen PROPERTIES + IMPORTED_LOCATION ${HALIDE_INSTALL_DIR}/lib/libHalide_GenGen.a + INTERFACE_INCLUDE_DIRECTORIES "${HALIDE_INSTALL_DIR}/include;${HALIDE_INSTALL_DIR}/share/Halide/tools" + # Force load the GenGen library (equivalent to alwayslink = True) + INTERFACE_LINK_LIBRARIES 
"-Wl,-force_load,${HALIDE_INSTALL_DIR}/lib/libHalide_GenGen.a" +) +target_link_libraries(Halide_GenGen INTERFACE Halide_Static) + +# Find Vulkan +find_package(Vulkan REQUIRED) + +# Generator executable - create it manually like Bazel does +add_executable(convert_generator convert_generator.cc) +target_link_libraries(convert_generator + PRIVATE + Halide_GenGen + Vulkan::Vulkan) + +# Manual AOT compilation using custom command (following Makefile pattern) +# Note: Requires Vulkan library to be in your path. Set environment before building: +# On macOS: export DYLD_LIBRARY_PATH=/path/to/vulkan/lib:$DYLD_LIBRARY_PATH +# On Linux: export LD_LIBRARY_PATH=/path/to/vulkan/lib:$LD_LIBRARY_PATH +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.a ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.h + COMMAND $ -g convert_generator -o ${CMAKE_CURRENT_BINARY_DIR} target=host-vulkan-vk_int8 + DEPENDS convert_generator + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating Vulkan AOT library (ensure Vulkan is in library path)" +) + +# Create custom target and library from generated files +add_custom_target(convert_aot_files DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.a ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.h) +add_library(convert_aot STATIC IMPORTED) +set_target_properties(convert_aot PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.a) +add_dependencies(convert_aot convert_aot_files) +target_link_libraries(convert_aot INTERFACE Vulkan::Vulkan Halide_Static) + +# External Halide override library +add_library(external_halide_override + external_halide_override.cpp + external_halide_override.h) +target_link_libraries(external_halide_override + PRIVATE + convert_aot + Halide_Static + Halide_GenGen + Vulkan::Vulkan) + +# Vulkan app library +add_library(vulkan_app + vulkan_app.cpp + vulkan_app.h) +target_include_directories(vulkan_app PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${HALIDE_INSTALL_DIR}/share/tools) 
+add_dependencies(vulkan_app convert_aot_files) # Ensure generated header exists +target_link_libraries(vulkan_app + PRIVATE + Halide_Static + Vulkan::Vulkan + external_halide_override) + +# Simple test executable to verify image loading +add_executable(demo_main demo_main.cpp) +target_include_directories(demo_main PRIVATE ${HALIDE_INSTALL_DIR}/share/tools) +target_link_libraries(demo_main + PRIVATE + vulkan_app + external_halide_override + convert_aot + Halide_Static + Halide_GenGen + Vulkan::Vulkan) + +# Did not try the following, only used MacOS, on Windows we will prob fail as we override WEAK +if (NOT WIN32) + target_link_libraries(external_halide_override PRIVATE dl pthread) + target_link_libraries(vulkan_app PRIVATE dl pthread) + target_link_libraries(demo_main PRIVATE dl pthread) +endif () \ No newline at end of file diff --git a/apps/external_vk_demo/convert_generator.cc b/apps/external_vk_demo/convert_generator.cc new file mode 100644 index 000000000000..00831debd859 --- /dev/null +++ b/apps/external_vk_demo/convert_generator.cc @@ -0,0 +1,36 @@ +#include "Halide.h" + +using namespace Halide; + +// RGB to Grayscale conversion generator for AOT compilation +class ConvertGenerator : public Halide::Generator { +public: + Input> input{"input"}; + Output> output{"output"}; + + void generate() { + Var x("x"), y("y"); + input.dim(0).set_stride(3); + input.dim(2).set_stride(1); + input.dim(2).set_bounds(0, 3); + + // RGB to Grayscale conversion using standard luminance formula + output(x, y) = cast( + 0.299f * cast(input(x, y, 0)) + // Red + 0.587f * cast(input(x, y, 1)) + // Green + 0.114f * cast(input(x, y, 2)) // Blue + ); + // Schedule for target + // dumb scheduling + if (get_target().has_feature(Target::Vulkan)) { + // GPU scheduling for Vulkan + Var xi("xi"), yi("yi"); + output.gpu_tile(x, y, xi, yi, 16, 16); + } else { + // CPU scheduling + output.vectorize(x, 8); + } + } +}; + +HALIDE_REGISTER_GENERATOR(ConvertGenerator, convert_generator) \ No newline 
at end of file diff --git a/apps/external_vk_demo/demo_main.cpp b/apps/external_vk_demo/demo_main.cpp new file mode 100644 index 000000000000..157c873afb30 --- /dev/null +++ b/apps/external_vk_demo/demo_main.cpp @@ -0,0 +1,83 @@ +#include "vulkan_app.h" + +#include + +int main() { + std::cout << "External Vulkan Demo\n"; + + // Test synthetic image generation first + auto img = loadTestImage(); + + std::cout << "Image loaded successfully!\n"; + std::cout << "Dimensions: " << img.width() << "x" << img.height() << "x" + << img.channels() << std::endl; + + // Check some pixel values to verify the image is properly loaded + std::cout << "Sample pixel values at (10,10): "; + std::cout << "R=" << static_cast(img(10, 10, 0)) << " "; + std::cout << "G=" << static_cast(img(10, 10, 1)) << " "; + std::cout << "B=" << static_cast(img(10, 10, 2)) << std::endl; + + // Try to initialize Vulkan context + std::cout << "\nTesting Vulkan context initialization...\n"; + if (!initializeVulkanContext()) { + std::cout << "Vulkan not available - skipping VkBuffer allocation test\n"; + std::cout << "Image loading test passed!\n"; + return 0; + } + + // Test VkBuffer allocation + std::cout << "Testing VkBuffer allocation...\n"; + if (!allocateVkBuffersForImage(img)) { + std::cerr << "Failed to allocate VkBuffers\n"; + cleanupVulkan(); + return -1; + } + + // Test VkBuffer wrapping with Halide + std::cout << "\nTesting VkBuffer wrapping with Halide...\n"; + auto vk_input = wrapVkBufferInput(img); + if (!vk_input.raw_buffer() || !vk_input.raw_buffer()->device_interface) { + std::cerr << "Failed to wrap input VkBuffer\n"; + cleanupVulkan(); + return -1; + } + + auto vk_output = wrapVkBufferOutput(img); + if (!vk_output.raw_buffer() || !vk_output.raw_buffer()->device_interface) { + std::cerr << "Failed to wrap output VkBuffer\n"; + cleanupVulkan(); + return -1; + } + + std::cout << "Successfully created wrapped Halide buffers:\n"; + std::cout << " Input: " << vk_input.width() << "x" << 
vk_input.height() + << "x" << vk_input.channels() << "\n"; + std::cout << " Output: " << vk_output.width() << "x" << vk_output.height() + << "\n"; + + // Copy host image data to wrapped VkBuffer input + std::cout << "\nCopying host image data to VkBuffer...\n"; + if (!copyHostDataToVkBuffer(img, vk_input)) { + std::cerr << "Failed to copy host data to VkBuffer\n"; + cleanupVulkan(); + return -1; + } + std::cout << "Successfully copied host image data to VkBuffer!\n"; + + // Execute RGB to grayscale conversion using wrapped buffers + std::cout << "\nExecuting RGB to grayscale conversion...\n"; + if (!executeConversionWithWrappedBuffers(vk_input, vk_output)) { + std::cerr << "Failed to execute conversion with wrapped buffers\n"; + cleanupVulkan(); + return -1; + } + std::cout << "Successfully executed RGB to grayscale conversion!\n"; + + std::cout << "\nAll tests passed!\n"; + + // Cleanup + cleanupVulkan(); + std::cout << "Cleaned up Vulkan resources\n"; + return 0; +} \ No newline at end of file diff --git a/apps/external_vk_demo/external_halide_override.cpp b/apps/external_vk_demo/external_halide_override.cpp new file mode 100644 index 000000000000..730afcd795d6 --- /dev/null +++ b/apps/external_vk_demo/external_halide_override.cpp @@ -0,0 +1,180 @@ +#include "external_halide_override.h" + +#include "HalideRuntimeVulkan.h" +#include "vk_buffer_wrap_halide_defs.h" + +#include +#include + +// Global state to track the external context and memory allocator +static struct { + std::atomic initialized{false}; + VkInstance instance = VK_NULL_HANDLE; + VkDevice device = VK_NULL_HANDLE; + VkPhysicalDevice physical_device = VK_NULL_HANDLE; + VkQueue queue = VK_NULL_HANDLE; + uint32_t queue_family_index = 0; + + // Memory allocator management + struct halide_vulkan_memory_allocator* allocator = nullptr; + std::atomic allocator_saved{false}; + + // Global spin lock for thread safety (simple atomic flag) + std::atomic_flag allocator_lock = ATOMIC_FLAG_INIT; +} 
g_external_context; + +extern "C" { + +int halide_vulkan_acquire_context( + void* user_context, struct halide_vulkan_memory_allocator** allocator, + VkInstance* instance, VkDevice* device, VkPhysicalDevice* physical_device, + VkQueue* queue, uint32_t* queue_family_index, + VkDebugUtilsMessengerEXT* messenger, bool create) { + + std::cout << "halide_vulkan_acquire_context called (create=" << create + << ")\n"; + + if (!g_external_context.initialized.load()) { + return 0; + } + + // Acquire global spin lock + while (g_external_context.allocator_lock.test_and_set( + std::memory_order_acquire)) { + // Spin wait + } + + // Provide application's Vulkan context to Halide + *instance = g_external_context.instance; + *device = g_external_context.device; + *physical_device = g_external_context.physical_device; + *queue = g_external_context.queue; + *queue_family_index = g_external_context.queue_family_index; + *messenger = VK_NULL_HANDLE; + + // Use saved allocator if we have one, otherwise let Halide create one + if (g_external_context.allocator_saved.load() && + g_external_context.allocator != nullptr) { + *allocator = g_external_context.allocator; + std::cout << "Using saved Halide memory allocator\n"; + } else { + *allocator = nullptr; + std::cout << "Letting Halide create new memory allocator\n"; + } + + // Release global spin lock + g_external_context.allocator_lock.clear(std::memory_order_release); + + std::cout << "Provided external Vulkan context to Halide\n"; + return 0; +} + +int halide_vulkan_release_context(void* user_context, VkInstance instance, + VkDevice device, VkQueue queue, + VkDebugUtilsMessengerEXT messenger) { + + std::cout << "halide_vulkan_release_context called\n"; + // Application retains ownership of context - nothing to release + return 0; +} + +int halide_vulkan_export_memory_allocator( + void* user_context, struct halide_vulkan_memory_allocator* allocator) { + std::cout << "halide_vulkan_export_memory_allocator called\n"; + + if (allocator == 
nullptr) { + std::cerr << "Error: Received null allocator in export_memory_allocator\n"; + return -1; + } + + // Acquire global spin lock + while (g_external_context.allocator_lock.test_and_set( + std::memory_order_acquire)) { + // Spin wait + } + + // Save the allocator for future acquire_context calls + g_external_context.allocator = allocator; + g_external_context.allocator_saved.store(true); + + // Release global spin lock + g_external_context.allocator_lock.clear(std::memory_order_release); + + std::cout << "Successfully saved Halide memory allocator for reuse\n"; + return 0; +} + +void register_external_vulkan_context(VkInstance instance, VkDevice device, + VkPhysicalDevice physical_device, + VkQueue queue, + uint32_t queue_family_index) { + + g_external_context.instance = instance; + g_external_context.device = device; + g_external_context.physical_device = physical_device; + g_external_context.queue = queue; + g_external_context.queue_family_index = queue_family_index; + g_external_context.initialized = true; + + std::cout << "Registered external Vulkan context with Halide\n"; +} + +void unregister_external_vulkan_context() { + std::cout << "Unregistering external Vulkan context from Halide...\n"; + + // Acquire global spin lock + while (g_external_context.allocator_lock.test_and_set( + std::memory_order_acquire)) { + // Spin wait + } + + // Release the memory allocator if we have one + if (g_external_context.allocator_saved.load() && + g_external_context.allocator != nullptr) { + std::cout << "Releasing Halide memory allocator...\n"; + int result = halide_vulkan_memory_allocator_release( + nullptr, g_external_context.allocator, g_external_context.instance, + VK_NULL_HANDLE); + if (result != 0) { + std::cerr << "Warning: Failed to release memory allocator, error code: " + << result << "\n"; + } else { + std::cout << "Successfully released Halide memory allocator\n"; + } + + g_external_context.allocator = nullptr; + 
g_external_context.allocator_saved.store(false); + } + + // Clear context + g_external_context.initialized.store(false); + g_external_context.instance = VK_NULL_HANDLE; + g_external_context.device = VK_NULL_HANDLE; + g_external_context.physical_device = VK_NULL_HANDLE; + g_external_context.queue = VK_NULL_HANDLE; + g_external_context.queue_family_index = 0; + + // Release global spin lock + g_external_context.allocator_lock.clear(std::memory_order_release); + + std::cout << "Unregistered external Vulkan context from Halide\n"; +} + +int halide_vulkan_detach_vk_buffer(void* user_context, halide_buffer_t* buf) { + if (buf->device == 0) { + return halide_error_code_success; + } + if (buf->device_interface != halide_vulkan_device_interface()) { + printf( + "Error: detach called on buffer with incompatible device interface: %p " + "vs %p\n", + buf->device_interface, halide_vulkan_device_interface()); + return halide_error_code_incompatible_device_interface; + } + auto* region = reinterpret_cast(buf->device); + region->is_owner = false; + region->handle = nullptr; + return halide_error_code_success; +} + +} // extern "C" diff --git a/apps/external_vk_demo/external_halide_override.h b/apps/external_vk_demo/external_halide_override.h new file mode 100644 index 000000000000..f0133ec77074 --- /dev/null +++ b/apps/external_vk_demo/external_halide_override.h @@ -0,0 +1,48 @@ +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// External Vulkan context override functions for Halide +// These functions allow applications to provide their own Vulkan context +// instead of letting Halide create its own + +int halide_vulkan_acquire_context( + void* user_context, struct halide_vulkan_memory_allocator** allocator, + VkInstance* instance, VkDevice* device, VkPhysicalDevice* physical_device, + VkQueue* queue, uint32_t* queue_family_index, + VkDebugUtilsMessengerEXT* messenger, bool create); + +int halide_vulkan_release_context(void* user_context, VkInstance 
instance, + VkDevice device, VkQueue queue, + VkDebugUtilsMessengerEXT messenger); + +int halide_vulkan_export_memory_allocator( + void* user_context, struct halide_vulkan_memory_allocator* allocator); + +int halide_vulkan_memory_allocator_release( + void* user_context, struct halide_vulkan_memory_allocator* allocator, + VkInstance instance, VkDebugUtilsMessengerEXT messenger); + +// Buffer wrapping functions +int halide_vulkan_wrap_vk_buffer(void* user_context, + struct halide_buffer_t* buf, + uint64_t vk_buffer); + +int halide_vulkan_detach_vk_buffer(void* user_context, + struct halide_buffer_t* buf); + +// Application interface to register Vulkan context +void register_external_vulkan_context(VkInstance instance, VkDevice device, + VkPhysicalDevice physical_device, + VkQueue queue, + uint32_t queue_family_index); + +void unregister_external_vulkan_context(); + +#ifdef __cplusplus +} +#endif diff --git a/apps/external_vk_demo/vk_buffer_wrap_halide_defs.h b/apps/external_vk_demo/vk_buffer_wrap_halide_defs.h new file mode 100644 index 000000000000..4f9495a262cd --- /dev/null +++ b/apps/external_vk_demo/vk_buffer_wrap_halide_defs.h @@ -0,0 +1,62 @@ +#pragma once + +#include + +// External Vulkan buffer management structures (duplicated from Halide +// internal) +enum class ExternalMemoryVisibility { + InvalidVisibility, //< invalid enum value + HostOnly, //< host local + DeviceOnly, //< device local + DeviceToHost, //< transfer from device to host + HostToDevice, //< transfer from host to device + DefaultVisibility, //< default visibility +}; + +enum class ExternalMemoryUsage { + InvalidUsage, //< invalid enum value + StaticStorage, //< intended for static storage + DynamicStorage, //< intended for dynamic storage + UniformStorage, //< intended for uniform storage + TransferSrc, //< intended for staging storage updates (source) + TransferDst, //< intended for staging storage updates (destination) + TransferSrcDst, //< intended for staging storage updates (source or 
+ //destination) + DefaultUsage //< default usage +}; + +enum class ExternalMemoryCaching { + InvalidCaching, //< invalid enum value + Cached, //< cached + Uncached, //< uncached + CachedCoherent, //< cached and coherent + UncachedCoherent, //< uncached but still coherent + DefaultCaching //< default caching +}; + +struct ExternalMemoryProperties { + ExternalMemoryVisibility visibility = + ExternalMemoryVisibility::InvalidVisibility; + ExternalMemoryUsage usage = ExternalMemoryUsage::InvalidUsage; + ExternalMemoryCaching caching = ExternalMemoryCaching::InvalidCaching; + size_t alignment = 0; //< required alignment of allocations + size_t nearest_multiple = + 0; //< require the allocation size to round up to nearest multiple +}; + +struct ExternalMemoryRange { + size_t head_offset = 0; //< byte offset from start of region + size_t tail_offset = 0; //< byte offset from end of region +}; + +struct ExternalVulkanBuffer { + void* handle = nullptr; //< client data storing native handle (VkBuffer*) + size_t offset = 0; //< offset from base address in block (in bytes) + size_t size = 0; //< allocated size (in bytes) + ExternalMemoryRange range; //< optional range (e.g. 
for handling crops, etc) + bool dedicated = + false; //< flag indicating whether allocation is one dedicated resource + bool is_owner = + true; //< flag indicating whether allocation is owned by this region + ExternalMemoryProperties properties; //< properties for the allocated region +}; \ No newline at end of file diff --git a/apps/external_vk_demo/vulkan_app.cpp b/apps/external_vk_demo/vulkan_app.cpp new file mode 100644 index 000000000000..24c78310adfd --- /dev/null +++ b/apps/external_vk_demo/vulkan_app.cpp @@ -0,0 +1,661 @@ +#include "vulkan_app.h" + +#include "Halide.h" +#include "HalideRuntimeVulkan.h" +#include "convert_generator.h" // AOT generated header +#include "external_halide_override.h" + +#include +#include +#include + +// Global state for the demo +static AppVulkanContext g_app_context{}; +static AppVulkanBuffers g_app_buffers{}; + +bool initializeVulkanContext() { + // First, check if we can get the Vulkan loader version + std::cout << "Checking Vulkan availability...\n"; + + // Check available instance extensions + uint32_t extension_count = 0; + vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); + + std::vector available_extensions(extension_count); + vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, + available_extensions.data()); + + std::cout << "Available Vulkan extensions: " << extension_count << std::endl; + for (const auto& extension : available_extensions) { + std::cout << " " << extension.extensionName << std::endl; + } + + // Check instance version + uint32_t api_version = 0; + VkResult version_result = vkEnumerateInstanceVersion(&api_version); + if (version_result == VK_SUCCESS) { + std::cout << "Vulkan API version: " << VK_VERSION_MAJOR(api_version) << "." + << VK_VERSION_MINOR(api_version) << "." 
+ << VK_VERSION_PATCH(api_version) << std::endl; + } + + VkApplicationInfo app_info{}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pApplicationName = "Halide External Context Demo"; + app_info.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + app_info.pEngineName = "No Engine"; + app_info.engineVersion = VK_MAKE_VERSION(1, 0, 0); + app_info.apiVersion = VK_API_VERSION_1_0; + + // Enable required extensions for macOS (MoltenVK) + std::vector required_extensions; + + // Check for portability enumeration extension (required on macOS) + bool has_portability = false; + for (const auto& extension : available_extensions) { + if (strcmp(extension.extensionName, + VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME) == 0) { + has_portability = true; + break; + } + } + + if (has_portability) { + required_extensions.push_back( + VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); + std::cout << "Enabling portability enumeration extension for macOS\n"; + } + + VkInstanceCreateInfo create_info{}; + create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + create_info.pApplicationInfo = &app_info; + create_info.enabledExtensionCount = + static_cast(required_extensions.size()); + create_info.ppEnabledExtensionNames = required_extensions.data(); + create_info.enabledLayerCount = 0; + create_info.ppEnabledLayerNames = nullptr; + + // Enable portability subset flag for macOS + if (has_portability) { + create_info.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; + } + + std::cout << "Creating Vulkan instance...\n"; + VkResult result = + vkCreateInstance(&create_info, nullptr, &g_app_context.instance); + if (result != VK_SUCCESS) { + std::cerr << "Failed to create Vulkan instance, error code: " << result; + switch (result) { + case VK_ERROR_OUT_OF_HOST_MEMORY: + std::cerr << " (VK_ERROR_OUT_OF_HOST_MEMORY)\n"; + break; + case VK_ERROR_OUT_OF_DEVICE_MEMORY: + std::cerr << " (VK_ERROR_OUT_OF_DEVICE_MEMORY)\n"; + break; + case VK_ERROR_INITIALIZATION_FAILED: + 
std::cerr << " (VK_ERROR_INITIALIZATION_FAILED)\n"; + break; + case VK_ERROR_LAYER_NOT_PRESENT: + std::cerr << " (VK_ERROR_LAYER_NOT_PRESENT)\n"; + break; + case VK_ERROR_EXTENSION_NOT_PRESENT: + std::cerr << " (VK_ERROR_EXTENSION_NOT_PRESENT)\n"; + break; + case VK_ERROR_INCOMPATIBLE_DRIVER: + std::cerr << " (VK_ERROR_INCOMPATIBLE_DRIVER)\n"; + break; + default: + std::cerr << " (unknown error)\n"; + break; + } + return false; + } + + // Find physical device + uint32_t device_count = 0; + vkEnumeratePhysicalDevices(g_app_context.instance, &device_count, nullptr); + if (device_count == 0) { + std::cerr << "No Vulkan physical devices found\n"; + return false; + } + + std::vector devices(device_count); + vkEnumeratePhysicalDevices(g_app_context.instance, &device_count, + devices.data()); + g_app_context.physical_device = devices[0]; + + // Find queue family + uint32_t queue_family_count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(g_app_context.physical_device, + &queue_family_count, nullptr); + + std::vector queue_families(queue_family_count); + vkGetPhysicalDeviceQueueFamilyProperties(g_app_context.physical_device, + &queue_family_count, + queue_families.data()); + + for (uint32_t i = 0; i < queue_family_count; i++) { + if (queue_families[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { + g_app_context.queue_family_index = i; + break; + } + } + + // Create logical device + VkDeviceQueueCreateInfo queue_create_info{}; + queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue_create_info.queueFamilyIndex = g_app_context.queue_family_index; + queue_create_info.queueCount = 1; + float queue_priority = 1.0f; + queue_create_info.pQueuePriorities = &queue_priority; + + VkDeviceCreateInfo device_info{}; + device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_info.queueCreateInfoCount = 1; + device_info.pQueueCreateInfos = &queue_create_info; + + if (vkCreateDevice(g_app_context.physical_device, &device_info, nullptr, + &g_app_context.device) != 
VK_SUCCESS) { + std::cerr << "Failed to create logical device\n"; + return false; + } + + vkGetDeviceQueue(g_app_context.device, g_app_context.queue_family_index, 0, + &g_app_context.queue); + + g_app_context.initialized = true; + std::cout << "Initialized application Vulkan context\n"; + return true; +} + +void cleanupVulkan() { + if (g_app_buffers.wrapped_input_buffer) { + std::cout << "Clearing wrapped input buffer reference...\n"; + if (halide_vulkan_detach_vk_buffer(nullptr, + g_app_buffers.wrapped_input_buffer)) { + std::cerr << "Failed to detach wrapped input buffer\n"; + } + } + if (g_app_buffers.wrapped_output_buffer) { + std::cout << "Clearing wrapped output buffer reference...\n"; + if (halide_vulkan_detach_vk_buffer(nullptr, + g_app_buffers.wrapped_output_buffer)) { + std::cerr << "Failed to detach wrapped output buffer\n"; + } + } + + if (g_app_context.input_mapped_memory) { + vkUnmapMemory(g_app_context.device, g_app_context.input_memory); + g_app_context.input_mapped_memory = nullptr; + } + if (g_app_context.output_mapped_memory) { + vkUnmapMemory(g_app_context.device, g_app_context.output_memory); + g_app_context.output_mapped_memory = nullptr; + } + + if (g_app_buffers.input_buffer) { + vkDestroyBuffer(g_app_context.device, g_app_buffers.input_buffer, nullptr); + g_app_buffers.input_buffer = VK_NULL_HANDLE; + } + if (g_app_buffers.output_buffer) { + vkDestroyBuffer(g_app_context.device, g_app_buffers.output_buffer, nullptr); + g_app_buffers.output_buffer = VK_NULL_HANDLE; + } + if (g_app_context.input_memory) { + vkFreeMemory(g_app_context.device, g_app_context.input_memory, nullptr); + g_app_context.input_memory = VK_NULL_HANDLE; + } + if (g_app_context.output_memory) { + vkFreeMemory(g_app_context.device, g_app_context.output_memory, nullptr); + g_app_context.output_memory = VK_NULL_HANDLE; + } + + // Unregister external context and release memory allocator + unregister_external_vulkan_context(); + + // Free heap-allocated regions + if 
(g_app_buffers.input_region) { + delete g_app_buffers.input_region; + g_app_buffers.input_region = nullptr; + } + if (g_app_buffers.output_region) { + delete g_app_buffers.output_region; + g_app_buffers.output_region = nullptr; + } + + if (g_app_context.device) { + vkDestroyDevice(g_app_context.device, nullptr); + g_app_context.device = VK_NULL_HANDLE; + } + if (g_app_context.instance) { + vkDestroyInstance(g_app_context.instance, nullptr); + g_app_context.instance = VK_NULL_HANDLE; + } + + g_app_context.initialized = false; + std::cout << "Cleaned up Vulkan resources\n"; +} + +// Access functions for external override registration +AppVulkanContext& getAppVulkanContext() { + return g_app_context; +} + +AppVulkanBuffers& getAppVulkanBuffers() { + return g_app_buffers; +} + +Halide::Runtime::Buffer loadTestImage() { + std::cout << "Creating synthetic test image for external context demo" + << std::endl; + + // Create a simple synthetic RGB image for testing with proper interleaved + // layout + const int width = 256, height = 256, channels = 3; + + // Allocate buffer with proper interleaved RGB layout [x, y, c] where + // stride(0) = 3 + Halide::Runtime::Buffer synthetic_img = + Halide::Runtime::Buffer::make_interleaved(width, height, + channels); + + // Fill with a simple pattern - checkerboard with gradients + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + // Create a checkerboard pattern with gradients + bool checker = ((x / 32) + (y / 32)) % 2; + if (checker) { + synthetic_img(x, y, 0) = (x + y) % 256; // Red gradient + synthetic_img(x, y, 1) = (x * 2) % 256; // Green gradient + synthetic_img(x, y, 2) = (y * 2) % 256; // Blue gradient + } else { + synthetic_img(x, y, 0) = 255 - (x % 256); // Inverted red + synthetic_img(x, y, 1) = 128; // Fixed green + synthetic_img(x, y, 2) = 255 - (y % 256); // Inverted blue + } + } + } + + std::cout << "Created synthetic RGB test image: " << width << "x" << height + << " pixels with interleaved 
layout" << std::endl; + return synthetic_img; +} + +bool allocateVkBuffersForImage( + const Halide::Runtime::Buffer& host_image) { + if (!g_app_context.initialized) { + std::cerr << "Vulkan context not initialized\n"; + return false; + } + + // Initial buffer sizes for VkBuffer creation (will be recalculated after + // alignment) + size_t initial_input_size = host_image.width() * host_image.height() * + host_image.channels(); // RGB: 3 channels + size_t initial_output_size = + host_image.width() * host_image.height() * 1; // Grayscale: 1 channel + + std::cout << "Allocating VkBuffers for image processing:\n"; + std::cout << " Input (RGB): " << host_image.width() << "x" + << host_image.height() << "x" << host_image.channels() + << " (initial: " << initial_input_size << " bytes)\n"; + std::cout << " Output (Grayscale): " << host_image.width() << "x" + << host_image.height() << "x1 (initial: " << initial_output_size + << " bytes)\n"; + + // Create input VkBuffer (RGB) + VkBufferCreateInfo input_buffer_info{}; + input_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + input_buffer_info.size = initial_input_size; + input_buffer_info.usage = + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + input_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + if (vkCreateBuffer(g_app_context.device, &input_buffer_info, nullptr, + &g_app_buffers.input_buffer) != VK_SUCCESS) { + std::cerr << "Failed to create input VkBuffer\n"; + return false; + } + + // Create output VkBuffer (Grayscale) + VkBufferCreateInfo output_buffer_info{}; + output_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + output_buffer_info.size = initial_output_size; + output_buffer_info.usage = + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + output_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + if (vkCreateBuffer(g_app_context.device, &output_buffer_info, nullptr, + &g_app_buffers.output_buffer) != VK_SUCCESS) { + std::cerr << 
"Failed to create output VkBuffer\n"; + return false; + } + + // Get memory requirements for both buffers + VkMemoryRequirements input_mem_req, output_mem_req; + vkGetBufferMemoryRequirements(g_app_context.device, + g_app_buffers.input_buffer, &input_mem_req); + vkGetBufferMemoryRequirements(g_app_context.device, + g_app_buffers.output_buffer, &output_mem_req); + + // Calculate stride based on Vulkan alignment requirements + // For RGB interleaved input: stride is total bytes per row with alignment + size_t input_row_bytes = host_image.width() * host_image.channels(); + g_app_buffers.input_stride = (input_row_bytes + input_mem_req.alignment - 1) & + ~(input_mem_req.alignment - 1); + + // For grayscale output: stride is total bytes per row with alignment + size_t output_row_bytes = host_image.width(); + g_app_buffers.output_stride = + (output_row_bytes + output_mem_req.alignment - 1) & + ~(output_mem_req.alignment - 1); + + // Recalculate actual buffer sizes based on aligned stride + size_t input_size = g_app_buffers.input_stride * host_image.height(); + size_t output_size = g_app_buffers.output_stride * host_image.height(); + + // Find suitable memory type (host-visible and coherent) + VkPhysicalDeviceMemoryProperties mem_properties; + vkGetPhysicalDeviceMemoryProperties(g_app_context.physical_device, + &mem_properties); + + // Find memory type for input buffer + uint32_t input_memory_type_index = UINT32_MAX; + for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) { + if ((input_mem_req.memoryTypeBits & (1 << i)) && + (mem_properties.memoryTypes[i].propertyFlags & + (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))) { + input_memory_type_index = i; + break; + } + } + + // Find memory type for output buffer + uint32_t output_memory_type_index = UINT32_MAX; + for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) { + if ((output_mem_req.memoryTypeBits & (1 << i)) && + (mem_properties.memoryTypes[i].propertyFlags & + 
(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))) { + output_memory_type_index = i; + break; + } + } + + if (input_memory_type_index == UINT32_MAX || + output_memory_type_index == UINT32_MAX) { + std::cerr << "Failed to find suitable memory types\n"; + return false; + } + + // Allocate separate memory for input buffer + VkMemoryAllocateInfo input_alloc_info{}; + input_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + input_alloc_info.allocationSize = input_mem_req.size; + input_alloc_info.memoryTypeIndex = input_memory_type_index; + + if (vkAllocateMemory(g_app_context.device, &input_alloc_info, nullptr, + &g_app_context.input_memory) != VK_SUCCESS) { + std::cerr << "Failed to allocate input VkBuffer memory\n"; + return false; + } + + // Allocate separate memory for output buffer + VkMemoryAllocateInfo output_alloc_info{}; + output_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + output_alloc_info.allocationSize = output_mem_req.size; + output_alloc_info.memoryTypeIndex = output_memory_type_index; + + if (vkAllocateMemory(g_app_context.device, &output_alloc_info, nullptr, + &g_app_context.output_memory) != VK_SUCCESS) { + std::cerr << "Failed to allocate output VkBuffer memory\n"; + return false; + } + + // Bind buffers to their separate memory (both at offset 0) + if (vkBindBufferMemory(g_app_context.device, g_app_buffers.input_buffer, + g_app_context.input_memory, 0) != VK_SUCCESS) { + std::cerr << "Failed to bind input VkBuffer memory\n"; + return false; + } + + if (vkBindBufferMemory(g_app_context.device, g_app_buffers.output_buffer, + g_app_context.output_memory, 0) != VK_SUCCESS) { + std::cerr << "Failed to bind output VkBuffer memory\n"; + return false; + } + + // Map memory for CPU access (separate mappings) + if (vkMapMemory(g_app_context.device, g_app_context.input_memory, 0, + VK_WHOLE_SIZE, 0, + &g_app_context.input_mapped_memory) != VK_SUCCESS) { + std::cerr << "Failed to map input VkBuffer 
memory\n"; + return false; + } + + if (vkMapMemory(g_app_context.device, g_app_context.output_memory, 0, + VK_WHOLE_SIZE, 0, + &g_app_context.output_mapped_memory) != VK_SUCCESS) { + std::cerr << "Failed to map output VkBuffer memory\n"; + return false; + } + + // Allocate ExternalVulkanBuffer regions on heap + g_app_buffers.input_region = new ExternalVulkanBuffer(); + g_app_buffers.input_region->handle = &g_app_buffers.input_buffer; + g_app_buffers.input_region->offset = 0; + g_app_buffers.input_region->size = input_size; + g_app_buffers.input_region->is_owner = true; + + g_app_buffers.output_region = new ExternalVulkanBuffer(); + g_app_buffers.output_region->handle = &g_app_buffers.output_buffer; + g_app_buffers.output_region->offset = 0; + g_app_buffers.output_region->size = output_size; + g_app_buffers.output_region->is_owner = true; + + std::cout << "Successfully allocated and bound VkBuffers:\n"; + std::cout << " Input buffer: separate memory, size " << input_size + << " bytes\n"; + std::cout << " Output buffer: separate memory, size " << output_size + << " bytes\n"; + std::cout << " Input stride: " << g_app_buffers.input_stride + << " bytes per row\n"; + std::cout << " Output stride: " << g_app_buffers.output_stride + << " bytes per row\n"; + + return true; +} + +// Helper function to set up proper Halide buffer dimensions with stride +void setupHalideBufferDimensions(halide_buffer_t* buf, int width, int height, + int channels, int stride_bytes) { + if (channels > 1) { + // RGB interleaved: [x, y, c] + buf->dimensions = 3; + buf->dim[0].min = 0; + buf->dim[0].extent = width; + buf->dim[0].stride = channels; // Skip channels to get to next x + + buf->dim[1].min = 0; + buf->dim[1].extent = height; + buf->dim[1].stride = + stride_bytes; // Total bytes per row (includes alignment) + + buf->dim[2].min = 0; + buf->dim[2].extent = channels; + buf->dim[2].stride = 1; // Adjacent channels + } else { + // Grayscale: [x, y] + buf->dimensions = 2; + buf->dim[0].min = 0; 
+ buf->dim[0].extent = width; + buf->dim[0].stride = 1; + + buf->dim[1].min = 0; + buf->dim[1].extent = height; + buf->dim[1].stride = + stride_bytes; // Total bytes per row (includes alignment) + } +} + +Halide::Runtime::Buffer wrapVkBufferInput( + const Halide::Runtime::Buffer& host_image) { + if (!g_app_context.initialized || !g_app_buffers.input_buffer) { + std::cerr + << "VkBuffer not allocated - call allocateVkBuffersForImage first\n"; + return Halide::Runtime::Buffer(); + } + + std::cout << "Wrapping input VkBuffer with Halide buffer...\n"; + + // Create a Halide buffer with the same dimensions as the host image + // (GPU-only) + Halide::Runtime::Buffer vk_input_buffer( + nullptr, host_image.width(), host_image.height(), host_image.channels()); + + // Use the actual stride calculated from Vulkan alignment requirements + int stride = g_app_buffers.input_stride; + setupHalideBufferDimensions(vk_input_buffer.raw_buffer(), host_image.width(), + host_image.height(), host_image.channels(), + stride); + + // Register our external context with Halide + register_external_vulkan_context(g_app_context.instance, g_app_context.device, + g_app_context.physical_device, + g_app_context.queue, + g_app_context.queue_family_index); + + // Wrap the VkBuffer using the MemoryRegion pattern + uint64_t memory_region_ptr = + reinterpret_cast(g_app_buffers.input_region); + + // Ensure device interface is set before wrapping + vk_input_buffer.raw_buffer()->device_interface = + halide_vulkan_device_interface(); + g_app_buffers.wrapped_input_buffer = vk_input_buffer.raw_buffer(); + + int result = halide_vulkan_wrap_vk_buffer( + nullptr, g_app_buffers.wrapped_input_buffer, memory_region_ptr); + if (result != 0) { + std::cerr << "Failed to wrap input VkBuffer with Halide, error code: " + << result << "\n"; + return Halide::Runtime::Buffer(); + } + + // Verify buffer setup + size_t calculated_size = vk_input_buffer.size_in_bytes(); + std::cout << "Successfully wrapped input VkBuffer (" << 
host_image.width() + << "x" << host_image.height() << "x" << host_image.channels() + << "), calculated size: " << calculated_size << " bytes\n"; + + return vk_input_buffer; +} + +Halide::Runtime::Buffer wrapVkBufferOutput( + const Halide::Runtime::Buffer& host_image) { + if (!g_app_context.initialized || !g_app_buffers.output_buffer) { + std::cerr + << "VkBuffer not allocated - call allocateVkBuffersForImage first\n"; + return Halide::Runtime::Buffer(); + } + + std::cout << "Wrapping output VkBuffer with Halide buffer...\n"; + + // Create a Halide buffer for grayscale output (2D - width x height, no + // channels, GPU-only) + Halide::Runtime::Buffer vk_output_buffer( + nullptr, host_image.width(), host_image.height()); + + // Use the actual stride calculated from Vulkan alignment requirements + int stride = g_app_buffers.output_stride; + setupHalideBufferDimensions(vk_output_buffer.raw_buffer(), host_image.width(), + host_image.height(), 1, stride); + + // Wrap the VkBuffer using the MemoryRegion pattern + uint64_t memory_region_ptr = + reinterpret_cast(g_app_buffers.output_region); + + // Ensure device interface is set before wrapping + vk_output_buffer.raw_buffer()->device_interface = + halide_vulkan_device_interface(); + g_app_buffers.wrapped_output_buffer = vk_output_buffer.raw_buffer(); + + int result = halide_vulkan_wrap_vk_buffer( + nullptr, g_app_buffers.wrapped_output_buffer, memory_region_ptr); + if (result != 0) { + std::cerr << "Failed to wrap output VkBuffer with Halide, error code: " + << result << "\n"; + return Halide::Runtime::Buffer(); + } + + // Verify buffer setup + size_t calculated_size = vk_output_buffer.size_in_bytes(); + std::cout << "Successfully wrapped output VkBuffer (" << host_image.width() + << "x" << host_image.height() + << "), calculated size: " << calculated_size << " bytes\n"; + + return vk_output_buffer; +} + +bool copyHostDataToVkBuffer( + const Halide::Runtime::Buffer& host_image, + const Halide::Runtime::Buffer& vk_buffer) { 
+ if (!g_app_context.initialized || !g_app_buffers.input_buffer) { + std::cerr + << "Vulkan context not initialized or input buffer not allocated\n"; + return false; + } + + std::cout + << "Copying host image data to VkBuffer using halide_buffer_copy...\n"; + std::cout << " Source (host): " << host_image.width() << "x" + << host_image.height() << "x" << host_image.channels() << "\n"; + std::cout << " Dest (VkBuffer): " << vk_buffer.width() << "x" + << vk_buffer.height() << "x" << vk_buffer.channels() << "\n"; + + // Use Halide's buffer copy function to handle the transfer + // Need to cast away const for halide_buffer_copy API + int result = halide_buffer_copy( + nullptr, const_cast(host_image.raw_buffer()), + halide_vulkan_device_interface(), + const_cast(vk_buffer.raw_buffer())); + + if (result != 0) { + std::cerr << "halide_buffer_copy failed with error code: " << result + << "\n"; + return false; + } + + std::cout << "Successfully copied host image data to VkBuffer!\n"; + return true; +} + +bool executeConversionWithWrappedBuffers( + const Halide::Runtime::Buffer& vk_input, + const Halide::Runtime::Buffer& vk_output) { + if (!g_app_context.initialized) { + std::cerr << "Vulkan context not initialized\n"; + return false; + } + + std::cout << "Executing RGB to grayscale conversion using AOT generated " + "function...\n"; + std::cout << " Input buffer: " << vk_input.width() << "x" + << vk_input.height() << "x" << vk_input.channels() << "\n"; + std::cout << " Output buffer: " << vk_output.width() << "x" + << vk_output.height() << "\n"; + + // Call the AOT generated convert_generator function + int result = + convert_generator(const_cast(vk_input.raw_buffer()), + const_cast(vk_output.raw_buffer())); + + if (result != 0) { + std::cerr << "convert_generator failed with error code: " << result << "\n"; + return false; + } + + std::cout << "Successfully executed RGB to grayscale conversion!\n"; + return true; +} \ No newline at end of file diff --git 
a/apps/external_vk_demo/vulkan_app.h b/apps/external_vk_demo/vulkan_app.h new file mode 100644 index 000000000000..e4d47368abb1 --- /dev/null +++ b/apps/external_vk_demo/vulkan_app.h @@ -0,0 +1,74 @@ +#pragma once + +#include "HalideBuffer.h" +#include "vk_buffer_wrap_halide_defs.h" + +#include + +#include + +// Application Vulkan context +struct AppVulkanContext { + bool initialized = false; + VkInstance instance = VK_NULL_HANDLE; + VkDevice device = VK_NULL_HANDLE; + VkPhysicalDevice physical_device = VK_NULL_HANDLE; + VkQueue queue = VK_NULL_HANDLE; + uint32_t queue_family_index = 0; + VkDeviceMemory input_memory = VK_NULL_HANDLE; + VkDeviceMemory output_memory = VK_NULL_HANDLE; + void* input_mapped_memory = nullptr; + void* output_mapped_memory = nullptr; +}; + +// Application buffer resources +struct AppVulkanBuffers { + VkBuffer input_buffer = VK_NULL_HANDLE; + VkBuffer output_buffer = VK_NULL_HANDLE; + ExternalVulkanBuffer* input_region = nullptr; // Heap allocated + ExternalVulkanBuffer* output_region = nullptr; // Heap allocated + + // Stride information calculated from Vulkan alignment requirements + int input_stride = 0; + int output_stride = 0; + + // Track wrapped Halide buffers for proper cleanup + halide_buffer_t* wrapped_input_buffer = nullptr; + halide_buffer_t* wrapped_output_buffer = nullptr; +}; + +// Vulkan application interface +bool initializeVulkanContext(); +bool createVulkanBuffers(size_t buffer_size); +void cleanupVulkan(); + +// Image loading functions +Halide::Runtime::Buffer loadTestImage(); + +// VkBuffer allocation functions +bool allocateVkBuffersForImage( + const Halide::Runtime::Buffer& host_image); + +// VkBuffer wrapping with Halide functions +Halide::Runtime::Buffer wrapVkBufferInput( + const Halide::Runtime::Buffer& host_image); +Halide::Runtime::Buffer wrapVkBufferOutput( + const Halide::Runtime::Buffer& host_image); + +// Data copying functions +bool copyHostDataToVkBuffer( + const Halide::Runtime::Buffer& host_image, + 
const Halide::Runtime::Buffer& vk_buffer); + +// Conversion functions +bool executeConversionWithWrappedBuffers( + const Halide::Runtime::Buffer& vk_input, + const Halide::Runtime::Buffer& vk_output); + +// Test functions +bool testJITWithExternalResources(); +bool testAOTWithExternalResources(); + +// Access to global context (for external override registration) +AppVulkanContext& getAppVulkanContext(); +AppVulkanBuffers& getAppVulkanBuffers(); \ No newline at end of file