diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml
index c83cff317555..a930a82805b6 100644
--- a/.github/workflows/pip.yml
+++ b/.github/workflows/pip.yml
@@ -6,9 +6,9 @@ name: Build PyPI package
 
 on:
   push:
-    branches: [ main, build/pip-packaging ]
+    branches: [ main, release/*.x ]
   release:
-    types: [ created ]
+    types: [ published ]
 
 env:
   # TODO: detect this from repo somehow: https://github.com/halide/Halide/issues/8406
@@ -253,7 +253,9 @@ jobs:
       - uses: pypa/gh-action-pypi-publish@release/v1
         if: github.event_name == 'push' && github.ref_name == 'main'
         with:
-          repository_url: https://test.pypi.org/legacy/
+          repository-url: https://test.pypi.org/legacy/
 
       - uses: pypa/gh-action-pypi-publish@release/v1
-        if: github.event_name == 'release' && github.event.action == 'published'
+        if: >
+          github.event_name == 'release' && github.event.action == 'published' ||
+          github.event_name == 'push' && (github.ref_name == 'release/19.x' || github.ref_name == 'v19.0.0')
diff --git a/apps/external_vk_demo/CMakeLists.txt b/apps/external_vk_demo/CMakeLists.txt
new file mode 100644
index 000000000000..8704fd543653
--- /dev/null
+++ b/apps/external_vk_demo/CMakeLists.txt
@@ -0,0 +1,124 @@
+cmake_minimum_required(VERSION 3.16)
+project(external_vk_demo)
+
+enable_testing()
+
+# Set up language settings
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+set(CMAKE_CXX_EXTENSIONS NO)
+
+# Define Halide installation path
+# Halide was configured for this demo with a command like the following:
+# cmake -G Ninja -S . -B build \
+#   -DCMAKE_BUILD_TYPE=Debug \
+#   -DCMAKE_OSX_ARCHITECTURES=arm64 \
+#   -DCMAKE_OSX_SYSROOT="$(xcrun --sdk macosx --show-sdk-path)" \
+#   -DLLVM_DIR="$(brew --prefix llvm@19)/lib/cmake/llvm" \
+#   -DHalide_WASM_BACKEND=wabt \
+#   -DWITH_PYTHON_BINDINGS=OFF \
+#   -DWITH_TUTORIALS=OFF \
+#   -DBUILD_SHARED_LIBS=OFF \
+#   -DHalide_BUNDLE_STATIC=ON
+# Then build:
+# cmake --build build -j32
+# Then install:
+# cmake --install build --prefix "$PWD/install"
+set(HALIDE_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../install)
+
+# Create our own Halide targets exactly like the Bazel BUILD file
+
+# Halide runtime headers (equivalent to ":runtime")
+add_library(Halide_Runtime INTERFACE)
+target_include_directories(Halide_Runtime INTERFACE ${HALIDE_INSTALL_DIR}/include)
+
+# Halide static library (equivalent to ":lib_halide_static")
+add_library(Halide_Static STATIC IMPORTED)
+# FIXME: the -L paths below are hard-coded to one developer's Homebrew install (llvm@19, wabt 1.0.37, lld@19 19.1.7); adjust them for your machine.
+set_target_properties(Halide_Static PROPERTIES
+    IMPORTED_LOCATION ${HALIDE_INSTALL_DIR}/lib/libHalide.a
+    INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INSTALL_DIR}/include
+    INTERFACE_LINK_LIBRARIES "-L/opt/homebrew/opt/llvm@19/lib;-lLLVM-19;-L/opt/homebrew/Cellar/wabt/1.0.37/lib;-lwabt;-lwasm-rt-impl;-L/opt/homebrew/Cellar/lld@19/19.1.7/lib;-llldWasm;-llldCommon;-framework Foundation;-framework CoreFoundation;-framework CoreGraphics;-framework CoreVideo;-framework IOKit;-framework AppKit;-lz;-ldl;-lpthread;-lcurses;-lffi"
+)
+target_link_libraries(Halide_Static INTERFACE Halide_Runtime)
+
+# Halide GenGen library (equivalent to ":gengen")
+add_library(Halide_GenGen STATIC IMPORTED)
+set_target_properties(Halide_GenGen PROPERTIES
+    IMPORTED_LOCATION ${HALIDE_INSTALL_DIR}/lib/libHalide_GenGen.a
+    INTERFACE_INCLUDE_DIRECTORIES "${HALIDE_INSTALL_DIR}/include;${HALIDE_INSTALL_DIR}/share/Halide/tools"
+    # Force load the GenGen library (equivalent to alwayslink = True)
+    INTERFACE_LINK_LIBRARIES "-Wl,-force_load,${HALIDE_INSTALL_DIR}/lib/libHalide_GenGen.a"
+)
+target_link_libraries(Halide_GenGen INTERFACE Halide_Static)
+
+# Find Vulkan
+find_package(Vulkan REQUIRED)
+
+# Generator executable - create it manually like Bazel does
+add_executable(convert_generator convert_generator.cc)
+target_link_libraries(convert_generator
+    PRIVATE
+    Halide_GenGen
+    Vulkan::Vulkan)
+
+# Manual AOT compilation using custom command (following Makefile pattern)
+# Note: Requires Vulkan library to be in your path. Set environment before building:
+# On macOS: export DYLD_LIBRARY_PATH=/path/to/vulkan/lib:$DYLD_LIBRARY_PATH
+# On Linux: export LD_LIBRARY_PATH=/path/to/vulkan/lib:$LD_LIBRARY_PATH
+add_custom_command(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.a ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.h
+    COMMAND $<TARGET_FILE:convert_generator> -g convert_generator -o ${CMAKE_CURRENT_BINARY_DIR} target=host-vulkan-vk_int8
+    DEPENDS convert_generator
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    COMMENT "Generating Vulkan AOT library (ensure Vulkan is in library path)"
+)
+
+# Create custom target and library from generated files
+add_custom_target(convert_aot_files DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.a ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.h)
+add_library(convert_aot STATIC IMPORTED)
+set_target_properties(convert_aot PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/convert_generator.a)
+add_dependencies(convert_aot convert_aot_files)
+target_link_libraries(convert_aot INTERFACE Vulkan::Vulkan Halide_Static)
+
+# External Halide override library
+add_library(external_halide_override
+    external_halide_override.cpp
+    external_halide_override.h)
+target_link_libraries(external_halide_override
+    PRIVATE
+    convert_aot
+    Halide_Static
+    Halide_GenGen
+    Vulkan::Vulkan)
+
+# Vulkan app library
+add_library(vulkan_app
+    vulkan_app.cpp
+    vulkan_app.h)
+target_include_directories(vulkan_app PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${HALIDE_INSTALL_DIR}/share/tools)
+add_dependencies(vulkan_app convert_aot_files) # Ensure generated header exists
+target_link_libraries(vulkan_app
+    PRIVATE
+    Halide_Static
+    Vulkan::Vulkan
+    external_halide_override)
+
+# Simple test executable to verify image loading
+add_executable(demo_main demo_main.cpp)
+target_include_directories(demo_main PRIVATE ${HALIDE_INSTALL_DIR}/share/tools)
+target_link_libraries(demo_main
+    PRIVATE
+    vulkan_app
+    external_halide_override
+    convert_aot
+    Halide_Static
+    Halide_GenGen
+    Vulkan::Vulkan)
+
+# Only tested on macOS. Windows will probably fail because the demo overrides Halide's WEAK runtime symbols.
+if (NOT WIN32)
+    target_link_libraries(external_halide_override PRIVATE dl pthread)
+    target_link_libraries(vulkan_app PRIVATE dl pthread)
+    target_link_libraries(demo_main PRIVATE dl pthread)
+endif ()
\ No newline at end of file
diff --git a/apps/external_vk_demo/convert_generator.cc b/apps/external_vk_demo/convert_generator.cc
new file mode 100644
index 000000000000..00831debd859
--- /dev/null
+++ b/apps/external_vk_demo/convert_generator.cc
@@ -0,0 +1,36 @@
+#include "Halide.h"
+
+using namespace Halide;
+
+// RGB to Grayscale conversion generator for AOT compilation
+class ConvertGenerator : public Halide::Generator<ConvertGenerator> {
+public:
+    Input<Buffer<uint8_t, 3>> input{"input"};
+    Output<Buffer<uint8_t, 2>> output{"output"};
+
+    void generate() {
+        Var x("x"), y("y");
+        input.dim(0).set_stride(3);
+        input.dim(2).set_stride(1);
+        input.dim(2).set_bounds(0, 3);
+
+        // RGB to Grayscale conversion using standard luminance formula
+        output(x, y) = cast<uint8_t>(
+            0.299f * cast<float>(input(x, y, 0)) + // Red
+            0.587f * cast<float>(input(x, y, 1)) + // Green
+            0.114f * cast<float>(input(x, y, 2)) // Blue
+        );
+        // Schedule for target
+        // dumb scheduling
+        if (get_target().has_feature(Target::Vulkan)) {
+            // GPU scheduling for Vulkan
+            Var xi("xi"), yi("yi");
+            output.gpu_tile(x, y, xi, yi, 16, 16);
+        } else {
+            // CPU scheduling
+            output.vectorize(x, 8);
+        }
+    }
+};
+
+HALIDE_REGISTER_GENERATOR(ConvertGenerator, convert_generator)
\ No newline at end of file
diff --git a/apps/external_vk_demo/demo_main.cpp
b/apps/external_vk_demo/demo_main.cpp
new file mode 100644
index 000000000000..157c873afb30
--- /dev/null
+++ b/apps/external_vk_demo/demo_main.cpp
@@ -0,0 +1,83 @@
+#include "vulkan_app.h"
+
+#include <iostream>
+
+int main() {
+  std::cout << "External Vulkan Demo\n";
+
+  // Test synthetic image generation first
+  auto img = loadTestImage();
+
+  std::cout << "Image loaded successfully!\n";
+  std::cout << "Dimensions: " << img.width() << "x" << img.height() << "x"
+            << img.channels() << std::endl;
+
+  // Check some pixel values to verify the image is properly loaded
+  std::cout << "Sample pixel values at (10,10): ";
+  std::cout << "R=" << static_cast<int>(img(10, 10, 0)) << " ";
+  std::cout << "G=" << static_cast<int>(img(10, 10, 1)) << " ";
+  std::cout << "B=" << static_cast<int>(img(10, 10, 2)) << std::endl;
+
+  // Try to initialize Vulkan context
+  std::cout << "\nTesting Vulkan context initialization...\n";
+  if (!initializeVulkanContext()) {
+    std::cout << "Vulkan not available - skipping VkBuffer allocation test\n";
+    std::cout << "Image loading test passed!\n";
+    return 0;
+  }
+
+  // Test VkBuffer allocation
+  std::cout << "Testing VkBuffer allocation...\n";
+  if (!allocateVkBuffersForImage(img)) {
+    std::cerr << "Failed to allocate VkBuffers\n";
+    cleanupVulkan();
+    return -1;
+  }
+
+  // Test VkBuffer wrapping with Halide
+  std::cout << "\nTesting VkBuffer wrapping with Halide...\n";
+  auto vk_input = wrapVkBufferInput(img);
+  if (!vk_input.raw_buffer() || !vk_input.raw_buffer()->device_interface) {
+    std::cerr << "Failed to wrap input VkBuffer\n";
+    cleanupVulkan();
+    return -1;
+  }
+
+  auto vk_output = wrapVkBufferOutput(img);
+  if (!vk_output.raw_buffer() || !vk_output.raw_buffer()->device_interface) {
+    std::cerr << "Failed to wrap output VkBuffer\n";
+    cleanupVulkan();
+    return -1;
+  }
+
+  std::cout << "Successfully created wrapped Halide buffers:\n";
+  std::cout << " Input: " << vk_input.width() << "x" << vk_input.height()
+            << "x" << vk_input.channels() << "\n";
+  std::cout << " Output: " << vk_output.width() << "x" << vk_output.height()
+            << "\n";
+
+  // Copy host image data to wrapped VkBuffer input
+  std::cout << "\nCopying host image data to VkBuffer...\n";
+  if (!copyHostDataToVkBuffer(img, vk_input)) {
+    std::cerr << "Failed to copy host data to VkBuffer\n";
+    cleanupVulkan();
+    return -1;
+  }
+  std::cout << "Successfully copied host image data to VkBuffer!\n";
+
+  // Execute RGB to grayscale conversion using wrapped buffers
+  std::cout << "\nExecuting RGB to grayscale conversion...\n";
+  if (!executeConversionWithWrappedBuffers(vk_input, vk_output)) {
+    std::cerr << "Failed to execute conversion with wrapped buffers\n";
+    cleanupVulkan();
+    return -1;
+  }
+  std::cout << "Successfully executed RGB to grayscale conversion!\n";
+
+  std::cout << "\nAll tests passed!\n";
+
+  // Cleanup
+  cleanupVulkan();
+  std::cout << "Cleaned up Vulkan resources\n";
+  return 0;
+}
\ No newline at end of file
diff --git a/apps/external_vk_demo/external_halide_override.cpp b/apps/external_vk_demo/external_halide_override.cpp
new file mode 100644
index 000000000000..730afcd795d6
--- /dev/null
+++ b/apps/external_vk_demo/external_halide_override.cpp
@@ -0,0 +1,225 @@
+#include "external_halide_override.h"
+
+#include "HalideRuntimeVulkan.h"
+#include "vk_buffer_wrap_halide_defs.h"
+
+#include <atomic>
+#include <iostream>
+
+// Global state to track the external context and memory allocator
+static struct {
+  std::atomic<bool> initialized{false};
+  VkInstance instance = VK_NULL_HANDLE;
+  VkDevice device = VK_NULL_HANDLE;
+  VkPhysicalDevice physical_device = VK_NULL_HANDLE;
+  VkQueue queue = VK_NULL_HANDLE;
+  uint32_t queue_family_index = 0;
+
+  // Memory allocator management
+  struct halide_vulkan_memory_allocator* allocator = nullptr;
+  std::atomic<bool> allocator_saved{false};
+
+  // Global spin lock for thread safety (simple atomic flag)
+  std::atomic_flag allocator_lock = ATOMIC_FLAG_INIT;
+} g_external_context;
+
+namespace {
+
+// RAII holder for the global spin lock: spins until the flag is acquired and
+// clears it on scope exit, so every return path releases the lock.
+class ContextLock {
+ public:
+  ContextLock() {
+    while (g_external_context.allocator_lock.test_and_set(
+        std::memory_order_acquire)) {
+      // Spin wait
+    }
+  }
+  ~ContextLock() {
+    g_external_context.allocator_lock.clear(std::memory_order_release);
+  }
+  ContextLock(const ContextLock&) = delete;
+  ContextLock& operator=(const ContextLock&) = delete;
+};
+
+}  // namespace
+
+extern "C" {
+
+// Hands the application's registered Vulkan handles (and any allocator
+// previously exported by Halide) to the Halide runtime.
+// Returns halide_error_code_success on success. When no context is registered
+// all outputs are set to null; that is an error only if the caller asked for
+// a context to be created (create == true).
+int halide_vulkan_acquire_context(
+    void* user_context, struct halide_vulkan_memory_allocator** allocator,
+    VkInstance* instance, VkDevice* device, VkPhysicalDevice* physical_device,
+    VkQueue* queue, uint32_t* queue_family_index,
+    VkDebugUtilsMessengerEXT* messenger, bool create) {
+
+  std::cout << "halide_vulkan_acquire_context called (create=" << create
+            << ")\n";
+
+  {
+    ContextLock lock;
+
+    // Test the flag under the lock so this cannot interleave with
+    // unregister_external_vulkan_context() clearing the handles.
+    if (!g_external_context.initialized.load()) {
+      // Never leave the outputs untouched: the caller would otherwise read
+      // whatever values they held before the call.
+      *allocator = nullptr;
+      *instance = VK_NULL_HANDLE;
+      *device = VK_NULL_HANDLE;
+      *physical_device = VK_NULL_HANDLE;
+      *queue = VK_NULL_HANDLE;
+      *queue_family_index = 0;
+      *messenger = VK_NULL_HANDLE;
+      if (!create) {
+        // A non-creating query with nothing registered is not a failure.
+        return halide_error_code_success;
+      }
+      std::cerr << "Error: no external Vulkan context has been registered\n";
+      return halide_error_code_generic_error;
+    }
+
+    // Provide application's Vulkan context to Halide
+    *instance = g_external_context.instance;
+    *device = g_external_context.device;
+    *physical_device = g_external_context.physical_device;
+    *queue = g_external_context.queue;
+    *queue_family_index = g_external_context.queue_family_index;
+    *messenger = VK_NULL_HANDLE;
+
+    // Use saved allocator if we have one, otherwise let Halide create one
+    if (g_external_context.allocator_saved.load() &&
+        g_external_context.allocator != nullptr) {
+      *allocator = g_external_context.allocator;
+      std::cout << "Using saved Halide memory allocator\n";
+    } else {
+      *allocator = nullptr;
+      std::cout << "Letting Halide create new memory allocator\n";
+    }
+  }
+
+  std::cout << "Provided external Vulkan context to Halide\n";
+  return halide_error_code_success;
+}
+
+int halide_vulkan_release_context(void* user_context, VkInstance instance,
+                                  VkDevice device, VkQueue queue,
+                                  VkDebugUtilsMessengerEXT messenger) {
+
+  std::cout << "halide_vulkan_release_context called\n";
+  // Application retains ownership of context - nothing to release
+  return halide_error_code_success;
+}
+
+// Remembers the allocator passed in so later acquire calls can hand it
+// back. Returns halide_error_code_generic_error for a null allocator.
+int halide_vulkan_export_memory_allocator(
+    void* user_context, struct halide_vulkan_memory_allocator* allocator) {
+  std::cout << "halide_vulkan_export_memory_allocator called\n";
+
+  if (allocator == nullptr) {
+    std::cerr << "Error: Received null allocator in export_memory_allocator\n";
+    return halide_error_code_generic_error;
+  }
+
+  {
+    ContextLock lock;
+    // Save the allocator for future acquire_context calls
+    g_external_context.allocator = allocator;
+    g_external_context.allocator_saved.store(true);
+  }
+
+  std::cout << "Successfully saved Halide memory allocator for reuse\n";
+  return halide_error_code_success;
+}
+
+// Publishes the application's Vulkan handles for Halide to use.
+void register_external_vulkan_context(VkInstance instance, VkDevice device,
+                                      VkPhysicalDevice physical_device,
+                                      VkQueue queue,
+                                      uint32_t queue_family_index) {
+
+  {
+    // Hold the lock so a concurrent acquire/unregister never observes a
+    // half-written set of handles.
+    ContextLock lock;
+    g_external_context.instance = instance;
+    g_external_context.device = device;
+    g_external_context.physical_device = physical_device;
+    g_external_context.queue = queue;
+    g_external_context.queue_family_index = queue_family_index;
+    g_external_context.initialized = true;
+  }
+
+  std::cout << "Registered external Vulkan context with Halide\n";
+}
+
+// Releases the saved Halide allocator (if any) and forgets the registered
+// handles. The Vulkan objects themselves are not destroyed here.
+void unregister_external_vulkan_context() {
+  std::cout << "Unregistering external Vulkan context from Halide...\n";
+
+  {
+    ContextLock lock;
+
+    // Release the memory allocator if we have one
+    // NOTE(review): the release call below runs with the lock held; confirm
+    // the runtime never re-enters halide_vulkan_acquire_context from it.
+    if (g_external_context.allocator_saved.load() &&
+        g_external_context.allocator != nullptr) {
+      std::cout << "Releasing Halide memory allocator...\n";
+      int result = halide_vulkan_memory_allocator_release(
+          nullptr, g_external_context.allocator, g_external_context.instance,
+          VK_NULL_HANDLE);
+      if (result != 0) {
+        std::cerr << "Warning: Failed to release memory allocator, error code: "
+                  << result << "\n";
+      } else {
+        std::cout << "Successfully released Halide memory allocator\n";
+      }
+
+      g_external_context.allocator = nullptr;
+      g_external_context.allocator_saved.store(false);
+    }
+
+    // Clear context
+    g_external_context.initialized.store(false);
+    g_external_context.instance = VK_NULL_HANDLE;
+    g_external_context.device = VK_NULL_HANDLE;
+    g_external_context.physical_device = VK_NULL_HANDLE;
+    g_external_context.queue = VK_NULL_HANDLE;
+    g_external_context.queue_family_index = 0;
+  }
+
+  std::cout << "Unregistered external Vulkan context from Halide\n";
+}
+
+// Detaches a wrapped VkBuffer from `buf` without destroying it: ownership is
+// dropped and the native handle cleared on the region stored in buf->device.
+int halide_vulkan_detach_vk_buffer(void* user_context, halide_buffer_t* buf) {
+  if (buf == nullptr) {
+    std::cerr << "Error: detach called with a null buffer\n";
+    return halide_error_code_buffer_is_null;
+  }
+  if (buf->device == 0) {
+    return halide_error_code_success;
+  }
+  if (buf->device_interface != halide_vulkan_device_interface()) {
+    std::cerr << "Error: detach called on buffer with incompatible device "
+                 "interface: "
+              << static_cast<const void*>(buf->device_interface) << " vs "
+              << static_cast<const void*>(halide_vulkan_device_interface())
+              << "\n";
+    return halide_error_code_incompatible_device_interface;
+  }
+  auto* region = reinterpret_cast<ExternalVulkanBuffer*>(buf->device);
+  region->is_owner = false;
+  region->handle = nullptr;
+  return halide_error_code_success;
+}
+
+} // extern "C"
diff --git a/apps/external_vk_demo/external_halide_override.h b/apps/external_vk_demo/external_halide_override.h
new file mode 100644
index 000000000000..f0133ec77074
--- /dev/null
+++ b/apps/external_vk_demo/external_halide_override.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <vulkan/vulkan.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// External Vulkan context override functions for Halide
+// These functions allow applications to provide their own Vulkan context
+// instead of letting Halide create its own
+
+int halide_vulkan_acquire_context(
+    void* user_context, struct halide_vulkan_memory_allocator** allocator,
+    VkInstance* instance, VkDevice* device, VkPhysicalDevice* physical_device,
+    VkQueue* queue, uint32_t* queue_family_index,
+    VkDebugUtilsMessengerEXT* messenger, bool create);
+
+int halide_vulkan_release_context(void* user_context, VkInstance instance,
+                                  VkDevice device, VkQueue queue,
+                                  VkDebugUtilsMessengerEXT messenger);
+ +int halide_vulkan_export_memory_allocator( + void* user_context, struct halide_vulkan_memory_allocator* allocator); + +int halide_vulkan_memory_allocator_release( + void* user_context, struct halide_vulkan_memory_allocator* allocator, + VkInstance instance, VkDebugUtilsMessengerEXT messenger); + +// Buffer wrapping functions +int halide_vulkan_wrap_vk_buffer(void* user_context, + struct halide_buffer_t* buf, + uint64_t vk_buffer); + +int halide_vulkan_detach_vk_buffer(void* user_context, + struct halide_buffer_t* buf); + +// Application interface to register Vulkan context +void register_external_vulkan_context(VkInstance instance, VkDevice device, + VkPhysicalDevice physical_device, + VkQueue queue, + uint32_t queue_family_index); + +void unregister_external_vulkan_context(); + +#ifdef __cplusplus +} +#endif diff --git a/apps/external_vk_demo/vk_buffer_wrap_halide_defs.h b/apps/external_vk_demo/vk_buffer_wrap_halide_defs.h new file mode 100644 index 000000000000..4f9495a262cd --- /dev/null +++ b/apps/external_vk_demo/vk_buffer_wrap_halide_defs.h @@ -0,0 +1,62 @@ +#pragma once + +#include + +// External Vulkan buffer management structures (duplicated from Halide +// internal) +enum class ExternalMemoryVisibility { + InvalidVisibility, //< invalid enum value + HostOnly, //< host local + DeviceOnly, //< device local + DeviceToHost, //< transfer from device to host + HostToDevice, //< transfer from host to device + DefaultVisibility, //< default visibility +}; + +enum class ExternalMemoryUsage { + InvalidUsage, //< invalid enum value + StaticStorage, //< intended for static storage + DynamicStorage, //< intended for dynamic storage + UniformStorage, //< intended for uniform storage + TransferSrc, //< intended for staging storage updates (source) + TransferDst, //< intended for staging storage updates (destination) + TransferSrcDst, //< intended for staging storage updates (source or + //destination) + DefaultUsage //< default usage +}; + +enum class 
ExternalMemoryCaching { + InvalidCaching, //< invalid enum value + Cached, //< cached + Uncached, //< uncached + CachedCoherent, //< cached and coherent + UncachedCoherent, //< uncached but still coherent + DefaultCaching //< default caching +}; + +struct ExternalMemoryProperties { + ExternalMemoryVisibility visibility = + ExternalMemoryVisibility::InvalidVisibility; + ExternalMemoryUsage usage = ExternalMemoryUsage::InvalidUsage; + ExternalMemoryCaching caching = ExternalMemoryCaching::InvalidCaching; + size_t alignment = 0; //< required alignment of allocations + size_t nearest_multiple = + 0; //< require the allocation size to round up to nearest multiple +}; + +struct ExternalMemoryRange { + size_t head_offset = 0; //< byte offset from start of region + size_t tail_offset = 0; //< byte offset from end of region +}; + +struct ExternalVulkanBuffer { + void* handle = nullptr; //< client data storing native handle (VkBuffer*) + size_t offset = 0; //< offset from base address in block (in bytes) + size_t size = 0; //< allocated size (in bytes) + ExternalMemoryRange range; //< optional range (e.g. 
for handling crops, etc) + bool dedicated = + false; //< flag indicating whether allocation is one dedicated resource + bool is_owner = + true; //< flag indicating whether allocation is owned by this region + ExternalMemoryProperties properties; //< properties for the allocated region +}; \ No newline at end of file diff --git a/apps/external_vk_demo/vulkan_app.cpp b/apps/external_vk_demo/vulkan_app.cpp new file mode 100644 index 000000000000..24c78310adfd --- /dev/null +++ b/apps/external_vk_demo/vulkan_app.cpp @@ -0,0 +1,661 @@ +#include "vulkan_app.h" + +#include "Halide.h" +#include "HalideRuntimeVulkan.h" +#include "convert_generator.h" // AOT generated header +#include "external_halide_override.h" + +#include +#include +#include + +// Global state for the demo +static AppVulkanContext g_app_context{}; +static AppVulkanBuffers g_app_buffers{}; + +bool initializeVulkanContext() { + // First, check if we can get the Vulkan loader version + std::cout << "Checking Vulkan availability...\n"; + + // Check available instance extensions + uint32_t extension_count = 0; + vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); + + std::vector available_extensions(extension_count); + vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, + available_extensions.data()); + + std::cout << "Available Vulkan extensions: " << extension_count << std::endl; + for (const auto& extension : available_extensions) { + std::cout << " " << extension.extensionName << std::endl; + } + + // Check instance version + uint32_t api_version = 0; + VkResult version_result = vkEnumerateInstanceVersion(&api_version); + if (version_result == VK_SUCCESS) { + std::cout << "Vulkan API version: " << VK_VERSION_MAJOR(api_version) << "." + << VK_VERSION_MINOR(api_version) << "." 
+ << VK_VERSION_PATCH(api_version) << std::endl; + } + + VkApplicationInfo app_info{}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pApplicationName = "Halide External Context Demo"; + app_info.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + app_info.pEngineName = "No Engine"; + app_info.engineVersion = VK_MAKE_VERSION(1, 0, 0); + app_info.apiVersion = VK_API_VERSION_1_0; + + // Enable required extensions for macOS (MoltenVK) + std::vector required_extensions; + + // Check for portability enumeration extension (required on macOS) + bool has_portability = false; + for (const auto& extension : available_extensions) { + if (strcmp(extension.extensionName, + VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME) == 0) { + has_portability = true; + break; + } + } + + if (has_portability) { + required_extensions.push_back( + VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); + std::cout << "Enabling portability enumeration extension for macOS\n"; + } + + VkInstanceCreateInfo create_info{}; + create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + create_info.pApplicationInfo = &app_info; + create_info.enabledExtensionCount = + static_cast(required_extensions.size()); + create_info.ppEnabledExtensionNames = required_extensions.data(); + create_info.enabledLayerCount = 0; + create_info.ppEnabledLayerNames = nullptr; + + // Enable portability subset flag for macOS + if (has_portability) { + create_info.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; + } + + std::cout << "Creating Vulkan instance...\n"; + VkResult result = + vkCreateInstance(&create_info, nullptr, &g_app_context.instance); + if (result != VK_SUCCESS) { + std::cerr << "Failed to create Vulkan instance, error code: " << result; + switch (result) { + case VK_ERROR_OUT_OF_HOST_MEMORY: + std::cerr << " (VK_ERROR_OUT_OF_HOST_MEMORY)\n"; + break; + case VK_ERROR_OUT_OF_DEVICE_MEMORY: + std::cerr << " (VK_ERROR_OUT_OF_DEVICE_MEMORY)\n"; + break; + case VK_ERROR_INITIALIZATION_FAILED: + 
std::cerr << " (VK_ERROR_INITIALIZATION_FAILED)\n"; + break; + case VK_ERROR_LAYER_NOT_PRESENT: + std::cerr << " (VK_ERROR_LAYER_NOT_PRESENT)\n"; + break; + case VK_ERROR_EXTENSION_NOT_PRESENT: + std::cerr << " (VK_ERROR_EXTENSION_NOT_PRESENT)\n"; + break; + case VK_ERROR_INCOMPATIBLE_DRIVER: + std::cerr << " (VK_ERROR_INCOMPATIBLE_DRIVER)\n"; + break; + default: + std::cerr << " (unknown error)\n"; + break; + } + return false; + } + + // Find physical device + uint32_t device_count = 0; + vkEnumeratePhysicalDevices(g_app_context.instance, &device_count, nullptr); + if (device_count == 0) { + std::cerr << "No Vulkan physical devices found\n"; + return false; + } + + std::vector devices(device_count); + vkEnumeratePhysicalDevices(g_app_context.instance, &device_count, + devices.data()); + g_app_context.physical_device = devices[0]; + + // Find queue family + uint32_t queue_family_count = 0; + vkGetPhysicalDeviceQueueFamilyProperties(g_app_context.physical_device, + &queue_family_count, nullptr); + + std::vector queue_families(queue_family_count); + vkGetPhysicalDeviceQueueFamilyProperties(g_app_context.physical_device, + &queue_family_count, + queue_families.data()); + + for (uint32_t i = 0; i < queue_family_count; i++) { + if (queue_families[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { + g_app_context.queue_family_index = i; + break; + } + } + + // Create logical device + VkDeviceQueueCreateInfo queue_create_info{}; + queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue_create_info.queueFamilyIndex = g_app_context.queue_family_index; + queue_create_info.queueCount = 1; + float queue_priority = 1.0f; + queue_create_info.pQueuePriorities = &queue_priority; + + VkDeviceCreateInfo device_info{}; + device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_info.queueCreateInfoCount = 1; + device_info.pQueueCreateInfos = &queue_create_info; + + if (vkCreateDevice(g_app_context.physical_device, &device_info, nullptr, + &g_app_context.device) != 
VK_SUCCESS) { + std::cerr << "Failed to create logical device\n"; + return false; + } + + vkGetDeviceQueue(g_app_context.device, g_app_context.queue_family_index, 0, + &g_app_context.queue); + + g_app_context.initialized = true; + std::cout << "Initialized application Vulkan context\n"; + return true; +} + +void cleanupVulkan() { + if (g_app_buffers.wrapped_input_buffer) { + std::cout << "Clearing wrapped input buffer reference...\n"; + if (halide_vulkan_detach_vk_buffer(nullptr, + g_app_buffers.wrapped_input_buffer)) { + std::cerr << "Failed to detach wrapped input buffer\n"; + } + } + if (g_app_buffers.wrapped_output_buffer) { + std::cout << "Clearing wrapped output buffer reference...\n"; + if (halide_vulkan_detach_vk_buffer(nullptr, + g_app_buffers.wrapped_output_buffer)) { + std::cerr << "Failed to detach wrapped output buffer\n"; + } + } + + if (g_app_context.input_mapped_memory) { + vkUnmapMemory(g_app_context.device, g_app_context.input_memory); + g_app_context.input_mapped_memory = nullptr; + } + if (g_app_context.output_mapped_memory) { + vkUnmapMemory(g_app_context.device, g_app_context.output_memory); + g_app_context.output_mapped_memory = nullptr; + } + + if (g_app_buffers.input_buffer) { + vkDestroyBuffer(g_app_context.device, g_app_buffers.input_buffer, nullptr); + g_app_buffers.input_buffer = VK_NULL_HANDLE; + } + if (g_app_buffers.output_buffer) { + vkDestroyBuffer(g_app_context.device, g_app_buffers.output_buffer, nullptr); + g_app_buffers.output_buffer = VK_NULL_HANDLE; + } + if (g_app_context.input_memory) { + vkFreeMemory(g_app_context.device, g_app_context.input_memory, nullptr); + g_app_context.input_memory = VK_NULL_HANDLE; + } + if (g_app_context.output_memory) { + vkFreeMemory(g_app_context.device, g_app_context.output_memory, nullptr); + g_app_context.output_memory = VK_NULL_HANDLE; + } + + // Unregister external context and release memory allocator + unregister_external_vulkan_context(); + + // Free heap-allocated regions + if 
(g_app_buffers.input_region) { + delete g_app_buffers.input_region; + g_app_buffers.input_region = nullptr; + } + if (g_app_buffers.output_region) { + delete g_app_buffers.output_region; + g_app_buffers.output_region = nullptr; + } + + if (g_app_context.device) { + vkDestroyDevice(g_app_context.device, nullptr); + g_app_context.device = VK_NULL_HANDLE; + } + if (g_app_context.instance) { + vkDestroyInstance(g_app_context.instance, nullptr); + g_app_context.instance = VK_NULL_HANDLE; + } + + g_app_context.initialized = false; + std::cout << "Cleaned up Vulkan resources\n"; +} + +// Access functions for external override registration +AppVulkanContext& getAppVulkanContext() { + return g_app_context; +} + +AppVulkanBuffers& getAppVulkanBuffers() { + return g_app_buffers; +} + +Halide::Runtime::Buffer loadTestImage() { + std::cout << "Creating synthetic test image for external context demo" + << std::endl; + + // Create a simple synthetic RGB image for testing with proper interleaved + // layout + const int width = 256, height = 256, channels = 3; + + // Allocate buffer with proper interleaved RGB layout [x, y, c] where + // stride(0) = 3 + Halide::Runtime::Buffer synthetic_img = + Halide::Runtime::Buffer::make_interleaved(width, height, + channels); + + // Fill with a simple pattern - checkerboard with gradients + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + // Create a checkerboard pattern with gradients + bool checker = ((x / 32) + (y / 32)) % 2; + if (checker) { + synthetic_img(x, y, 0) = (x + y) % 256; // Red gradient + synthetic_img(x, y, 1) = (x * 2) % 256; // Green gradient + synthetic_img(x, y, 2) = (y * 2) % 256; // Blue gradient + } else { + synthetic_img(x, y, 0) = 255 - (x % 256); // Inverted red + synthetic_img(x, y, 1) = 128; // Fixed green + synthetic_img(x, y, 2) = 255 - (y % 256); // Inverted blue + } + } + } + + std::cout << "Created synthetic RGB test image: " << width << "x" << height + << " pixels with interleaved 
layout" << std::endl;
+  return synthetic_img;
+}
+
+bool allocateVkBuffersForImage(
+    const Halide::Runtime::Buffer& host_image) {
+  if (!g_app_context.initialized) {
+    std::cerr << "Vulkan context not initialized\n";
+    return false;
+  }
+
+  // Initial buffer sizes for VkBuffer creation (will be recalculated after
+  // alignment)
+  size_t initial_input_size = host_image.width() * host_image.height() *
+                              host_image.channels();  // RGB: 3 channels
+  size_t initial_output_size =
+      host_image.width() * host_image.height() * 1;  // Grayscale: 1 channel
+
+  std::cout << "Allocating VkBuffers for image processing:\n";
+  std::cout << " Input (RGB): " << host_image.width() << "x"
+            << host_image.height() << "x" << host_image.channels()
+            << " (initial: " << initial_input_size << " bytes)\n";
+  std::cout << " Output (Grayscale): " << host_image.width() << "x"
+            << host_image.height() << "x1 (initial: " << initial_output_size
+            << " bytes)\n";
+
+  // Create input VkBuffer (RGB)
+  VkBufferCreateInfo input_buffer_info{};
+  input_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  input_buffer_info.size = initial_input_size;
+  input_buffer_info.usage =
+      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+  input_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+
+  if (vkCreateBuffer(g_app_context.device, &input_buffer_info, nullptr,
+                     &g_app_buffers.input_buffer) != VK_SUCCESS) {
+    std::cerr << "Failed to create input VkBuffer\n";
+    return false;
+  }
+
+  // Create output VkBuffer (Grayscale)
+  VkBufferCreateInfo output_buffer_info{};
+  output_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  output_buffer_info.size = initial_output_size;
+  output_buffer_info.usage =
+      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+  output_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+
+  if (vkCreateBuffer(g_app_context.device, &output_buffer_info, nullptr,
+                     &g_app_buffers.output_buffer) != VK_SUCCESS) {
+    std::cerr << "Failed to create output VkBuffer\n";
+    return false;
+  }
+
+  // Get memory requirements for both buffers
+  VkMemoryRequirements input_mem_req, output_mem_req;
+  vkGetBufferMemoryRequirements(g_app_context.device,
+                                g_app_buffers.input_buffer, &input_mem_req);
+  vkGetBufferMemoryRequirements(g_app_context.device,
+                                g_app_buffers.output_buffer, &output_mem_req);
+
+  // Calculate stride based on Vulkan alignment requirements
+  // For RGB interleaved input: stride is total bytes per row with alignment
+  size_t input_row_bytes = host_image.width() * host_image.channels();
+  g_app_buffers.input_stride = (input_row_bytes + input_mem_req.alignment - 1) &
+                               ~(input_mem_req.alignment - 1);
+
+  // For grayscale output: stride is total bytes per row with alignment
+  size_t output_row_bytes = host_image.width();
+  g_app_buffers.output_stride =
+      (output_row_bytes + output_mem_req.alignment - 1) &
+      ~(output_mem_req.alignment - 1);
+
+  // Recalculate actual buffer sizes based on aligned stride
+  size_t input_size = g_app_buffers.input_stride * host_image.height();
+  size_t output_size = g_app_buffers.output_stride * host_image.height();
+
+  // Find suitable memory type (host-visible and coherent)
+  VkPhysicalDeviceMemoryProperties mem_properties;
+  vkGetPhysicalDeviceMemoryProperties(g_app_context.physical_device,
+                                      &mem_properties);
+
+  // Find memory type for input buffer: allowed by the buffer and carrying BOTH host flags
+  uint32_t input_memory_type_index = UINT32_MAX;
+  for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) {
+    if ((input_mem_req.memoryTypeBits & (1u << i)) &&
+        (mem_properties.memoryTypes[i].propertyFlags &
+         (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) ==
+            (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+      input_memory_type_index = i;
+      break;
+    }
+  }
+
+  // Find memory type for output buffer: allowed by the buffer and carrying BOTH host flags
+  uint32_t output_memory_type_index = UINT32_MAX;
+  for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) {
+    if ((output_mem_req.memoryTypeBits & (1u << i)) &&
+        (mem_properties.memoryTypes[i].propertyFlags &
+         (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) ==
+            (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+      output_memory_type_index = i;
+      break;
+    }
+  }
+
+  if (input_memory_type_index == UINT32_MAX ||
+      output_memory_type_index == UINT32_MAX) {
+    std::cerr << "Failed to find suitable memory types\n";
+    return false;
+  }
+
+  // Allocate separate memory for input buffer
+  VkMemoryAllocateInfo input_alloc_info{};
+  input_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  input_alloc_info.allocationSize = input_mem_req.size;
+  input_alloc_info.memoryTypeIndex = input_memory_type_index;
+
+  if (vkAllocateMemory(g_app_context.device, &input_alloc_info, nullptr,
+                       &g_app_context.input_memory) != VK_SUCCESS) {
+    std::cerr << "Failed to allocate input VkBuffer memory\n";
+    return false;
+  }
+
+  // Allocate separate memory for output buffer
+  VkMemoryAllocateInfo output_alloc_info{};
+  output_alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  output_alloc_info.allocationSize = output_mem_req.size;
+  output_alloc_info.memoryTypeIndex = output_memory_type_index;
+
+  if (vkAllocateMemory(g_app_context.device, &output_alloc_info, nullptr,
+                       &g_app_context.output_memory) != VK_SUCCESS) {
+    std::cerr << "Failed to allocate output VkBuffer memory\n";
+    return false;
+  }
+
+  // Bind buffers to their separate memory (both at offset 0)
+  if (vkBindBufferMemory(g_app_context.device, g_app_buffers.input_buffer,
+                         g_app_context.input_memory, 0) != VK_SUCCESS) {
+    std::cerr << "Failed to bind input VkBuffer memory\n";
+    return false;
+  }
+
+  if (vkBindBufferMemory(g_app_context.device, g_app_buffers.output_buffer,
+                         g_app_context.output_memory, 0) != VK_SUCCESS) {
+    std::cerr << "Failed to bind output VkBuffer memory\n";
+    return false;
+  }
+
+  // Map memory for CPU access (separate mappings)
+  if (vkMapMemory(g_app_context.device, g_app_context.input_memory, 0,
+                  VK_WHOLE_SIZE, 0,
+                  &g_app_context.input_mapped_memory) != VK_SUCCESS) {
+    std::cerr << "Failed to map input VkBuffer memory\n";
+    return false;
+  }
+
+  if (vkMapMemory(g_app_context.device, g_app_context.output_memory, 0,
+                  VK_WHOLE_SIZE, 0,
+                  &g_app_context.output_mapped_memory) != VK_SUCCESS) {
+    std::cerr << "Failed to map output VkBuffer memory\n";
+    return false;
+  }
+
+  // Allocate ExternalVulkanBuffer regions on heap
+  g_app_buffers.input_region = new ExternalVulkanBuffer();
+  g_app_buffers.input_region->handle = &g_app_buffers.input_buffer;
+  g_app_buffers.input_region->offset = 0;
+  g_app_buffers.input_region->size = input_size;
+  g_app_buffers.input_region->is_owner = true;
+
+  g_app_buffers.output_region = new ExternalVulkanBuffer();
+  g_app_buffers.output_region->handle = &g_app_buffers.output_buffer;
+  g_app_buffers.output_region->offset = 0;
+  g_app_buffers.output_region->size = output_size;
+  g_app_buffers.output_region->is_owner = true;
+
+  std::cout << "Successfully allocated and bound VkBuffers:\n";
+  std::cout << " Input buffer: separate memory, size " << input_size
+            << " bytes\n";
+  std::cout << " Output buffer: separate memory, size " << output_size
+            << " bytes\n";
+  std::cout << " Input stride: " << g_app_buffers.input_stride
+            << " bytes per row\n";
+  std::cout << " Output stride: " << g_app_buffers.output_stride
+            << " bytes per row\n";
+
+  return true;
+}
+
+// Helper function to set up proper Halide buffer dimensions with stride
+void setupHalideBufferDimensions(halide_buffer_t* buf, int width, int height,
+                                 int channels, int stride_bytes) {
+  if (channels > 1) {
+    // RGB interleaved: [x, y, c]
+    buf->dimensions = 3;
+    buf->dim[0].min = 0;
+    buf->dim[0].extent = width;
+    buf->dim[0].stride = channels;  // Skip channels to get to next x
+
+    buf->dim[1].min = 0;
+    buf->dim[1].extent = height;
+    buf->dim[1].stride =
+        stride_bytes;  // Total bytes per row (includes alignment)
+
+    buf->dim[2].min = 0;
+    buf->dim[2].extent = channels;
+    buf->dim[2].stride = 1;  // Adjacent channels
+  } else {
+    // Grayscale: [x, y]
+    buf->dimensions = 2;
+    buf->dim[0].min = 0;
+ buf->dim[0].extent = width; + buf->dim[0].stride = 1; + + buf->dim[1].min = 0; + buf->dim[1].extent = height; + buf->dim[1].stride = + stride_bytes; // Total bytes per row (includes alignment) + } +} + +Halide::Runtime::Buffer wrapVkBufferInput( + const Halide::Runtime::Buffer& host_image) { + if (!g_app_context.initialized || !g_app_buffers.input_buffer) { + std::cerr + << "VkBuffer not allocated - call allocateVkBuffersForImage first\n"; + return Halide::Runtime::Buffer(); + } + + std::cout << "Wrapping input VkBuffer with Halide buffer...\n"; + + // Create a Halide buffer with the same dimensions as the host image + // (GPU-only) + Halide::Runtime::Buffer vk_input_buffer( + nullptr, host_image.width(), host_image.height(), host_image.channels()); + + // Use the actual stride calculated from Vulkan alignment requirements + int stride = g_app_buffers.input_stride; + setupHalideBufferDimensions(vk_input_buffer.raw_buffer(), host_image.width(), + host_image.height(), host_image.channels(), + stride); + + // Register our external context with Halide + register_external_vulkan_context(g_app_context.instance, g_app_context.device, + g_app_context.physical_device, + g_app_context.queue, + g_app_context.queue_family_index); + + // Wrap the VkBuffer using the MemoryRegion pattern + uint64_t memory_region_ptr = + reinterpret_cast(g_app_buffers.input_region); + + // Ensure device interface is set before wrapping + vk_input_buffer.raw_buffer()->device_interface = + halide_vulkan_device_interface(); + g_app_buffers.wrapped_input_buffer = vk_input_buffer.raw_buffer(); + + int result = halide_vulkan_wrap_vk_buffer( + nullptr, g_app_buffers.wrapped_input_buffer, memory_region_ptr); + if (result != 0) { + std::cerr << "Failed to wrap input VkBuffer with Halide, error code: " + << result << "\n"; + return Halide::Runtime::Buffer(); + } + + // Verify buffer setup + size_t calculated_size = vk_input_buffer.size_in_bytes(); + std::cout << "Successfully wrapped input VkBuffer (" << 
host_image.width() + << "x" << host_image.height() << "x" << host_image.channels() + << "), calculated size: " << calculated_size << " bytes\n"; + + return vk_input_buffer; +} + +Halide::Runtime::Buffer wrapVkBufferOutput( + const Halide::Runtime::Buffer& host_image) { + if (!g_app_context.initialized || !g_app_buffers.output_buffer) { + std::cerr + << "VkBuffer not allocated - call allocateVkBuffersForImage first\n"; + return Halide::Runtime::Buffer(); + } + + std::cout << "Wrapping output VkBuffer with Halide buffer...\n"; + + // Create a Halide buffer for grayscale output (2D - width x height, no + // channels, GPU-only) + Halide::Runtime::Buffer vk_output_buffer( + nullptr, host_image.width(), host_image.height()); + + // Use the actual stride calculated from Vulkan alignment requirements + int stride = g_app_buffers.output_stride; + setupHalideBufferDimensions(vk_output_buffer.raw_buffer(), host_image.width(), + host_image.height(), 1, stride); + + // Wrap the VkBuffer using the MemoryRegion pattern + uint64_t memory_region_ptr = + reinterpret_cast(g_app_buffers.output_region); + + // Ensure device interface is set before wrapping + vk_output_buffer.raw_buffer()->device_interface = + halide_vulkan_device_interface(); + g_app_buffers.wrapped_output_buffer = vk_output_buffer.raw_buffer(); + + int result = halide_vulkan_wrap_vk_buffer( + nullptr, g_app_buffers.wrapped_output_buffer, memory_region_ptr); + if (result != 0) { + std::cerr << "Failed to wrap output VkBuffer with Halide, error code: " + << result << "\n"; + return Halide::Runtime::Buffer(); + } + + // Verify buffer setup + size_t calculated_size = vk_output_buffer.size_in_bytes(); + std::cout << "Successfully wrapped output VkBuffer (" << host_image.width() + << "x" << host_image.height() + << "), calculated size: " << calculated_size << " bytes\n"; + + return vk_output_buffer; +} + +bool copyHostDataToVkBuffer( + const Halide::Runtime::Buffer& host_image, + const Halide::Runtime::Buffer& vk_buffer) { 
+ if (!g_app_context.initialized || !g_app_buffers.input_buffer) { + std::cerr + << "Vulkan context not initialized or input buffer not allocated\n"; + return false; + } + + std::cout + << "Copying host image data to VkBuffer using halide_buffer_copy...\n"; + std::cout << " Source (host): " << host_image.width() << "x" + << host_image.height() << "x" << host_image.channels() << "\n"; + std::cout << " Dest (VkBuffer): " << vk_buffer.width() << "x" + << vk_buffer.height() << "x" << vk_buffer.channels() << "\n"; + + // Use Halide's buffer copy function to handle the transfer + // Need to cast away const for halide_buffer_copy API + int result = halide_buffer_copy( + nullptr, const_cast(host_image.raw_buffer()), + halide_vulkan_device_interface(), + const_cast(vk_buffer.raw_buffer())); + + if (result != 0) { + std::cerr << "halide_buffer_copy failed with error code: " << result + << "\n"; + return false; + } + + std::cout << "Successfully copied host image data to VkBuffer!\n"; + return true; +} + +bool executeConversionWithWrappedBuffers( + const Halide::Runtime::Buffer& vk_input, + const Halide::Runtime::Buffer& vk_output) { + if (!g_app_context.initialized) { + std::cerr << "Vulkan context not initialized\n"; + return false; + } + + std::cout << "Executing RGB to grayscale conversion using AOT generated " + "function...\n"; + std::cout << " Input buffer: " << vk_input.width() << "x" + << vk_input.height() << "x" << vk_input.channels() << "\n"; + std::cout << " Output buffer: " << vk_output.width() << "x" + << vk_output.height() << "\n"; + + // Call the AOT generated convert_generator function + int result = + convert_generator(const_cast(vk_input.raw_buffer()), + const_cast(vk_output.raw_buffer())); + + if (result != 0) { + std::cerr << "convert_generator failed with error code: " << result << "\n"; + return false; + } + + std::cout << "Successfully executed RGB to grayscale conversion!\n"; + return true; +} \ No newline at end of file diff --git 
a/apps/external_vk_demo/vulkan_app.h b/apps/external_vk_demo/vulkan_app.h new file mode 100644 index 000000000000..e4d47368abb1 --- /dev/null +++ b/apps/external_vk_demo/vulkan_app.h @@ -0,0 +1,74 @@ +#pragma once + +#include "HalideBuffer.h" +#include "vk_buffer_wrap_halide_defs.h" + +#include + +#include + +// Application Vulkan context +struct AppVulkanContext { + bool initialized = false; + VkInstance instance = VK_NULL_HANDLE; + VkDevice device = VK_NULL_HANDLE; + VkPhysicalDevice physical_device = VK_NULL_HANDLE; + VkQueue queue = VK_NULL_HANDLE; + uint32_t queue_family_index = 0; + VkDeviceMemory input_memory = VK_NULL_HANDLE; + VkDeviceMemory output_memory = VK_NULL_HANDLE; + void* input_mapped_memory = nullptr; + void* output_mapped_memory = nullptr; +}; + +// Application buffer resources +struct AppVulkanBuffers { + VkBuffer input_buffer = VK_NULL_HANDLE; + VkBuffer output_buffer = VK_NULL_HANDLE; + ExternalVulkanBuffer* input_region = nullptr; // Heap allocated + ExternalVulkanBuffer* output_region = nullptr; // Heap allocated + + // Stride information calculated from Vulkan alignment requirements + int input_stride = 0; + int output_stride = 0; + + // Track wrapped Halide buffers for proper cleanup + halide_buffer_t* wrapped_input_buffer = nullptr; + halide_buffer_t* wrapped_output_buffer = nullptr; +}; + +// Vulkan application interface +bool initializeVulkanContext(); +bool createVulkanBuffers(size_t buffer_size); +void cleanupVulkan(); + +// Image loading functions +Halide::Runtime::Buffer loadTestImage(); + +// VkBuffer allocation functions +bool allocateVkBuffersForImage( + const Halide::Runtime::Buffer& host_image); + +// VkBuffer wrapping with Halide functions +Halide::Runtime::Buffer wrapVkBufferInput( + const Halide::Runtime::Buffer& host_image); +Halide::Runtime::Buffer wrapVkBufferOutput( + const Halide::Runtime::Buffer& host_image); + +// Data copying functions +bool copyHostDataToVkBuffer( + const Halide::Runtime::Buffer& host_image, + 
const Halide::Runtime::Buffer& vk_buffer); + +// Conversion functions +bool executeConversionWithWrappedBuffers( + const Halide::Runtime::Buffer& vk_input, + const Halide::Runtime::Buffer& vk_output); + +// Test functions +bool testJITWithExternalResources(); +bool testAOTWithExternalResources(); + +// Access to global context (for external override registration) +AppVulkanContext& getAppVulkanContext(); +AppVulkanBuffers& getAppVulkanBuffers(); \ No newline at end of file diff --git a/src/runtime/HalideRuntimeVulkan.h b/src/runtime/HalideRuntimeVulkan.h index e150b7c6d00b..d49f559fc531 100644 --- a/src/runtime/HalideRuntimeVulkan.h +++ b/src/runtime/HalideRuntimeVulkan.h @@ -105,6 +105,22 @@ extern int halide_vulkan_release_context(void *user_context, VkDevice device, VkQueue queue, VkDebugUtilsMessengerEXT messenger); + +// - halide_vulkan_export_memory_allocator +// Exports the internally allocated memory allocator for users who set up their own +// context but still want to use Halide's memory allocator. The user must have overridden halide_vulkan_acquire_context +// and halide_vulkan_release_context, and must also override halide_vulkan_export_memory_allocator, using the same global spin +// lock to protect access to the allocator. The override lets the user save the allocator and hand it back from the +// halide_vulkan_acquire_context calls that Halide issues automatically to retrieve the custom context. +extern int halide_vulkan_export_memory_allocator(void *user_context, + struct halide_vulkan_memory_allocator *allocator); +// - halide_vulkan_memory_allocator_release +// Releases the internally allocated memory allocator; this is important for proper memory cleanup. The user must have overridden halide_vulkan_acquire_context +// and halide_vulkan_release_context, and must use the same global spin lock to protect access to the allocator.
+extern int halide_vulkan_memory_allocator_release(void *user_context,
+                                                  struct halide_vulkan_memory_allocator *allocator,
+                                                  VkInstance instance,
+                                                  VkDebugUtilsMessengerEXT messenger);
 // --
 
 // Override the default allocation callbacks (default uses Vulkan runtime implementation)
diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp
index 776088448891..2da946c86673 100644
--- a/src/runtime/vulkan.cpp
+++ b/src/runtime/vulkan.cpp
@@ -86,6 +86,21 @@ WEAK bool halide_vulkan_is_initialized() {
     return is_initialized;
 }
 
+// Default (weak) implementation of the allocator export hook. It only validates
+// the pointer while holding thread_lock; it does not store the allocator anywhere.
+WEAK int halide_vulkan_export_memory_allocator(void *user_context, halide_vulkan_memory_allocator *allocator) {
+    halide_mutex_lock(&thread_lock);
+    if (allocator == nullptr) {
+        // Unlock exactly once and return here: falling through to the common
+        // unlock below would release thread_lock a second time.
+        halide_mutex_unlock(&thread_lock);
+        error(user_context) << "Vulkan: Memory allocator is null!\n";
+        return halide_error_code_buffer_argument_is_null;
+    }
+    halide_mutex_unlock(&thread_lock);
+    return halide_error_code_success;
+}
+
 WEAK int halide_vulkan_device_free(void *user_context, halide_buffer_t *halide_buffer) {
     debug(user_context)
         << "halide_vulkan_device_free (user_context: " << user_context
@@ -253,6 +268,22 @@ WEAK int halide_vulkan_device_release(void *user_context) {
     return destroy_status;
 }
 
+// Rejects a null allocator, otherwise forwards to vk_release_memory_allocator.
+WEAK int halide_vulkan_memory_allocator_release(void *user_context,
+                                                struct halide_vulkan_memory_allocator *allocator,
+                                                VkInstance instance,
+                                                VkDebugUtilsMessengerEXT messenger) {
+    debug(user_context) << "halide_vulkan_memory_allocator_release (user_context: " << user_context << ")\n";
+    // Nothing can be released without an allocator
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Memory allocator is null!\n";
+        return halide_error_code_buffer_argument_is_null;
+    }
+
+    return vk_release_memory_allocator(user_context, (VulkanMemoryAllocator *)allocator,
+                                       instance, messenger);
+}
+
 WEAK int halide_vulkan_device_malloc(void *user_context, halide_buffer_t *buf) {
     debug(user_context)
         << "halide_vulkan_device_malloc
(user_context: " << user_context diff --git a/src/runtime/vulkan_context.h b/src/runtime/vulkan_context.h index 9e82b9fb5b64..1a45af8da0ea 100644 --- a/src/runtime/vulkan_context.h +++ b/src/runtime/vulkan_context.h @@ -58,6 +58,43 @@ class VulkanContext { error = halide_error_code_device_interface_no_device; halide_error_no_device_interface(user_context); } + // If user overrode halide_vulkan_acquire_context and returned nullptr for allocator, + // create Halide's allocator for the provided device. User must override `halide_vulkan_export_memory_allocator` + // and make sure to propagate it back at the next call of `halide_vulkan_acquire_context` as he overrides it. + if (allocator == nullptr && + instance != VK_NULL_HANDLE && + device != VK_NULL_HANDLE && + physical_device != VK_NULL_HANDLE) { +#ifdef DEBUG_RUNTIME + // Initialize clock for debug timing - normally done in halide_vulkan_acquire_context + halide_start_clock(user_context); +#endif + // make sure halide vulkan is loaded BEFORE creating allocator + debug(user_context) << "VulkanContext: Loading Vulkan function pointers for context override...\n"; + + vk_load_vulkan_loader_functions(user_context); + if (vkGetInstanceProcAddr == nullptr) { + debug(user_context) << "VulkanContext: Failed to load vkGetInstanceProcAddr from loader!\n"; + } else { + debug(user_context) << "VulkanContext: vkGetInstanceProcAddr loaded successfully: " << (void*)vkGetInstanceProcAddr << "\n"; + vk_load_vulkan_instance_functions(user_context, instance); + vk_load_vulkan_device_functions(user_context, device); + } + + allocator = vk_create_memory_allocator(user_context, device, physical_device, + halide_vulkan_get_allocation_callbacks(user_context)); + if (allocator == nullptr) { + error = halide_error_code_out_of_memory; + debug(user_context) << "Vulkan: Failed to create memory allocator for device!\n"; + return; + } + int result = halide_vulkan_export_memory_allocator(user_context, reinterpret_cast(allocator)); + if (result != 
halide_error_code_success) {
+                error = static_cast(result);
+                debug(user_context) << "Vulkan: Failed to export memory allocator for device!\n";
+                return;
+            }
+        }
         halide_debug_assert(user_context, allocator != nullptr);
         halide_debug_assert(user_context, instance != VK_NULL_HANDLE);
         halide_debug_assert(user_context, device != VK_NULL_HANDLE);
@@ -560,6 +597,22 @@ int vk_destroy_context(void *user_context, VulkanMemoryAllocator *allocator,
     return halide_error_code_success;
 }
 
+// Clean up only Halide's internal resources for external context (leaves device/instance alone)
+int vk_release_memory_allocator(void *user_context, VulkanMemoryAllocator *allocator,
+                                VkInstance instance, VkDebugUtilsMessengerEXT messenger) {
+    debug(user_context) << "vk_release_memory_allocator (user_context: " << user_context << ")\n";
+    // Clean up only Halide's internal resources, not the device/instance we don't own
+    if (allocator != nullptr) {
+        // The messenger teardown is handed the allocator pointer, so run it before the allocator
+        // is destroyed. NOTE(review): confirm vk_destroy_debug_utils_messenger dereferences it.
+        vk_destroy_debug_utils_messenger(user_context, instance, allocator, messenger);
+        vk_destroy_shader_modules(user_context, allocator);
+        vk_destroy_memory_allocator(user_context, allocator);
+    }
+
+    return halide_error_code_success;
+}
+
 // --------------------------------------------------------------------------
 
 VKAPI_ATTR VkBool32 VKAPI_CALL vk_debug_utils_messenger_callback(
diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h
index aeef545385cc..af56fb136cfc 100644
--- a/src/runtime/vulkan_internal.h
+++ b/src/runtime/vulkan_internal.h
@@ -66,6 +66,12 @@ int vk_destroy_context(
     VkPhysicalDevice physical_device,
     VkQueue queue);
 
+int vk_release_memory_allocator(
+    void *user_context,
+    VulkanMemoryAllocator *allocator,
+    VkInstance instance,
+    VkDebugUtilsMessengerEXT messenger);
+
 int vk_find_compute_capability(void *user_context, int *major, int *minor);
 int vk_create_instance(void *user_context, const StringTable &requested_layers, VkInstance *instance, const VkAllocationCallbacks *alloc_callbacks);