diff --git a/CMakeLists.txt b/CMakeLists.txt index 8617902..d57d76c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,14 +4,36 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8) # project name PROJECT(FourierConvolutionCUDALib CXX C) if(NOT(${CMAKE_VERSION} VERSION_LESS "3.0.0")) -cmake_policy(SET CMP0042 NEW) + cmake_policy(SET CMP0042 NEW) endif() if(${CMAKE_VERSION} VERSION_GREATER "3.1") -cmake_policy(SET CMP0054 NEW) + cmake_policy(SET CMP0054 NEW) endif() +set(CMAKE_CXX_STANDARD 03) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + option(ENABLE_TESTING "enable the test suite (requires boost to be installed" ON) +option(ENABLE_BENCHMARKS "enable the benchmark suite (requires google/benchmark to be installed" OFF) +# option(ENABLE_CXX11_ABI "enable _GLIBCXX_USE_CXX11_ABI in GCC 5.0+" ON) +# if(${WITH_CXX11_ABI}) +# set(CXX11_ABI_VALUE 1) +# else() +# set(CXX11_ABI_VALUE 0) +# endif() + + +# IF(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") +# if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0") +# set(WITH_CXX11_ABI ON) +# set(CXX11_ABI_VALUE 1) +# endif() + +# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI_VALUE}) +# message(">> [${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}] adding -D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI_VALUE}") +# endif() + # version number SET (FOURIERCONVOLUTIONCUDALIB_NAME "CUDA FOURIER CONVOLUTION LIBRARY") SET (FOURIERCONVOLUTIONCUDALIB_CODENAME "${PROJECT_NAME}") @@ -75,7 +97,7 @@ IF(INCLUDE_CUDA) FIND_PACKAGE(CUDA) IF(CUDA_FOUND) SET(CUDA_VERBOSE_BUILD ON) - set(CUDA_ARCHS 10;20;30;35;37;50;52;60;61;70) + #set(CUDA_ARCHS 10;20;30;35;37;50;52;60;61;70) SET(CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}") IF(APPLE) @@ -92,6 +114,7 @@ IF(INCLUDE_CUDA) ENDIF() ENDIF(APPLE) + if(NOT DEFINED SMS) set(CUDA_ARCHS 10;20;21) IF("${CUDA_VERSION}" VERSION_GREATER "4.5") @@ -118,7 +141,9 @@ IF(INCLUDE_CUDA) IF("${CUDA_VERSION}" VERSION_GREATER "8.0") 
list(APPEND CUDA_ARCHS 70)#8.0+ ENDIF() - + else() + set(CUDA_ARCHS ${SMS}) + endif() list(SORT CUDA_ARCHS) @@ -133,7 +158,11 @@ IF(INCLUDE_CUDA) list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_${CUDA_HIGHEST_SM},code=compute_${CUDA_HIGHEST_SM}") - MESSAGE(">> CUDA version ${CUDA_VERSION} detected, compiling for Compute Capability/ies ${CUDA_ARCHS} (highest SM: ${CUDA_HIGHEST_SM})") + MESSAGE(">> CUDA version ${CUDA_VERSION} detected, compiling for Compute Capability/ies ${CUDA_ARCHS} (highest SM: ${CUDA_HIGHEST_SM})") + + # if(WITH_CXX11_ABI) + # list(APPEND CUDA_NVCC_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=${CXX11_ABI_VALUE}") + # endif() set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE};-O2;--use_fast_math) set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG};-g;-G) diff --git a/README.md b/README.md index 707dc4e..e66edd4 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,20 @@ $ cmake -DCMAKE_INSTALL_PREFIX=/directory/of/your/choice -DBOOST_ROOT=/path/to/b Here, ```/path/to/boost/root``` should contain the boost libraries and the boost headers. +Benchmarks +---------- + +The repo contains a small utility (in alpha stage) that can be used to run benchmarks. To enable building it, do: + +``` bash +$ cd repo +$ mkdir build +$ cd build +$ cmake -DENABLE_BENCHMARKS=ON .. +$ make +$ ./tests/bench_gpu_convolve +[gpu 0] inplace, 10x, (image 128**3, kernel 3**3) 1.326021s wall, 1.020000s user + 0.300000s system = 1.320000s CPU (99.5%) +``` How to get Help =============== diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b8047c0..4c15f59 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,7 +1,7 @@ INCLUDE_DIRECTORIES(.) 
-FIND_PACKAGE (Boost 1.42 QUIET COMPONENTS # system filesystem - unit_test_framework REQUIRED) +FIND_PACKAGE (Boost 1.42 QUIET COMPONENTS system filesystem timer unit_test_framework program_options + REQUIRED) IF(Boost_FOUND) INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) @@ -9,6 +9,7 @@ ENDIF() FIND_PACKAGE(CUDA) + IF(CUDA_FOUND) INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src) LINK_DIRECTORIES(${PROJECT_BINARY_DIR}/src) @@ -17,21 +18,27 @@ CUDA_ADD_EXECUTABLE(test_gpu_convolve test_gpu_convolve.cpp image_stack_utils.cp CUDA_ADD_EXECUTABLE(test_gpu_numerical_stability test_gpu_numerical_stability.cpp image_stack_utils.cpp) CUDA_ADD_EXECUTABLE(test_how_cufft_works test_how_cufft_works.cu image_stack_utils.cpp) + IF(Boost_FOUND) - MESSAGE(">> Boost UTF: ${Boost_LIBRARIES} ") - target_link_libraries(test_gpu_convolve ${Boost_LIBRARIES} ${PROJECT_NAME}) + MESSAGE(">> Boost UTF: ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ") + target_link_libraries(test_gpu_convolve ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${PROJECT_NAME}) set_target_properties(test_gpu_convolve PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK") - target_link_libraries(test_gpu_numerical_stability ${Boost_LIBRARIES} ${PROJECT_NAME}) + target_link_libraries(test_gpu_numerical_stability ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${PROJECT_NAME}) set_target_properties(test_gpu_numerical_stability PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK") - target_link_libraries(test_how_cufft_works ${Boost_LIBRARIES} ) + target_link_libraries(test_how_cufft_works ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ) CUDA_ADD_CUFFT_TO_TARGET( test_how_cufft_works ) + if(ENABLE_BENCHMARKS) + CUDA_ADD_EXECUTABLE(bench_gpu_convolve bench_gpu_convolve.cu) + target_link_libraries(bench_gpu_convolve ${Boost_TIMER_LIBRARY} ${Boost_PROGRAM_OPTIONS_LIBRARY} ${PROJECT_NAME}) +endif() ENDIF(Boost_FOUND) + ELSE(CUDA_FOUND) MESSAGE(WARNING "Skipping GPU based tests, CUDA not found\!") ENDIF(CUDA_FOUND) diff --git 
a/tests/bench_gpu_convolve.cu b/tests/bench_gpu_convolve.cu new file mode 100644 index 0000000..1301716 --- /dev/null +++ b/tests/bench_gpu_convolve.cu @@ -0,0 +1,59 @@ +#include <iostream> +#include <vector> + +#include "padd_utils.h" + +#include "convolution3Dfft.h" +#include "test_utils.hpp" +#include "image_stack_utils.h" +#include "traits.hpp" + +#include <boost/timer/timer.hpp> +#include <boost/program_options.hpp> + +using namespace boost::timer; +namespace po = boost::program_options; + +int main(int ac, char** av) { + + // Declare the supported options. + po::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("image_size", po::value<int>()->default_value(128), "set the 3D image size, so the image will extent sizexsizexsize") + ("kernel_size", po::value<int>()->default_value(3), "set the kernel size, so the kernel will extent sizexsizexsize") + ("gpu", po::value<int>()->default_value(-1), "gpu device to use, if value=-1, the highest device with highest compute capability is used") + ; + + po::variables_map vm; + po::store(po::parse_command_line(ac, av, desc), vm); + po::notify(vm); + + if (vm.count("help")) { + std::cout << desc << "\n"; + return 1; + } + + int device_id = vm["gpu"].as<int>(); + if(device_id < 0) + device_id = selectDeviceWithHighestComputeCapability(); + + std::vector<int> image_dims(3,vm["image_size"].as<int>()); + std::size_t image_len = std::pow(vm["image_size"].as<int>(),3); + std::vector<float> image(image_len,0.); + + std::vector<int> kernel_dims(3,vm["kernel_size"].as<int>()); + std::size_t kernel_len = std::pow(vm["kernel_size"].as<int>(),3); + std::vector<float> kernel(kernel_len,0); + + cpu_timer timer; + for (int i = 0;i<10;++i){ + + convolution3DfftCUDAInPlace(&image[0], &image_dims[0] , + &kernel[0], &kernel_dims[0] , + device_id); + } + std::cout << "[gpu "<< device_id << "] inplace, 10x, (image "<< image_dims.front() <<"**3, kernel "<< kernel_dims.front() <<"**3)" << timer.format() << '\n'; + + +}