From ade72310b8dcb6ab50f6ea8904ad0bd9026b6b1c Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 20 Jan 2026 04:32:00 +0000 Subject: [PATCH 1/5] Set LTO option only if not using debug config Previously, the CMakeLists.txt files added `-flto` to some of the compilation targets unconditionally. The addition should only be done if the compiler supports LTO, and it shouldn't be done if the cmake configuration is the debug configuration. --- CMakeLists.txt | 4 ++++ pybind_interface/avx2/CMakeLists.txt | 8 +++++++- pybind_interface/avx512/CMakeLists.txt | 8 +++++++- pybind_interface/basic/CMakeLists.txt | 8 +++++++- pybind_interface/sse/CMakeLists.txt | 8 +++++++- 5 files changed, 32 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b824c81d..604e39dc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,10 @@ endif() find_package(OpenMP REQUIRED) +# Check for LTO support (which in CMake is lumped in with IPO). +include(CheckIPOSupported) +check_ipo_supported(RESULT HAVE_IPO) + # Always build the basic part. add_subdirectory(pybind_interface/basic) add_subdirectory(pybind_interface/decide) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index b8a989d2b..eaee747ab 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -21,7 +21,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. 
add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>) elseif(LINUX) - add_compile_options(-mavx2 -mfma -O3 -flto=auto) + add_compile_options(-mavx2 -mfma -O3) execute_process( COMMAND bash --noprofile -c "grep -qs bmi2 /proc/cpuinfo" RESULT_VARIABLE _EXIT_CODE @@ -50,3 +50,9 @@ include(../GetPybind11.cmake) pybind11_add_module(qsim_avx2 pybind_main_avx2.cpp) target_link_libraries(qsim_avx2 PUBLIC OpenMP::OpenMP_CXX) + +if(HAVE_IPO) + set_property(TARGET qsim_avx2 PROPERTY + INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> + ) +endif() diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index a718a99f3..46321e11a 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -21,7 +21,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>) else() - add_compile_options(-mavx512f -mbmi2 -O3 -flto=auto) + add_compile_options(-mavx512f -mbmi2 -O3) endif() if(APPLE) @@ -43,3 +43,9 @@ include(../GetPybind11.cmake) pybind11_add_module(qsim_avx512 pybind_main_avx512.cpp) target_link_libraries(qsim_avx512 PUBLIC OpenMP::OpenMP_CXX) + +if(HAVE_IPO) + set_property(TARGET qsim_avx512 PROPERTY + INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> + ) +endif() diff --git a/pybind_interface/basic/CMakeLists.txt b/pybind_interface/basic/CMakeLists.txt index c8731ec8a..380cf7cc8 100644 --- a/pybind_interface/basic/CMakeLists.txt +++ b/pybind_interface/basic/CMakeLists.txt @@ -21,7 +21,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. 
add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>) else() - add_compile_options(-O3 -flto=auto) + add_compile_options(-O3) endif() if(APPLE) @@ -43,3 +43,9 @@ include(../GetPybind11.cmake) pybind11_add_module(qsim_basic pybind_main_basic.cpp) target_link_libraries(qsim_basic PUBLIC OpenMP::OpenMP_CXX) + +if(HAVE_IPO) + set_property(TARGET qsim_basic PROPERTY + INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> + ) +endif() diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index 195af28c3..3562ca2b9 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -21,7 +21,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>) else() - add_compile_options(-msse4 -O3 -flto=auto) + add_compile_options(-msse4 -O3) endif() if(APPLE) @@ -43,3 +43,9 @@ include(../GetPybind11.cmake) pybind11_add_module(qsim_sse pybind_main_sse.cpp) target_link_libraries(qsim_sse PUBLIC OpenMP::OpenMP_CXX) + +if(HAVE_IPO) + set_property(TARGET qsim_sse PROPERTY + INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> + ) +endif() From c42bd3f52cc4e29d9345b768b4ae2426572ac960 Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 20 Jan 2026 04:49:35 +0000 Subject: [PATCH 2/5] Encapsulate LTO option setting in a function --- CMakeLists.txt | 9 +++++++++ pybind_interface/avx2/CMakeLists.txt | 6 +----- pybind_interface/avx512/CMakeLists.txt | 6 +----- pybind_interface/basic/CMakeLists.txt | 6 +----- pybind_interface/sse/CMakeLists.txt | 6 +----- 5 files changed, 13 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 604e39dc3..e10984ee6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,6 +58,15 @@ find_package(OpenMP REQUIRED) include(CheckIPOSupported) check_ipo_supported(RESULT HAVE_IPO) +# Helper function for setting LTO flag (but only if the config is not debug). 
+function(enable_lto target_name) + if(HAVE_IPO) + # IPO properties are not documented to accept generator expressions; use the per-config _DEBUG override. + set_target_properties(${target_name} PROPERTIES + INTERPROCEDURAL_OPTIMIZATION ON INTERPROCEDURAL_OPTIMIZATION_DEBUG OFF) + endif() +endfunction() + # Always build the basic part. add_subdirectory(pybind_interface/basic) add_subdirectory(pybind_interface/decide) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index eaee747ab..411cada99 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -51,8 +51,4 @@ pybind11_add_module(qsim_avx2 pybind_main_avx2.cpp) target_link_libraries(qsim_avx2 PUBLIC OpenMP::OpenMP_CXX) -if(HAVE_IPO) - set_property(TARGET qsim_avx2 PROPERTY - INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> - ) -endif() +enable_lto(qsim_avx2) diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 46321e11a..7932b3d7b 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -44,8 +44,4 @@ pybind11_add_module(qsim_avx512 pybind_main_avx512.cpp) target_link_libraries(qsim_avx512 PUBLIC OpenMP::OpenMP_CXX) -if(HAVE_IPO) - set_property(TARGET qsim_avx512 PROPERTY - INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> - ) -endif() +enable_lto(qsim_avx512) diff --git a/pybind_interface/basic/CMakeLists.txt b/pybind_interface/basic/CMakeLists.txt index 380cf7cc8..6b8a04b87 100644 --- a/pybind_interface/basic/CMakeLists.txt +++ b/pybind_interface/basic/CMakeLists.txt @@ -44,8 +44,4 @@ pybind11_add_module(qsim_basic pybind_main_basic.cpp) target_link_libraries(qsim_basic PUBLIC OpenMP::OpenMP_CXX) -if(HAVE_IPO) - set_property(TARGET qsim_basic PROPERTY - INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> - ) -endif() +enable_lto(qsim_basic) diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index 3562ca2b9..4c5ee2b57 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -44,8 +44,4 @@ pybind11_add_module(qsim_sse pybind_main_sse.cpp) target_link_libraries(qsim_sse 
PUBLIC OpenMP::OpenMP_CXX) -if(HAVE_IPO) - set_property(TARGET qsim_sse PROPERTY - INTERPROCEDURAL_OPTIMIZATION $<NOT:$<CONFIG:Debug>> - ) -endif() +enable_lto(qsim_sse) From d1545c12288269c80a6de051669e0d87af890d57 Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 20 Jan 2026 20:01:26 +0000 Subject: [PATCH 3/5] The Makefile should also test if can use `-fopenmp` The CMake files now check if OpenMP can be used. The Makefiles should too. --- Makefile | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 0f7e4ed25..cf5c60e94 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ CXX ?= g++ NVCC ?= nvcc HIPCC ?= hipcc -BASE_CXXFLAGS := -std=c++17 -fopenmp +BASE_CXXFLAGS := -std=c++17 BASE_NVCCFLAGS := -std c++17 -Wno-deprecated-gpu-targets BASE_HIPCCFLAGS := @@ -48,8 +48,16 @@ HIPCCFLAGS := $(BASE_HIPCCFLAGS) $(HIPCCFLAGS) LTO_FLAGS := -flto=auto USING_CLANG := $(shell $(CXX) --version | grep -isq clang && echo "true") -ifeq ($(USING_CLANG),"true") - LTO_FLAGS := -flto +ifeq ($(USING_CLANG),true) + LTO_FLAGS := -flto +endif + +# Test if OpenMP header files are available and we can link with the library. 
+OMP_CHECK_CMD := echo "int main() { return 0; }" | \ + $(CXX) -fopenmp -x c++ - -o /dev/null 2>/dev/null +HAVE_OPENMP := $(shell $(OMP_CHECK_CMD) && echo "true") +ifeq ($(HAVE_OPENMP),true) + OPENMP_FLAGS := -fopenmp endif ifdef DEBUG @@ -58,7 +66,7 @@ ifdef DEBUG NVCCFLAGS += $(DEBUG_FLAGS) HIPCCFLAGS += $(DEBUG_FLAGS) else - CXXFLAGS += -O3 $(LTO_FLAGS) + CXXFLAGS += -O3 $(OPENMP_FLAGS) $(LTO_FLAGS) NVCCFLAGS += -O3 HIPCCFLAGS += -O3 endif From c5369d98a33d35629b8fe7571af1138a1fd1c7a7 Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 20 Jan 2026 20:47:37 +0000 Subject: [PATCH 4/5] Revert change made in incorrect branch --- Makefile | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index cf5c60e94..dc87de73d 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ CXX ?= g++ NVCC ?= nvcc HIPCC ?= hipcc -BASE_CXXFLAGS := -std=c++17 +BASE_CXXFLAGS := -std=c++17 -fopenmp BASE_NVCCFLAGS := -std c++17 -Wno-deprecated-gpu-targets BASE_HIPCCFLAGS := @@ -52,21 +52,13 @@ ifeq ($(USING_CLANG),true) LTO_FLAGS := -flto endif -# Test if OpenMP header files are available and we can link with the library. -OMP_CHECK_CMD := echo "int main() { return 0; }" | \ - $(CXX) -fopenmp -x c++ - -o /dev/null 2>/dev/null -HAVE_OPENMP := $(shell $(OMP_CHECK_CMD) && echo "true") -ifeq ($(HAVE_OPENMP),true) - OPENMP_FLAGS := -fopenmp -endif - ifdef DEBUG DEBUG_FLAGS := -g -O0 CXXFLAGS += $(DEBUG_FLAGS) NVCCFLAGS += $(DEBUG_FLAGS) HIPCCFLAGS += $(DEBUG_FLAGS) else - CXXFLAGS += -O3 $(OPENMP_FLAGS) $(LTO_FLAGS) + CXXFLAGS += -O3 $(LTO_FLAGS) NVCCFLAGS += -O3 HIPCCFLAGS += -O3 endif From da24b586e638693fe98216818cd52971e2b73b18 Mon Sep 17 00:00:00 2001 From: mhucka Date: Wed, 21 Jan 2026 15:14:53 +0000 Subject: [PATCH 5/5] Fix incorrect elseif() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As noted by Sergei, `elseif(LINUX)` → `else()`. 
--- pybind_interface/avx2/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index 417d1f147..f79bc0738 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -20,7 +20,7 @@ if(MSVC) # Add /O2 to any configuration that is NOT Debug. # This prevents a conflict with /RTC1 in DEBUG builds. add_compile_options($<$<NOT:$<CONFIG:Debug>>:/O2>) -elseif(LINUX) +else() add_compile_options(-mavx2 -mfma -O3) execute_process( COMMAND bash --noprofile -c "grep -qs bmi2 /proc/cpuinfo"