From 18bffac9f6ac722c57e186ce7e9dc1b7cc9a54fe Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 00:58:47 +0000 Subject: [PATCH 01/33] Move most metadata from setup.py to pyproject.toml The modern approach is to put more things into `pyproject.toml`. This moves most things out of `setup.py`. The version number is now read from the file `qsimcirq/_version.py` in both setup.py and pyproject.toml, so that there is a single source of truth for that value. --- pyproject.toml | 135 +++++++++++++++++++++++++++++++++++++++++++++---- setup.py | 91 +++------------------------------ 2 files changed, 132 insertions(+), 94 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 37d714455..e519fedd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,28 +12,133 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Note: there are altogether 3 types of dependencies listed in this file: +# +# [build-system].requires: the packages needed for the build system. This list +# is not stored in the package metadata. +# +# [project].dependencies: other packages minimally needed to be able to +# install and run qsimcirq. These are things like Cirq, NumPy, etc. Equivalent +# to "install_requires" in setuptools' setup.py. The list gets stored in the +# metadata of the package; when the project is installed by pip, this is the +# specification that is used to install its dependencies. +# +# [dependency-groups].dev: the development dependencies; i.e., what a +# developer needs in order to run unit tests, linters, and formatters. The +# "[dependency-groups]" section is a Python packaging feature introduced in +# 2025. This list is not stored in the metadata of the package. To install the +# development dependencies, use "pip install --group dev".
+ [build-system] +build-backend = "setuptools.build_meta" requires = [ - "packaging", - "setuptools>=78.1.1", - "pybind11[global]", - # "pip install" from sources needs to build Pybind, which needs CMake too. - "cmake~=3.28.1", + "setuptools>=78.1.1", + "setuptools-scm[toml]>=6.2", + "wheel", ] -build-backend = "setuptools.build_meta" + +[project] +name = "qsimcirq" +description = "High-performance quantum circuit simulator for C++ and Python." +authors = [ + { name = "The qsim/qsimh Developers", email = "qsim-qsimh-dev@googlegroups.com" } +] +maintainers = [ + { name = "Google Quantum AI", email = "quantum-oss-maintainers@google.com" } +] +readme = {file = "README.md", content-type = "text/markdown"} +license = "Apache-2.0" +requires-python = ">=3.10.0" +dynamic = ["version", "dependencies"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: GPU :: NVIDIA CUDA", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: C++", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering :: Quantum Computing", + "Topic :: Software Development :: Libraries :: Python Modules", + "Typing :: Typed", +] +keywords = [ + "algorithms", + "cirq", + "nisq", + "quantum algorithm development", + "quantum circuit simulator", + "quantum computer simulator", + "quantum computing", + "quantum programming", + "quantum simulation", + "quantum", + "schrödinger-feynman simulation", + "simulation", + "state vector simulator", +] + +[project.urls] +documentation = "https://quantumai.google/qsim" +download = "https://pypi.org/project/qsimcirq/#files" +homepage = "https://quantumai.google/qsim" 
+issues = "https://github.com/quantumlib/qsim/issues" +source = "https://github.com/quantumlib/qsim" + +[dependency-groups] +# Development dependencies. Install these with "pip install --group dev". +dev = [ + "black~=25.9.0", + "cibuildwheel", + "flynt~=1.0", + "isort[colors]~=6.0.1", + "py-cpuinfo", + "pylint~=4.0.2", + "pytest", + "pytest-xdist", +] + +[tool.setuptools] +packages = ["qsimcirq"] +package-data = {"qsimcirq" = ["py.typed"]} + +[tool.setuptools.dynamic] +# The next one becomes the value of [project].version. +version = {attr = "qsimcirq._version.__version__"} +# The next one becomes [project].dependencies, equivalent to "install_requires" +# in setuptools' setup.py. "pip install qsimcirq" installs these automatically. +dependencies = {file = ["requirements.txt"] } [tool.cibuildwheel] -test-extras = "dev" +build = "cp310-* cp311-* cp312-* cp313-*" dependency-versions = "latest" enable = ["cpython-prerelease"] environment.PIP_PREFER_BINARY = "1" # Due to package & module name conflict, temporarily move it away to run tests: -before-test = "mv {package}/qsimcirq /tmp" -test-command = "pytest -s -v {package}/qsimcirq_tests/qsimcirq_test.py && mv /tmp/qsimcirq {package}" +before-test = "pip install --group dev && mv {package}/qsimcirq /tmp" +test-command = """ +pytest -n auto -s -v {package}/qsimcirq_tests/qsimcirq_test.py && +mv /tmp/qsimcirq {package} +""" [tool.cibuildwheel.macos] -before-build = "brew install -q libomp llvm@19 && brew unlink libomp && brew unlink llvm@19 && brew link --force libomp && brew link --force llvm@19" -repair-wheel-command = "delocate-listdeps {wheel} && delocate-wheel --verbose --require-archs {delocate_archs} -w {dest_dir} {wheel}" +before-build = """ +brew install -q libomp llvm@19 && +brew unlink libomp && +brew unlink llvm@19 && +brew link --force libomp && +brew link --force llvm@19 +""" +repair-wheel-command = """ +delocate-listdeps {wheel} && +delocate-wheel --verbose --require-archs {delocate_archs} -w {dest_dir}
{wheel} +""" [tool.cibuildwheel.linux] manylinux-x86_64-image = "manylinux2014" @@ -43,3 +148,11 @@ skip = "*musllinux*" [tool.black] target-version = ['py310', 'py311', 'py312', 'py313'] extend-exclude = 'third_party' + +[tool.isort] +profile = 'black' +order_by_type = false # Sort alphabetically, irrespective of case. +skip_gitignore = true +combine_as_imports = true +known_first_party = ["cirq*"] +extend_skip = ["__init__.py"] diff --git a/setup.py b/setup.py index 328b27cdd..d3ccfa54c 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,10 @@ from setuptools import Extension, setup from setuptools.command.build_ext import build_ext +# qsimcirq/_version.py contains the source of truth for the version number. +__version__ = runpy.run_path("qsimcirq/_version.py")["__version__"] +assert __version__, "The version string must not be empty" + class CMakeExtension(Extension): def __init__(self, name, sourcedir=""): @@ -67,6 +71,8 @@ def build_extension(self, ext): "-DCMAKE_CUDA_COMPILER=nvcc", ] + # Append additional CMake arguments from the environment variable. + # This is e.g. used by cibuildwheel to force a certain C++ standard.
additional_cmake_args = os.environ.get("CMAKE_ARGS", "") if additional_cmake_args: cmake_args += additional_cmake_args.split() @@ -110,9 +116,7 @@ def build_extension(self, ext): env = os.environ.copy() cxxflags = env.get("CXXFLAGS", "") - env["CXXFLAGS"] = ( - f'{cxxflags} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"' - ) + env["CXXFLAGS"] = f'{cxxflags} -DVERSION_INFO=\\"{__version__}\\"' if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) subprocess.check_call( @@ -124,42 +128,7 @@ def build_extension(self, ext): ) -with open("requirements.txt") as f: - requirements = [ - line.strip() for line in f if line.strip() and not line.strip().startswith("#") - ] -with open("dev-requirements.txt") as f: - dev_requirements = [ - line.strip() for line in f if line.strip() and not line.strip().startswith("#") - ] - -description = "Schrödinger and Schrödinger-Feynman simulators for quantum circuits." - -# README file as long_description. -with open("README.md", encoding="utf-8") as f: - long_description = f.read() - -__version__ = runpy.run_path("qsimcirq/_version.py")["__version__"] -if not __version__: - raise ValueError("Version string cannot be empty") - setup( - name="qsimcirq", - version=__version__, - url="https://github.com/quantumlib/qsim", - author="The qsim/qsimh Developers", - author_email="qsim-qsimh-dev@googlegroups.com", - maintainer="Google Quantum AI", - maintainer_email="quantum-oss-maintainers@google.com", - python_requires=">=3.10.0", - install_requires=requirements, - extras_require={ - "dev": dev_requirements, - }, - license="Apache-2.0", - description=description, - long_description=long_description, - long_description_content_type="text/markdown", ext_modules=[ CMakeExtension("qsimcirq/qsim_avx512"), CMakeExtension("qsimcirq/qsim_avx2"), @@ -170,49 +139,5 @@ def build_extension(self, ext): CMakeExtension("qsimcirq/qsim_decide"), CMakeExtension("qsimcirq/qsim_hip"), ], - cmdclass=dict(build_ext=CMakeBuild), - zip_safe=False, 
- packages=["qsimcirq"], - package_data={"qsimcirq": ["py.typed"]}, - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Environment :: GPU :: NVIDIA CUDA", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Programming Language :: C++", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Topic :: Scientific/Engineering :: Quantum Computing", - "Topic :: Software Development :: Libraries :: Python Modules", - "Typing :: Typed", - ], - keywords=[ - "algorithms", - "api", - "application programming interface", - "cirq", - "google quantum", - "google", - "nisq", - "python", - "quantum algorithm development", - "quantum circuit simulator", - "quantum computer simulator", - "quantum computing", - "quantum computing research", - "quantum programming", - "quantum simulation", - "quantum", - "schrödinger-feynman simulation", - "sdk", - "simulation", - "state vector simulator", - "software development kit", - ], + cmdclass={"build_ext": CMakeBuild}, ) From 0f7a2b6ba641799f50fc4885f8feca959b15753d Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 00:59:50 +0000 Subject: [PATCH 02/33] Move contents of dev-requirements.txt into pyproject.toml Remove no-longer-needed dev-requirements.txt. Its contents are in pyproject.toml now. 
--- dev-requirements.txt | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 dev-requirements.txt diff --git a/dev-requirements.txt b/dev-requirements.txt deleted file mode 100644 index 2a0fc0a36..000000000 --- a/dev-requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -cmake~=3.28.1 -black~=25.9.0 -flynt~=1.0 -isort[colors]~=6.0.1 -# The global option to pybind11 makes it include CMake files in a location where -# CMake will find them. It makes a crucial difference in some environments. -pybind11[global] -pylint~=4.0.2 -pytest -pytest-xdist -py-cpuinfo -setuptools>=78.1.1 From 78383e150a9361a177f875b7a3d977676f2aef9d Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 01:01:04 +0000 Subject: [PATCH 03/33] Replace references to dev-requirements.txt The more modern way of handling development dependencies is to put them in pyproject.toml and then use `pip install --group dev` to install them. --- .github/workflows/ci.yaml | 36 ++++++++++++++++++------ .github/workflows/cirq_compatibility.yml | 4 +-- Dockerfile | 13 +++++---- MANIFEST.in | 1 - docs/install_qsimcirq.md | 20 ++++++++----- 5 files changed, 50 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d74432949..6fd9d7842 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -73,10 +73,15 @@ jobs: cache: pip cache-dependency-path: | requirements.txt - dev-requirements.txt + pyproject.toml + + - name: Upgrade pip + run: python -m pip install --upgrade pip - name: Install dependencies - run: pip install -r requirements.txt -r dev-requirements.txt + run: | + pip install -r requirements.txt + pip install --group dev - name: Check format continue-on-error: ${{inputs.soft-linting == 'true'}} @@ -237,10 +242,15 @@ jobs: cache: pip cache-dependency-path: | requirements.txt - dev-requirements.txt + pyproject.toml + + - name: Upgrade pip + run: python -m pip install --upgrade pip - name: Install dependencies - run: pip install -r 
requirements.txt -r dev-requirements.txt + run: | + pip install -r requirements.txt + pip install --group dev - name: Set up Bazel uses: './.github/actions/set-up-bazel' @@ -322,10 +332,15 @@ jobs: cache: pip cache-dependency-path: | requirements.txt - dev-requirements.txt + pyproject.toml + + - name: Upgrade pip + run: python -m pip install --upgrade pip - name: Install dependencies - run: pip install -r requirements.txt -r dev-requirements.txt + run: | + pip install -r requirements.txt + pip install --group dev - name: Set up Bazel uses: './.github/actions/set-up-bazel' @@ -377,10 +392,15 @@ jobs: cache: pip cache-dependency-path: | requirements.txt - dev-requirements.txt + pyproject.toml + + - name: Upgrade pip + run: python -m pip install --upgrade pip - name: Install dependencies - run: pip install -r requirements.txt -r dev-requirements.txt + run: | + pip install -r requirements.txt + pip install --group dev - name: Set up Bazel uses: './.github/actions/set-up-bazel' diff --git a/.github/workflows/cirq_compatibility.yml b/.github/workflows/cirq_compatibility.yml index 5fc76ff0f..18d974bef 100644 --- a/.github/workflows/cirq_compatibility.yml +++ b/.github/workflows/cirq_compatibility.yml @@ -52,7 +52,7 @@ jobs: cache: pip cache-dependency-path: | requirements.txt - dev-requirements.txt + pyproject.toml - name: Install latest dev version of Cirq run: pip install --upgrade cirq~=1.0.dev @@ -60,7 +60,7 @@ jobs: - name: Install qsim dev requirements run: | pip install -r requirements.txt - pip install -r dev-requirements.txt + pip install --group dev - name: Run Python tests env: diff --git a/Dockerfile b/Dockerfile index 7d82f2eb5..07c5bcbab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_PATH ENV PATH="$CUDA_PATH/bin:$PATH" # Update package list & install some basic tools we'll need. 
-# hadolint ignore=DL3009,DL3008 +# hadolint ignore=DL3008,DL3009 RUN apt-get update && \ apt-get install -y make g++ wget git --no-install-recommends && \ apt-get install -y python3-dev python3-pip python3-venv --no-install-recommends @@ -37,8 +37,10 @@ COPY ./circuits/ /qsim/circuits/ COPY ./lib/ /qsim/lib/ COPY ./pybind_interface/ /qsim/lib/ COPY ./qsimcirq_tests/ /qsim/qsimcirq_tests/ +COPY ./pyproject.toml /qsim/pyproject.toml COPY ./requirements.txt /qsim/requirements.txt -COPY ./dev-requirements.txt /qsim/dev-requirements.txt + +WORKDIR /qsim/ # Create venv to avoid collision between system packages and what we install. RUN python3 -m venv --upgrade-deps test_env @@ -47,12 +49,11 @@ RUN python3 -m venv --upgrade-deps test_env ENV PATH="/test_env/bin:$PATH" # Install qsim requirements. -# hadolint ignore=DL3042 -RUN python3 -m pip install -r /qsim/requirements.txt && \ - python3 -m pip install -r /qsim/dev-requirements.txt +# hadolint ignore=DL3013 +RUN python3 -m pip install --no-cache-dir --upgrade pip && \ + python3 -m pip install --no-cache-dir -r requirements.txt # Compile qsim. -WORKDIR /qsim/ RUN make -j qsim ENTRYPOINT ["/qsim/apps/qsim_base.x"] diff --git a/MANIFEST.in b/MANIFEST.in index 4b487267f..2968589be 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,4 @@ include requirements.txt -include dev-requirements.txt include CMakeLists.txt graft pybind_interface diff --git a/docs/install_qsimcirq.md b/docs/install_qsimcirq.md index 6887a57f1..441dc3ead 100644 --- a/docs/install_qsimcirq.md +++ b/docs/install_qsimcirq.md @@ -10,18 +10,24 @@ directly in C++ code without building and installing the qsimcirq interface. ## Before installation -Prior to installation, consider opening a +Prior to installation, consider creating a [virtual environment](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). 
-Prerequisites are included in the +Prerequisites for installing and running qsim are included in the [`requirements.txt`](https://github.com/quantumlib/qsim/blob/main/requirements.txt) -file, and will be automatically installed along with qsimcirq. +file, and will be automatically installed along with qsimcirq when you install +it with pip. -If you'd like to develop qsimcirq, a separate set of dependencies are includes +If you'd like to develop qsimcirq, a separate set of dependencies are defined in the -[`dev-requirements.txt`](https://github.com/quantumlib/qsim/blob/main/dev-requirements.txt) -file. You can install them with `pip3 install -r dev-requirements.txt` or -`pip3 install qsimcirq[dev]`. +[`pyproject.toml`](https://github.com/quantumlib/qsim/blob/main/pyproject.toml) +file. Using pip version 25.1 or higher, you can install them with the following +commands: + +```shell +pip install -r requirements.txt +pip install --group dev +``` ## Linux installation From 2c772143b0e3d3a6084a3261c17a433288e8f1aa Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 01:02:32 +0000 Subject: [PATCH 04/33] Add more constraints & comments to requirements.txt Some transitive dependencies have had version updates that cause conflicts for some combinations of our builds, such as contourpy requiring Python 3.11. This puts more constraints on this so that qsimcirq builds everywhere. --- requirements.txt | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 21e8a4ade..a927b3363 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,17 @@ +# Installation and run-time dependencies for qsimcirq. This file is read +# by pyproject.toml. 
+ absl-py cirq-core~=1.0 -numpy>=1.26.0 +numpy>=1.26.0,<2.0; python_version < '3.11' +numpy>=2.0; python_version >= '3.11' + +# These are needed because installing qsimcirq in some environments may require +# pip to compile Pybind for that specific platform: +cmake~=3.28.1 +pybind11[global] + +# These are transitive dependencies we need to constrain to avoid unresolvable +# installation conflicts due to them requiring higher Python versions: +scipy<1.16; python_version < '3.11' +contourpy<1.3; python_version < '3.11' From aedd47568b979197c3b4fdfd9082142b0ab16288 Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 01:41:58 +0000 Subject: [PATCH 05/33] More Dockerfile fixes 1. Need to change the workdir at a different point. 2. Need to activate the venv. --- Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 07c5bcbab..2dd040373 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,10 +40,11 @@ COPY ./qsimcirq_tests/ /qsim/qsimcirq_tests/ COPY ./pyproject.toml /qsim/pyproject.toml COPY ./requirements.txt /qsim/requirements.txt -WORKDIR /qsim/ - # Create venv to avoid collision between system packages and what we install. -RUN python3 -m venv --upgrade-deps test_env +RUN python3 -m venv --upgrade-deps test_env && \ + . test_env/bin/activate + +WORKDIR /qsim/ # Activate venv. 
ENV PATH="/test_env/bin:$PATH" From c2d1d3a8c2c64d4afb2511545187908b78bf3ab6 Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 01:53:24 +0000 Subject: [PATCH 06/33] Remove some changes that belong in a separate PR --- pyproject.toml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e519fedd2..55235fa18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,7 +123,7 @@ environment.PIP_PREFER_BINARY = "1" # Due to package & module name conflict, temporarily move it away to run tests: before-test = "pip install --group dev && mv {package}/qsimcirq /tmp" test-command = """ -pytest -n auto -s -v {package}/qsimcirq_tests/qsimcirq_test.py && +pytest -s -v {package}/qsimcirq_tests/qsimcirq_test.py && mv /tmp/qsimcirq {package} """ @@ -148,11 +148,3 @@ skip = "*musllinux*" [tool.black] target-version = ['py310', 'py311', 'py312', 'py313'] extend-exclude = 'third_party' - -[tool.isort] -profile = 'black' -order_by_type = false # Sort alphabetically, irrespective of case. -skip_gitignore = true -combine_as_imports = true -known_first_party = ["cirq*"] -extend_skip = ["__init__.py"] From 59c8cb160b255c2dcbc890843186721a0318b09e Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 01:55:40 +0000 Subject: [PATCH 07/33] Remove a spurious change to reduce the diff noise --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2dd040373..100c68e92 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_PATH ENV PATH="$CUDA_PATH/bin:$PATH" # Update package list & install some basic tools we'll need. 
-# hadolint ignore=DL3008,DL3009 +# hadolint ignore=DL3009,DL3008 RUN apt-get update && \ apt-get install -y make g++ wget git --no-install-recommends && \ apt-get install -y python3-dev python3-pip python3-venv --no-install-recommends @@ -37,8 +37,8 @@ COPY ./circuits/ /qsim/circuits/ COPY ./lib/ /qsim/lib/ COPY ./pybind_interface/ /qsim/lib/ COPY ./qsimcirq_tests/ /qsim/qsimcirq_tests/ -COPY ./pyproject.toml /qsim/pyproject.toml COPY ./requirements.txt /qsim/requirements.txt +COPY ./pyproject.toml /qsim/pyproject.toml # Create venv to avoid collision between system packages and what we install. RUN python3 -m venv --upgrade-deps test_env && \ From 794c818c323c7dc41e89e71122db8abd036ec8c5 Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 02:12:27 +0000 Subject: [PATCH 08/33] Need to install setuptools on Python >= 3.12 --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 55235fa18..a2d82787f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,8 @@ source = "https://github.com/quantumlib/qsim" dev = [ "black~=25.9.0", "cibuildwheel", + # Distutils was removed from Python in 3.12. + "setuptools; python_version >= 3.12", "flynt~=1.0", "isort[colors]~=6.0.1", "py-cpuinfo", From 3b4102a991a845f3fca73db8d295bceff4559cd0 Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 02:14:01 +0000 Subject: [PATCH 09/33] Fix syntax --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a2d82787f..a4f3e4613 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,7 +97,7 @@ dev = [ "black~=25.9.0", "cibuildwheel", # Distutils was removed from Python in 3.12. 
- "setuptools; python_version >= 3.12", + "setuptools; python_version >= '3.12'", "flynt~=1.0", "isort[colors]~=6.0.1", "py-cpuinfo", From 2a565d1a27aec7b4b911e9accc084258f994ed32 Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 2 Jan 2026 02:31:53 +0000 Subject: [PATCH 10/33] Need to install Python dev dependencies --- pybind_interface/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pybind_interface/Dockerfile b/pybind_interface/Dockerfile index 54a4ff511..2b1cb4fd0 100644 --- a/pybind_interface/Dockerfile +++ b/pybind_interface/Dockerfile @@ -26,5 +26,9 @@ WORKDIR /qsim/ # Build pybind code early to cache the results RUN make -j -C /qsim/ pybind +# Install Python development dependencies. +# hadolint ignore=DL3013 +RUN pip install --no-cache-dir --group dev + # Compile and run qsim tests ENTRYPOINT ["make", "-C", "/qsim/", "run-py-tests"] From 80f980e86b649229718904b614e6b9523327137a Mon Sep 17 00:00:00 2001 From: Sergei Isakov <54642992+sergeisakov@users.noreply.github.com> Date: Mon, 5 Jan 2026 07:30:24 +0100 Subject: [PATCH 11/33] Add experimental support for cuStateVecEx. 
(#965) --- CMakeLists.txt | 1 + Makefile | 19 + apps/Makefile | 9 + apps/make.sh | 3 +- apps/qsim_base_custatevecex.cu | 160 +++++ docs/cirq_interface.md | 8 +- lib/BUILD | 73 +++ lib/io.h | 12 + lib/io_file.h | 4 + lib/multiprocess_custatevecex.h | 213 ++++++ lib/run_custatevecex.h | 313 +++++++++ lib/simulator_custatevec.h | 17 +- lib/simulator_custatevecex.h | 243 +++++++ lib/statespace_custatevec.h | 6 +- lib/statespace_custatevecex.h | 431 +++++++++++++ lib/util_cuda.h | 19 +- lib/util_custatevec.h | 4 +- lib/util_custatevecex.h | 46 ++ lib/vectorspace_custatevecex.h | 610 ++++++++++++++++++ pybind_interface/Makefile | 10 +- pybind_interface/cuda/CMakeLists.txt | 2 +- pybind_interface/custatevec/CMakeLists.txt | 2 +- pybind_interface/custatevecex/CMakeLists.txt | 59 ++ .../custatevecex/pybind_main_custatevecex.cpp | 74 +++ .../custatevecex/pybind_main_custatevecex.h | 17 + pybind_interface/decide/CMakeLists.txt | 5 +- pybind_interface/decide/decide.cpp | 20 +- pybind_interface/hip/CMakeLists.txt | 2 +- qsimcirq/__init__.py | 12 +- qsimcirq/qsim_simulator.py | 25 +- qsimcirq_tests/qsimcirq_test.py | 109 ++++ setup.py | 1 + tests/Makefile | 17 + tests/hybrid_custatevecex_test.cu | 59 ++ tests/qtrajectory_custatevecex_test.cu | 88 +++ tests/run_custatevecex_test.cu | 262 ++++++++ tests/simulator_custatevecex_test.cu | 105 +++ tests/simulator_testfixture.h | 35 +- tests/statespace_custatevecex_test.cu | 119 ++++ 39 files changed, 3167 insertions(+), 47 deletions(-) create mode 100644 apps/qsim_base_custatevecex.cu create mode 100644 lib/multiprocess_custatevecex.h create mode 100644 lib/run_custatevecex.h create mode 100644 lib/simulator_custatevecex.h create mode 100644 lib/statespace_custatevecex.h create mode 100644 lib/util_custatevecex.h create mode 100644 lib/vectorspace_custatevecex.h create mode 100644 pybind_interface/custatevecex/CMakeLists.txt create mode 100644 pybind_interface/custatevecex/pybind_main_custatevecex.cpp create mode 100644 
pybind_interface/custatevecex/pybind_main_custatevecex.h create mode 100644 tests/hybrid_custatevecex_test.cu create mode 100644 tests/qtrajectory_custatevecex_test.cu create mode 100644 tests/run_custatevecex_test.cu create mode 100644 tests/simulator_custatevecex_test.cu create mode 100644 tests/statespace_custatevecex_test.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index e8a92a47b..8b824c81d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,7 @@ if(NOT CMAKE_APPLE_SILICON_PROCESSOR) add_subdirectory(pybind_interface/cuda) if(DEFINED ENV{CUQUANTUM_ROOT}) add_subdirectory(pybind_interface/custatevec) + add_subdirectory(pybind_interface/custatevecex) endif() elseif(has_hipcc) add_subdirectory(pybind_interface/hip) diff --git a/Makefile b/Makefile index b4e16da06..4b37317c2 100644 --- a/Makefile +++ b/Makefile @@ -94,7 +94,10 @@ ifneq (,$(strip $(CUQUANTUM_ROOT))) CUSVFLAGS += -lcustatevec -lcublas CUSTATEVECFLAGS ?= $(CUSVFLAGS) TARGETS += qsim-custatevec + TARGETS += qsim-custatevecex TESTS += run-custatevec-tests + TESTS += run-custatevecex-tests + TESTS += run-custatevecex-mpi-tests else $(warning $$CUQUANTUM_ROOT is set, but the path does not seem to exist) endif @@ -120,6 +123,10 @@ qsim-cuda: qsim-custatevec: | check-cuquantum-root-set $(MAKE) -C apps/ qsim-custatevec +.PHONY: qsim-custatevecex +qsim-custatevecex: | check-cuquantum-root-set + $(MAKE) -C apps/ qsim-custatevecex + .PHONY: qsim-hip qsim-hip: $(MAKE) -C apps/ qsim-hip @@ -140,6 +147,10 @@ cuda-tests: custatevec-tests: | check-cuquantum-root-set $(MAKE) -C tests/ custatevec-tests +.PHONY: custatevecex-tests +custatevecex-tests: | check-cuquantum-root-set + $(MAKE) -C tests/ custatevecex-tests + .PHONY: hip-tests hip-tests: $(MAKE) -C tests/ hip-tests @@ -156,6 +167,14 @@ run-cuda-tests: cuda-tests run-custatevec-tests: custatevec-tests $(MAKE) -C tests/ run-custatevec-tests +.PHONY: run-custatevecex-tests +run-custatevecex-tests: custatevecex-tests + $(MAKE) -C tests/ 
run-custatevecex-tests + +.PHONY: run-custatevecex-mpi-tests +run-custatevecex-mpi-tests: custatevecex-tests + $(MAKE) -C tests/ run-custatevecex-mpi-tests + .PHONY: run-hip-tests run-hip-tests: hip-tests $(MAKE) -C tests/ run-hip-tests diff --git a/apps/Makefile b/apps/Makefile index 48b25cabd..19ccbc422 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -7,6 +7,9 @@ CUDA_TARGETS := $(CUDA_TARGETS:%cuda.cu=%cuda.x) CUSTATEVEC_TARGETS = $(shell find . -maxdepth 1 -name "*custatevec.cu") CUSTATEVEC_TARGETS := $(CUSTATEVEC_TARGETS:%custatevec.cu=%custatevec.x) +CUSTATEVECEX_TARGETS = $(shell find . -maxdepth 1 -name "*custatevecex.cu") +CUSTATEVECEX_TARGETS := $(CUSTATEVECEX_TARGETS:%custatevecex.cu=%custatevecex.x) + HIP_TARGETS = $(shell find . -maxdepth 1 -name '*cuda.cu') HIP_TARGETS := $(HIP_TARGETS:%cuda.cu=%hip.x) @@ -19,6 +22,9 @@ qsim-cuda: $(CUDA_TARGETS) .PHONY: qsim-custatevec qsim-custatevec: $(CUSTATEVEC_TARGETS) +.PHONY: qsim-custatevecex +qsim-custatevecex: $(CUSTATEVECEX_TARGETS) + .PHONY: qsim-hip qsim-hip: $(HIP_TARGETS) @@ -31,6 +37,9 @@ qsim-hip: $(HIP_TARGETS) %custatevec.x: %custatevec.cu $(NVCC) -o ./$@ $< $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%custatevecex.x: %custatevecex.cu + $(NVCC) -o ./$@ $< $(NVCCFLAGS) $(CUSTATEVECFLAGS) + %hip.x: %cuda.cu $(HIPCC) -o ./$@ $< $(HIPCCFLAGS) diff --git a/apps/make.sh b/apps/make.sh index 610b2eb4f..7ebf00c7b 100755 --- a/apps/make.sh +++ b/apps/make.sh @@ -37,7 +37,8 @@ if command -v nvcc &>/dev/null; then ) nvcc -O3 "${CUSTATEVECFLAGS[@]}" \ -o qsim_base_custatevec.x qsim_base_custatevec.cu - + nvcc -O3 "${CUSTATEVECFLAGS[@]}" \ + -o qsim_base_custatevecex.x qsim_base_custatevecex.cu fi elif command -v hipcc &>/dev/null; then hipcc -O3 -o qsim_base_hip.x qsim_base_cuda.cu diff --git a/apps/qsim_base_custatevecex.cu b/apps/qsim_base_custatevecex.cu new file mode 100644 index 000000000..99ce1a283 --- /dev/null +++ b/apps/qsim_base_custatevecex.cu @@ -0,0 +1,160 @@ +// Copyright 2025 Google LLC. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include +#include + +#include "../lib/circuit_qsim_parser.h" +#include "../lib/formux.h" +#include "../lib/gates_qsim.h" +#include "../lib/io_file.h" +#include "../lib/multiprocess_custatevecex.h" +#include "../lib/run_custatevecex.h" +#include "../lib/simulator_custatevecex.h" +#include "../lib/util_custatevec.h" + +struct Options { + std::string circuit_file; + unsigned maxtime = std::numeric_limits::max(); + unsigned seed = 1; + unsigned verbosity = 0; +}; + +Options GetOptions(int argc, char* argv[]) { + constexpr char usage[] = "usage:\n ./qsim_base -c circuit -d maxtime " + "-s seed -v verbosity\n"; + + Options opt; + + int k; + + while ((k = getopt(argc, argv, "c:d:s:v:")) != -1) { + switch (k) { + case 'c': + opt.circuit_file = optarg; + break; + case 'd': + opt.maxtime = std::atoi(optarg); + break; + case 's': + opt.seed = std::atoi(optarg); + break; + case 'v': + opt.verbosity = std::atoi(optarg); + break; + default: + qsim::IO::errorf(usage); + exit(1); + } + } + + return opt; +} + +bool ValidateOptions(const Options& opt) { + if (opt.circuit_file.empty()) { + qsim::IO::errorf("circuit file is not provided.\n"); + return false; + } + + return true; +} + +template +void PrintAmplitudes( + unsigned num_qubits, const StateSpace& state_space, const State& state) { + static constexpr char const* bits[8] = { + "000", "001", "010", "011", "100", 
"101", "110", "111", + }; + + uint64_t size = std::min(uint64_t{8}, uint64_t{1} << num_qubits); + unsigned s = 3 - std::min(unsigned{3}, num_qubits); + + for (uint64_t i = 0; i < size; ++i) { + auto a = state_space.GetAmpl(state, i); + qsim::IO::messagef("%s:%16.8g%16.8g%16.8g\n", + bits[i] + s, std::real(a), std::imag(a), std::norm(a)); + } +} + +int main(int argc, char* argv[]) { + using namespace qsim; + + auto opt = GetOptions(argc, argv); + if (!ValidateOptions(opt)) { + return 1; + } + + using fp_type = float; + + Circuit> circuit; + if (!CircuitQsimParser::FromFile(opt.maxtime, opt.circuit_file, + circuit)) { + return 1; + } + + struct Factory { + using Simulator = qsim::SimulatorCuStateVecEx; + using StateSpace = Simulator::StateSpace; + + explicit Factory(unsigned verbosity = 0) : verbosity(verbosity) { + mp.initialize(); + } + + StateSpace CreateStateSpace() const { + StateSpace::Parameter param; + param.verbosity = verbosity; + + return StateSpace{mp, param}; + } + + Simulator CreateSimulator() const { + return Simulator{}; + } + + MultiProcessCuStateVecEx mp; + unsigned verbosity; + }; + + using Simulator = Factory::Simulator; + using StateSpace = Simulator::StateSpace; + using State = StateSpace::State; + using Runner = CuStateVecExRunner; + + Factory factory(opt.verbosity); + + StateSpace state_space = factory.CreateStateSpace(); + State state = state_space.Create(circuit.num_qubits); + + if (state_space.IsNull(state)) { + IO::errorf("not enough memory: is the number of qubits too large?\n"); + return 1; + } + + state_space.SetStateZero(state); + + Runner::Parameter param; + param.seed = opt.seed; + param.verbosity = opt.verbosity; + + if (Runner::Run(param, factory, circuit, state)) { + PrintAmplitudes(circuit.num_qubits, state_space, state); + } + + return 0; +} diff --git a/docs/cirq_interface.md b/docs/cirq_interface.md index 593da1700..5dd1ddb4f 100644 --- a/docs/cirq_interface.md +++ b/docs/cirq_interface.md @@ -186,8 +186,11 @@ library. 
`QSimOptions` provides five parameters to configure GPU execution. `use_gpu` is required to enable GPU execution: * `use_gpu`: if True, use GPU instead of CPU for simulation. -* `gpu_mode`: use CUDA if set to 0 (default value) or use the NVIDIA cuStateVec -library if set to any other value. +* `gpu_mode`: use CUDA if set to 0 (default value), use the NVIDIA cuStateVec +library if set to 1, or use the NVIDIA cuStateVecEx library for any other value. + +With the NVIDIA cuStateVecEx library, simulations can run in multi-device and +multi-node environments. If `use_gpu` is set and `gpu_mode` is set to 0, the remaining parameters can optionally be set to fine-tune StateSpace performance for a specific device. @@ -196,3 +199,4 @@ In most cases, the default values provide good performance. StateSpace. This must be a power of 2 in the range [32, 1024]. * `gpu_data_blocks`: number of data blocks to use for the GPU StateSpace. Below 16 data blocks, performance is noticeably reduced. 
+
diff --git a/lib/BUILD b/lib/BUILD
index 02aa71bb0..60fd0e51a 100644
--- a/lib/BUILD
+++ b/lib/BUILD
@@ -186,8 +186,10 @@ cuda_library(
         "matrix.h",
         "mps_simulator.h",
         "mps_statespace.h",
+        "multiprocess_custatevecex.h",
         "parfor.h",
         "qtrajectory.h",
+        "run_custatevecex.h",
         "run_qsim.h",
         "run_qsimh.h",
         "seqfor.h",
@@ -198,12 +200,14 @@ cuda_library(
         "simulator_avx512.h",
         "simulator_basic.h",
         "simulator_custatevec.h",
+        "simulator_custatevecex.h",
         "simulator_sse.h",
         "statespace.h",
         "statespace_avx.h",
         "statespace_avx512.h",
         "statespace_basic.h",
         "statespace_custatevec.h",
+        "statespace_custatevecex.h",
         "statespace_sse.h",
         "umux.h",
         "unitary_calculator_avx.h",
@@ -219,8 +223,10 @@ cuda_library(
         "util_cpu.h",
         "util_cuda.h",
         "util_custatevec.h",
+        "util_custatevecex.h",
         "vectorspace.h",
         "vectorspace_cuda.h",
+        "vectorspace_custatevecex.h",
     ],
     copts = ["-D__CUSTATEVEC__"],
     deps = [
@@ -357,6 +363,11 @@ cuda_library(
     hdrs = ["util_custatevec.h"],
 )

+cuda_library(
+    name = "util_custatevecex",
+    hdrs = ["util_custatevecex.h"],
+)
+
 ### Input/output libraries ###

 cc_library(
@@ -506,6 +517,29 @@ cc_library(
     ],
 )

+cuda_library(
+    name = "run_custatevecex",
+    hdrs = ["run_custatevecex.h"],
+    deps = [
+        ":circuit",
+        ":util",
+        ":util_custatevec",
+        ":util_custatevecex",
+    ],
+)
+
+### Multi-process library ###
+
+cuda_library(
+    name = "multiprocess_custatevecex",
+    hdrs = ["multiprocess_custatevecex.h"],
+    deps = [
+        ":io",
+        ":util_custatevec",
+        ":util_custatevecex",
+    ],
+)
+
 ### Vectorspace libraries ###

 cc_library(
@@ -518,6 +552,18 @@ cuda_library(
     hdrs = ["vectorspace_cuda.h"],
 )

+cuda_library(
+    name = "vectorspace_custatevecex",
+    hdrs = ["vectorspace_custatevecex.h"],
+    deps = [
+        ":io",
+        ":multiprocess_custatevecex",
+        ":util_cuda",
+        ":util_custatevec",
+        ":util_custatevecex",
+    ],
+)
+
 ### Statespace libraries ###

 cc_library(
@@ -591,6 +637,20 @@ cuda_library(
     ],
 )

+cuda_library(
+    name = "statespace_custatevecex",
+    hdrs = [
+        "statespace_custatevecex.h",
+    ],
+    deps = [
+        ":multiprocess_custatevecex",
+        ":statespace",
+        ":util_custatevec",
+        ":util_custatevecex",
+        ":vectorspace_custatevecex",
+    ],
+)
+
 ### Simulator libraries ###

 cc_library(
@@ -660,6 +720,19 @@ cuda_library(
     ],
 )

+cuda_library(
+    name = "simulator_custatevecex",
+    hdrs = [
+        "simulator_custatevecex.h",
+    ],
+    deps = [
+        ":io",
+        ":statespace_custatevecex",
+        ":util_custatevec",
+        ":util_custatevecex",
+    ],
+)
+
 # All three state-vector simulators with multiplexer
 cc_library(
     name = "simulator",
diff --git a/lib/io.h b/lib/io.h
index 3b26c7cc6..97de5fc12 100644
--- a/lib/io.h
+++ b/lib/io.h
@@ -20,11 +20,19 @@

 namespace qsim {

+namespace output {
+  inline bool enabled = true;  // One flag shared by all translation units.
+}
+
 /**
  * Controller for output logs.
  */
 struct IO {
   static void errorf(const char* format, ...) {
+    if (!output::enabled) {
+      return;
+    }
+
     va_list args;
     va_start(args, format);
     vfprintf(stderr, format, args);
@@ -32,6 +40,10 @@ struct IO {
   }

   static void messagef(const char* format, ...) {
+    if (!output::enabled) {
+      return;
+    }
+
     va_list args;
     va_start(args, format);
     vprintf(format, args);
diff --git a/lib/io_file.h b/lib/io_file.h
index 3cfac12db..789efbd60 100644
--- a/lib/io_file.h
+++ b/lib/io_file.h
@@ -47,6 +47,10 @@ struct IOFile : public IO {

   static bool WriteToFile(
       const std::string& file, const void* data, uint64_t size) {
+    if (!output::enabled) {
+      return true;
+    }
+
     auto fs = std::fstream(file, std::ios::out | std::ios::binary);

     if (!fs) {
diff --git a/lib/multiprocess_custatevecex.h b/lib/multiprocess_custatevecex.h
new file mode 100644
index 000000000..9c4a13bb1
--- /dev/null
+++ b/lib/multiprocess_custatevecex.h
@@ -0,0 +1,213 @@
+// Copyright 2025 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MULTIPROCESS_CUSTATEVECEX_H_
+#define MULTIPROCESS_CUSTATEVECEX_H_
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include "io.h"
+#include "util_custatevec.h"
+#include "util_custatevecex.h"
+
+namespace qsim {
+
+/**
+ * Multi-process context for the cuStateVecEx backend. Owns the cuStateVecEx
+ * communicator and derives how the global qubits are split over network
+ * layers; create_sv_config() uses that split to configure a multi-process
+ * state vector.
+ */
+struct MultiProcessCuStateVecEx {
+  // Selects one of the layer layouts returned by get_network_layers().
+  enum NetworkType {
+    kSuperPod = 0,
+    kGB200NVL = 1,
+    kSwitchTree = 2,
+    kCommunicator = 3,
+  };
+
+  struct Parameter {
+    uint64_t transfer_buffer_size = 16777216;
+    NetworkType network_type = kSuperPod;
+  };
+
+  MultiProcessCuStateVecEx(Parameter param = Parameter{16777216, kSuperPod})
+      : param_(param), communicator_(nullptr), initialized_(false) {}
+
+  // NOTE(review): the communicator is finalized even if initialize() was
+  // never called or failed early - confirm that the library allows this.
+  ~MultiProcessCuStateVecEx() {
+    if (communicator_) {
+      custatevecExCommunicatorDestroy(communicator_);
+    }
+
+    custatevecExCommunicatorStatus_t status;
+    custatevecExCommunicatorFinalize(&status);
+  }
+
+  custatevecExCommunicatorDescriptor_t communicator() const {
+    return communicator_;
+  }
+
+  // The values returned by num_processes() and rank() are meaningful only
+  // while initialized() is true; they are 0 otherwise.
+  unsigned num_processes() const {
+    return num_processes_;
+  }
+
+  unsigned rank() const {
+    return rank_;
+  }
+
+  bool initialized() const {
+    return initialized_;
+  }
+
+  /**
+   * Sets up the communicator and computes the global-qubit layout. On success
+   * initialized() becomes true. The object stays uninitialized if the
+   * communicator cannot be set up or if the number of processes is not a
+   * power of two greater than one. Output is disabled on all ranks but 0.
+   * Terminates the process if the selected network type has too few layers
+   * for the number of global qubits.
+   */
+  void initialize() {
+    int argc = 0;
+    char** argv = nullptr;
+
+    auto comm_type = CUSTATEVEC_COMMUNICATOR_TYPE_OPENMPI;
+
+    custatevecExCommunicatorStatus_t comm_status;
+    auto status = custatevecExCommunicatorInitialize(
+        comm_type, nullptr, &argc, &argv, &comm_status);
+
+    if (status != CUSTATEVEC_STATUS_SUCCESS ||
+        comm_status != CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS) {
+      return;
+    }
+
+    communicator_ = nullptr;
+    status = custatevecExCommunicatorCreate(&communicator_);
+
+    if (status != CUSTATEVEC_STATUS_SUCCESS) {
+      return;
+    }
+
+    int num_processes, rank;
+    ErrorCheck(communicator_->intf->getSize(communicator_, &num_processes));
+    ErrorCheck(communicator_->intf->getRank(communicator_, &rank));
+
+    // Only rank 0 prints errors and messages.
+    if (rank != 0) {
+      output::enabled = false;
+    }
+
+    if (num_processes < 2 || (num_processes & (num_processes - 1)) != 0) {
+      return;
+    }
+
+    num_global_qubits_ = get_num_global_qubits(num_processes);
+
+    unsigned num_acc_global_qubits = 0;
+    auto network_layers = get_network_layers(param_.network_type);
+
+    num_global_qubits_per_layer_.reserve(2);
+    global_index_bit_classes_.reserve(2);
+
+    // Hand out the global qubits layer by layer. A layer capacity of 0 means
+    // that the layer takes all the remaining global qubits.
+    for (const auto& layer : network_layers) {
+      auto k = num_global_qubits_ - num_acc_global_qubits;
+      global_index_bit_classes_.push_back(layer.global_index_bit_class);
+
+      if (layer.num_global_qubits == 0 || k <= layer.num_global_qubits) {
+        num_global_qubits_per_layer_.push_back(k);
+        num_acc_global_qubits = num_global_qubits_;
+        break;
+      }
+
+      num_global_qubits_per_layer_.push_back(layer.num_global_qubits);
+      num_acc_global_qubits += layer.num_global_qubits;
+    }
+
+    if (num_acc_global_qubits < num_global_qubits_) {
+      IO::errorf("error: too few network layers at %s %d.\n",
+                 __FILE__, __LINE__);
+      exit(1);
+    }
+
+    memory_sharing_method_ = CUSTATEVEC_EX_MEMORY_SHARING_METHOD_NONE;
+
+    // Any inter-process P2P layer switches memory sharing to autodetection.
+    for (const auto& layer : network_layers) {
+      if (layer.global_index_bit_class ==
+          CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P) {
+        memory_sharing_method_ = CUSTATEVEC_EX_MEMORY_SHARING_METHOD_AUTODETECT;
+        break;
+      }
+    }
+
+    num_processes_ = num_processes;
+    rank_ = rank;
+    initialized_ = true;
+  }
+
+  /**
+   * Creates a multi-process state-vector configuration.
+   * @param num_qubits Total number of qubits of the state vector.
+   * @param data_type Data type of the state-vector elements.
+   * @return The configuration, or nullptr if this object is not initialized,
+   *   if num_qubits is less than 3, or if fewer than two local qubits would
+   *   remain.
+   */
+  auto create_sv_config(unsigned num_qubits, cudaDataType_t data_type) const {
+    custatevecExDictionaryDescriptor_t sv_config = nullptr;
+
+    if (!initialized_ ||
+        num_qubits < 3 || num_global_qubits_ + 2 > num_qubits) {
+      return sv_config;
+    }
+
+    unsigned num_local_qubits = num_qubits - num_global_qubits_;
+
+    ErrorCheck(custatevecExConfigureStateVectorMultiProcess(
+        &sv_config, data_type, num_qubits, num_local_qubits, -1,
+        memory_sharing_method_, global_index_bit_classes_.data(),
+        reinterpret_cast(num_global_qubits_per_layer_.data()),
+        static_cast(global_index_bit_classes_.size()),
+        param_.transfer_buffer_size, nullptr, 0));
+
+    return sv_config;
+  }
+
+ private:
+  Parameter param_;
+  custatevecExCommunicatorDescriptor_t communicator_;
+  std::vector num_global_qubits_per_layer_;
+  std::vector global_index_bit_classes_;
+  // Defaults keep the accessors well defined when initialize() was not
+  // called or returned early.
+  custatevecExMemorySharingMethod_t memory_sharing_method_ =
+      CUSTATEVEC_EX_MEMORY_SHARING_METHOD_NONE;
+  unsigned num_processes_ = 0;
+  unsigned num_global_qubits_ = 0;
+  unsigned rank_ = 0;
+  bool initialized_;
+
+  // One network layer: the index-bit class used for its global qubits and
+  // its capacity in global qubits (0: no limit).
+  struct NetworkLayer {
+    custatevecExGlobalIndexBitClass_t global_index_bit_class;
+    unsigned num_global_qubits;
+  };
+
+  using NetworkLayers = std::vector;
+
+  // Returns the layers of the given network type, innermost layer first.
+  // An unknown type yields an empty list.
+  static NetworkLayers get_network_layers(NetworkType id) {
+    switch (id) {
+    case kSuperPod:
+      return {{CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P, 3},
+              {CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_COMMUNICATOR, 0}};
+    case kGB200NVL:
+      return {{CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P, 0}};
+      break;
+    case kSwitchTree:
+      return {{CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P, 2},
+              {CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P, 1}};
+      break;
+    case kCommunicator:
+      return {{CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_COMMUNICATOR, 0}};
+      break;
+    }
+
+    return NetworkLayers{};
+  }
+};
+
+}  // namespace qsim
+
+#endif  // MULTIPROCESS_CUSTATEVECEX_H_
diff --git a/lib/run_custatevecex.h b/lib/run_custatevecex.h
new file mode 100644
index 000000000..2a2b8b1da
--- /dev/null
+++ b/lib/run_custatevecex.h
@@ -0,0 +1,313 @@
+// Copyright 2025 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RUN_CUSTATEVECEX_H_
+#define RUN_CUSTATEVECEX_H_
+
+#include
+#include
+#include
+
+#include
+
+#include "circuit.h"
+#include "util.h"
+#include "util_custatevec.h"
+#include "util_custatevecex.h"
+
+namespace qsim {
+
+/**
+ * Helper struct for running qsim with the cuStateVecEx library.
+ */
+template
+struct CuStateVecExRunner final {
+ public:
+  using Simulator = typename Factory::Simulator;
+  using StateSpace = typename Simulator::StateSpace;
+  using State = typename StateSpace::State;
+  using MeasurementResult = typename StateSpace::MeasurementResult;
+
+  /**
+   * User-specified parameters for simulation.
+   */
+  struct Parameter {
+    /**
+     * Random number generator seed to apply measurement gates.
+     */
+    uint64_t seed;
+
+    /**
+     * If greater than zero, the simulation time is printed.
+     */
+    unsigned verbosity = 0;
+  };
+
+  /**
+   * Runs the given circuit, only measuring at the end.
+   * @param param Random seed and verbosity.
+   * @param factory Object to create simulators and state spaces.
+   * @param circuit The circuit to be simulated. It must contain at least one
+   *   gate, since the time of the last gate is used.
+   * @param measure Function that performs measurements (in the sense of
+   *   computing expectation values, etc).
+   * @return True if the simulation completed successfully; false otherwise.
+   */
+  template
+  static bool Run(const Parameter& param, const Factory& factory,
+                  const Circuit& circuit, MeasurementFunc measure) {
+    return Run(param, factory, {circuit.gates.back().time}, circuit, measure);
+  }
+
+  /**
+   * Runs the given circuit, measuring at user-specified times.
+   * @param param Random seed and verbosity.
+   * @param factory Object to create simulators and state spaces.
+   * @param times_to_measure_at Time steps at which to perform measurements.
+   * @param circuit The circuit to be simulated.
+   * @param measure Function that performs measurements (in the sense of
+   *   computing expectation values, etc).
+   * @return True if the simulation completed successfully; false otherwise.
+   */
+  template
+  static bool Run(const Parameter& param, const Factory& factory,
+                  const std::vector& times_to_measure_at,
+                  const Circuit& circuit, MeasurementFunc measure) {
+    std::vector discarded_results;
+
+    StateSpace state_space = factory.CreateStateSpace();
+    Simulator simulator = factory.CreateSimulator();
+
+    auto state = state_space.Create(circuit.num_qubits);
+    if (state_space.IsNull(state)) {
+      IO::errorf("not enough memory: is the number of qubits too large?\n");
+      return false;
+    }
+
+    state_space.SetStateZero(state);
+
+    return Run(param, circuit, state_space, simulator, state,
+               times_to_measure_at, measure, discarded_results);
+  }
+
+  /**
+   * Runs the given circuit and make the final state available to the caller,
+   * recording the result of any intermediate measurements in the circuit.
+   * @param param Random seed and verbosity.
+   * @param factory Object to create simulators and state spaces.
+   * @param circuit The circuit to be simulated.
+   * @param state As an input parameter, this should contain the initial state
+   *   of the system. After a successful run, it will be populated with the
+   *   final state of the system.
+   * @param measure_results As an input parameter, this should be empty.
+   *   After a successful run, this will contain all measurements results from
+   *   the run, ordered by time and qubit index.
+   * @return True if the simulation completed successfully; false otherwise.
+   */
+  template
+  static bool Run(const Parameter& param, const Factory& factory,
+                  const Circuit& circuit, State& state,
+                  std::vector& measure_results) {
+    auto measure = [](unsigned, const StateSpace&, const State&) {};
+
+    StateSpace state_space = factory.CreateStateSpace();
+    Simulator simulator = factory.CreateSimulator();
+
+    return Run(param, circuit, state_space, simulator, state,
+               {}, measure, measure_results);
+  }
+
+  /**
+   * Runs the given circuit and make the final state available to the caller,
+   * discarding the result of any intermediate measurements in the circuit.
+   * @param param Random seed and verbosity.
+   * @param factory Object to create simulators and state spaces.
+   * @param circuit The circuit to be simulated.
+   * @param state As an input parameter, this should contain the initial state
+   *   of the system. After a successful run, it will be populated with the
+   *   final state of the system.
+   * @return True if the simulation completed successfully; false otherwise.
+   */
+  template
+  static bool Run(const Parameter& param, const Factory& factory,
+                  const Circuit& circuit, State& state) {
+    auto measure = [](unsigned, const StateSpace&, const State&) {};
+
+    StateSpace state_space = factory.CreateStateSpace();
+    Simulator simulator = factory.CreateSimulator();
+
+    std::vector discarded_results;
+
+    return Run(param, circuit, state_space, simulator, state,
+               {}, measure, discarded_results);
+  }
+
+  /**
+   * Runs the given circuit and make the final state available to the caller,
+   * recording the result of any intermediate measurements in the circuit.
+   * @param param Random seed and verbosity.
+   * @param circuit The circuit to be simulated.
+   * @param state_space StateSpace object required to perform measurements.
+   * @param simulator Simulator object. Provides specific implementations for
+   *   applying gates.
+   * @param state As an input parameter, this should contain the initial state
+   *   of the system. After a successful run, it will be populated with the
+   *   final state of the system.
+   * @param measure_results As an input parameter, this should be empty.
+   *   After a successful run, this will contain all measurements results from
+   *   the run, ordered by time and qubit index.
+   * @return True if the simulation completed successfully; false otherwise.
+   */
+  template
+  static bool Run(const Parameter& param, const Circuit& circuit,
+                  const StateSpace& state_space, const Simulator& simulator,
+                  State& state,
+                  std::vector& measure_results) {
+    auto measure = [](unsigned, const StateSpace&, const State&) {};
+
+    return Run(param, circuit, state_space, simulator, state,
+               {}, measure, measure_results);
+  }
+
+  /**
+   * Runs the given circuit and make the final state available to the caller,
+   * discarding the result of any intermediate measurements in the circuit.
+   * @param param Random seed and verbosity.
+   * @param circuit The circuit to be simulated.
+   * @param state_space StateSpace object required to perform measurements.
+   * @param simulator Simulator object. Provides specific implementations for
+   *   applying gates.
+   * @param state As an input parameter, this should contain the initial state
+   *   of the system. After a successful run, it will be populated with the
+   *   final state of the system.
+   * @return True if the simulation completed successfully; false otherwise.
+   */
+  template
+  static bool Run(const Parameter& param, const Circuit& circuit,
+                  const StateSpace& state_space, const Simulator& simulator,
+                  State& state) {
+    auto measure = [](unsigned, const StateSpace&, const State&) {};
+
+    std::vector discarded_results;
+
+    return Run(param, circuit, state_space, simulator, state,
+               {}, measure, discarded_results);
+  }
+
+ private:
+  /**
+   * Common implementation. Gates are enqueued in an SVUpdater and applied to
+   * the state lazily: the queue is flushed before a measurement gate, before
+   * a zero-qubit gate, before each `measure` callback and at the end.
+   */
+  template
+  static bool Run(const Parameter& param, const Circuit& circuit,
+                  const StateSpace& state_space, const Simulator& simulator,
+                  State& state,
+                  const std::vector& times_to_measure_at,
+                  MeasurementFunc measure,
+                  std::vector& measure_results) {
+    double t0 = 0.0;
+
+    RGen rgen(param.seed);
+
+    custatevecExSVUpdaterDescriptor_t sv_updater = nullptr;
+    custatevecExDictionaryDescriptor_t sv_updater_config = nullptr;
+
+    ErrorCheck(custatevecExConfigureSVUpdater(
+        &sv_updater_config, StateSpace::kStateDataType, nullptr, 0));
+
+    ErrorCheck(
+        custatevecExSVUpdaterCreate(&sv_updater, sv_updater_config, nullptr));
+    ErrorCheck(custatevecExDictionaryDestroy(sv_updater_config));
+
+    if (param.verbosity > 0) {
+      t0 = GetTime();
+    }
+
+    unsigned cur_time_index = 0;
+
+    using Gates = detail::Gates;
+    const auto& gates = Gates::get(circuit);
+
+    for (std::size_t i = 0; i < gates.size(); ++i) {
+      const auto& gate = Gates::gate(gates[i]);
+      unsigned num_qubits = gate.qubits.size();
+      unsigned num_cqubits = gate.controlled_by.size();
+
+      if (gate.kind == gate::kMeasurement) {
+        // Flush the queued gates before measuring.
+        ErrorCheck(
+            custatevecExSVUpdaterApply(sv_updater, state.get(), nullptr, 0));
+        ErrorCheck(custatevecExSVUpdaterClear(sv_updater));
+
+        auto measure_result = state_space.Measure(gate.qubits, rgen, state);
+        if (!measure_result.valid) {
+          IO::errorf("measurement failed.\n");
+          // Do not leak the updater on the failure path.
+          ErrorCheck(custatevecExSVUpdaterDestroy(sv_updater));
+          return false;
+        }
+
+        measure_results.push_back(std::move(measure_result));
+      } else if (num_cqubits == 0) {
+        if (num_qubits == 0) {
+          // Zero-qubit gates are applied by the simulator directly, after
+          // the queued gates have been flushed.
+          ErrorCheck(
+              custatevecExSVUpdaterApply(sv_updater, state.get(), nullptr, 0));
+          ErrorCheck(custatevecExSVUpdaterClear(sv_updater));
+
+          simulator.ApplyGate(gate.qubits, gate.matrix.data(), state);
+        } else {
+          ErrorCheck(custatevecExSVUpdaterEnqueueMatrix(
+              sv_updater, gate.matrix.data(), StateSpace::kMatrixDataType,
+              StateSpace::kExMatrixType, StateSpace::kMatrixLayout, 0,
+              reinterpret_cast(gate.qubits.data()),
+              num_qubits, nullptr, nullptr, 0));
+        }
+      } else {
+        // Bit k of cmask is the required value of the k-th control qubit.
+        std::vector control_bits;
+        control_bits.reserve(num_cqubits);
+
+        for (std::size_t k = 0; k < num_cqubits; ++k) {
+          control_bits.push_back((gate.cmask >> k) & 1);
+        }
+
+        ErrorCheck(custatevecExSVUpdaterEnqueueMatrix(
+            sv_updater, gate.matrix.data(), StateSpace::kMatrixDataType,
+            StateSpace::kExMatrixType, StateSpace::kMatrixLayout, 0,
+            reinterpret_cast(gate.qubits.data()), num_qubits,
+            reinterpret_cast(gate.controlled_by.data()),
+            control_bits.data(), num_cqubits));
+      }
+
+      // Guard on the index, not just on emptiness: once every requested
+      // time has been handled there is no further element to read.
+      if (cur_time_index < times_to_measure_at.size()) {
+        unsigned t = times_to_measure_at[cur_time_index];
+
+        if (i == gates.size() - 1 || t < Gates::gate(gates[i + 1]).time) {
+          ErrorCheck(
+              custatevecExSVUpdaterApply(sv_updater, state.get(), nullptr, 0));
+          ErrorCheck(custatevecExSVUpdaterClear(sv_updater));
+
+          // Call back to perform measurements.
+          measure(cur_time_index, state_space, state);
+          ++cur_time_index;
+        }
+      }
+    }
+
+    ErrorCheck(custatevecExSVUpdaterApply(sv_updater, state.get(), nullptr, 0));
+
+    if (param.verbosity > 0) {
+      state_space.DeviceSync();
+      double t1 = GetTime();
+      IO::messagef("simu time is %g seconds.\n", t1 - t0);
+    }
+
+    ErrorCheck(custatevecExSVUpdaterDestroy(sv_updater));
+
+    return true;
+  }
+};
+
+}  // namespace qsim
+
+#endif  // RUN_CUSTATEVECEX_H_
diff --git a/lib/simulator_custatevec.h b/lib/simulator_custatevec.h
index b3f3cb8fa..a13c6e1af 100644
--- a/lib/simulator_custatevec.h
+++ b/lib/simulator_custatevec.h
@@ -82,8 +82,9 @@ class SimulatorCuStateVec final {
       ErrorCheck(custatevecApplyMatrix(
                      custatevec_handle_, state.get(), kStateType,
                      state.num_qubits(), matrix, kMatrixType, kMatrixLayout, 0,
-                     (int32_t*) qs.data(), qs.size(), nullptr, nullptr, 0,
-                     kComputeType, workspace_, workspace_size));
+                     reinterpret_cast(qs.data()), qs.size(),
+                     nullptr, nullptr, 0, kComputeType, workspace_,
+                     workspace_size));
     }
   }

@@ -118,9 +119,10 @@ class SimulatorCuStateVec final {
       ErrorCheck(custatevecApplyMatrix(
                      custatevec_handle_, state.get(), kStateType,
                      state.num_qubits(), matrix, kMatrixType, kMatrixLayout, 0,
-                     (int32_t*) qs.data(), qs.size(),
-                     (int32_t*) cqs.data(), control_bits.data(), cqs.size(),
-                     kComputeType, workspace_, workspace_size));
+                     reinterpret_cast(qs.data()), qs.size(),
+                     reinterpret_cast(cqs.data()),
+                     control_bits.data(), cqs.size(), kComputeType,
+                     workspace_, workspace_size));
     }
   }

@@ -144,9 +146,12 @@ class SimulatorCuStateVec final {
     ErrorCheck(custatevecComputeExpectation(
                    custatevec_handle_, state.get(), kStateType,
                    state.num_qubits(), &eval, kExpectType, nullptr, matrix,
-                   kMatrixType, kMatrixLayout, (int32_t*) qs.data(), qs.size(),
+                   kMatrixType, kMatrixLayout,
+                   reinterpret_cast(qs.data()), qs.size(),
                    kComputeType, workspace_, workspace_size));
+
+    ErrorCheck(cudaDeviceSynchronize());
+
     return {cuCreal(eval), cuCimag(eval)};
   }

diff --git 
a/lib/simulator_custatevecex.h b/lib/simulator_custatevecex.h
new file mode 100644
index 000000000..bcfb2c519
--- /dev/null
+++ b/lib/simulator_custatevecex.h
@@ -0,0 +1,243 @@
+// Copyright 2025 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SIMULATOR_CUSTATEVECEX_H_
+#define SIMULATOR_CUSTATEVECEX_H_
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include "io.h"
+#include "statespace_custatevecex.h"
+#include "util_custatevec.h"
+#include "util_custatevecex.h"
+
+namespace qsim {
+
+/**
+ * Quantum circuit simulator using the NVIDIA cuStateVecEx library.
+ */
+template
+class SimulatorCuStateVecEx final {
+ public:
+  using StateSpace = StateSpaceCuStateVecEx;
+  using State = typename StateSpace::State;
+  using fp_type = typename StateSpace::fp_type;
+
+  static constexpr auto kStateDataType = StateSpace::kStateDataType;
+  static constexpr auto kMatrixDataType = StateSpace::kMatrixDataType;
+  static constexpr auto kExMatrixType = StateSpace::kExMatrixType;
+  static constexpr auto kMatrixLayout = StateSpace::kMatrixLayout;
+  // Expectation values are always returned in double precision.
+  static constexpr auto kExpectDataType = CUDA_C_64F;
+  static constexpr auto kComputeType =
+      StateSpace::is_float ? CUSTATEVEC_COMPUTE_32F : CUSTATEVEC_COMPUTE_64F;
+
+  SimulatorCuStateVecEx() {}
+
+  /**
+   * Applies a gate using the NVIDIA cuStateVecEx library. A gate without
+   * qubits multiplies the state by the complex number (matrix[0], matrix[1]).
+   * Terminates the process if the gate acts on more qubits than there are
+   * local qubits.
+   * @param qs Indices of the qubits affected by this gate.
+   * @param matrix Matrix representation of the gate to be applied.
+   * @param state The state of the system, to be updated by this method.
+   */
+  void ApplyGate(const std::vector& qs,
+                 const fp_type* matrix, State& state) const {
+    if (qs.size() == 0) {
+      StateSpace::Multiply(matrix[0], matrix[1], state);
+    } else {
+      unsigned num_qubits = state.num_qubits();
+      unsigned num_global_qubits = get_num_global_qubits(state.num_substates());
+      unsigned num_local_qubits = num_qubits - num_global_qubits;
+
+      if (qs.size() > num_local_qubits) {
+        IO::errorf("error: the number of gate qubits exceeds the number of "
+                   "local qubits at %s %d.\n", __FILE__, __LINE__);
+        exit(1);
+      }
+
+      ErrorCheck(custatevecExApplyMatrix(
+          state.get(), matrix, kMatrixDataType, kExMatrixType, kMatrixLayout,
+          0, reinterpret_cast(qs.data()), qs.size(),
+          nullptr, nullptr, 0));
+    }
+  }
+
+  /**
+   * Applies a controlled gate using the NVIDIA cuStateVecEx library.
+   * Controlled gates without target qubits are not implemented: the process
+   * is terminated in that case, and also if the gate acts on more qubits
+   * than there are local qubits.
+   * @param qs Indices of the qubits affected by this gate.
+   * @param cqs Indices of control qubits.
+   * @param cmask Bit mask of control qubit values.
+   * @param matrix Matrix representation of the gate to be applied.
+   * @param state The state of the system, to be updated by this method.
+   */
+  void ApplyControlledGate(const std::vector& qs,
+                           const std::vector& cqs, uint64_t cmask,
+                           const fp_type* matrix, State& state) const {
+    if (qs.size() == 0) {
+      IO::errorf(
+          "error: controlled global phase gate is not implemented %s %d.\n",
+          __FILE__, __LINE__);
+      exit(1);
+    } else {
+      unsigned num_qubits = state.num_qubits();
+      unsigned num_global_qubits = get_num_global_qubits(state.num_substates());
+      unsigned num_local_qubits = num_qubits - num_global_qubits;
+
+      if (qs.size() > num_local_qubits) {
+        IO::errorf("error: the number of gate qubits exceeds the number of "
+                   "local qubits at %s %d.\n", __FILE__, __LINE__);
+        exit(1);
+      }
+
+      // Bit i of cmask is the required value of control qubit cqs[i].
+      std::vector control_bits;
+      control_bits.reserve(cqs.size());
+
+      for (std::size_t i = 0; i < cqs.size(); ++i) {
+        control_bits.push_back((cmask >> i) & 1);
+      }
+
+      ErrorCheck(custatevecExApplyMatrix(
+          state.get(), matrix, kMatrixDataType, kExMatrixType, kMatrixLayout,
+          0, reinterpret_cast(qs.data()), qs.size(),
+          reinterpret_cast(cqs.data()), control_bits.data(),
+          cqs.size()));
+    }
+  }
+
+  /**
+   * Computes the expectation value of an operator using the NVIDIA cuStateVec
+   * library on each sub state vector; the per-substate values are combined
+   * by state.reduce(). Index bits of the state may be permuted so that all
+   * operator qubits are at local positions. Terminates the process if the
+   * operator acts on more qubits than there are local qubits.
+   * @param qs Indices of the qubits the operator acts on.
+   * @param matrix The operator matrix.
+   * @param state The state of the system.
+   * @return The computed expectation value.
+   */
+  std::complex ExpectationValue(const std::vector& qs,
+                                        const fp_type* matrix,
+                                        const State& state) const {
+    unsigned num_qubits = state.num_qubits();
+    unsigned num_global_qubits = get_num_global_qubits(state.num_substates());
+    unsigned num_local_qubits = num_qubits - num_global_qubits;
+
+    if (qs.size() > num_local_qubits) {
+      IO::errorf("error: the number of gate qubits exceeds the number of "
+                 "local qubits at %s %d.\n", __FILE__, __LINE__);
+      exit(1);
+    }
+
+    const auto& wire_ordering = state.get_wire_ordering();
+
+    // Wire ordering can be arbitrary. The following lines make qs consistent
+    // with wire ordering and permute bits if necessary.
+
+    // perm starts as the identity permutation of the index bits.
+    std::vector perm;
+    perm.reserve(num_qubits);
+
+    for (unsigned i = 0; i < num_qubits; ++i) {
+      perm.push_back(i);
+    }
+
+    // l is the next candidate local position; qs2[k] is the position of
+    // qubit qs[k] in the current wire ordering.
+    unsigned l = 0;
+    std::vector qs2(qs.size());
+
+    for (unsigned k = 0; k < qs.size(); ++k) {
+      for (unsigned i = 0; i < num_qubits; ++i) {
+        if (qs[k] == (unsigned) wire_ordering[i]) {
+          qs2[k] = i;
+          break;
+        }
+      }
+    }
+
+    // Every operator qubit at a non-local position is swapped with the
+    // lowest position that no operator qubit occupies.
+    for (unsigned k = 0; k < qs2.size(); ++k) {
+      if (qs2[k] >= num_local_qubits) {
+        // Advance l while it is used by some entry of qs2; the scan restarts
+        // after each increment and ends once a full pass finds no match.
+        unsigned j = 0;
+        while (j < qs2.size()) {
+          for (j = 0; j < qs2.size(); ++j) {
+            if (qs2[j] == l) {
+              ++l;
+
+              if (l == num_local_qubits) {
+                // We should not get here.
+                IO::errorf("error: internal error at %s %d.\n",
+                           __FILE__, __LINE__);
+                exit(1);
+              }
+
+              break;
+            }
+          }
+        }
+
+        std::swap(perm[qs2[k]], perm[l]);
+        qs2[k] = l++;
+      }
+    }
+
+    // l is nonzero only if at least one qubit had to be moved.
+    // NOTE(review): the permutation is not undone here; presumably the state
+    // tracks it in its wire ordering - confirm.
+    if (l > 0) {
+      ErrorCheck(custatevecExStateVectorPermuteIndexBits(
+          state.get(), reinterpret_cast(perm.data()),
+          num_qubits, CUSTATEVEC_EX_PERMUTATION_SCATTER));
+    }
+
+    // Computes the expectation value on one sub state vector, using the
+    // device and the handle of the resource `r`.
+    auto f = [&matrix, &state, &num_local_qubits, &qs2](
+        unsigned i, const auto& r) {
+      void* workspace;
+      size_t workspace_size;
+
+      ErrorCheck(cudaSetDevice(r.device_id));
+
+      ErrorCheck(custatevecComputeExpectationGetWorkspaceSize(
+          r.custatevec_handle, kStateDataType, num_local_qubits, matrix,
+          kMatrixDataType, kMatrixLayout, qs2.size(), kComputeType,
+          &workspace_size));
+
+      // TODO: reuse allocated memory.
+      ErrorCheck(cudaMalloc(&workspace, workspace_size));
+
+      cuDoubleComplex eval;
+
+      ErrorCheck(custatevecComputeExpectation(
+          r.custatevec_handle, r.device_ptr, kStateDataType, num_local_qubits,
+          &eval, kExpectDataType, nullptr, matrix, kMatrixDataType,
+          kMatrixLayout, reinterpret_cast(qs2.data()),
+          qs2.size(), kComputeType, workspace, workspace_size));
+
+      // TODO: make it faster.
+      ErrorCheck(custatevecExStateVectorSynchronize(state.get()));
+      ErrorCheck(cudaFree(workspace));
+
+      return std::complex{cuCreal(eval), cuCimag(eval)};
+    };
+
+    return state.reduce(f);
+  }
+
+  /**
+   * @return The size of SIMD register if applicable.
+   */
+  static unsigned SIMDRegisterSize() {
+    return 32;
+  }
+
+ private:
+};
+
+}  // namespace qsim
+
+#endif  // SIMULATOR_CUSTATEVECEX_H_
diff --git a/lib/statespace_custatevec.h b/lib/statespace_custatevec.h
index f2f5de107..6bd0a37d2 100644
--- a/lib/statespace_custatevec.h
+++ b/lib/statespace_custatevec.h
@@ -306,8 +306,10 @@ class StateSpaceCuStateVec :

     ErrorCheck(custatevecBatchMeasure(
                    custatevec_handle_, state.get(), kStateType,
-                   state.num_qubits(), (int*) result.bitstring.data(),
-                   (int*) qubits.data(), qubits.size(), r, collapse));
+                   state.num_qubits(),
+                   reinterpret_cast(result.bitstring.data()),
+                   reinterpret_cast(qubits.data()), qubits.size(),
+                   r, collapse));

     for (std::size_t i = 0; i < result.bitstring.size(); ++i) {
       result.bits |= result.bitstring[i] << qubits[i];
diff --git a/lib/statespace_custatevecex.h b/lib/statespace_custatevecex.h
new file mode 100644
index 000000000..ce5cb0c3e
--- /dev/null
+++ b/lib/statespace_custatevecex.h
@@ -0,0 +1,431 @@
+// Copyright 2025 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef STATESPACE_CUSTATEVECEX_H_
+#define STATESPACE_CUSTATEVECEX_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "multiprocess_custatevecex.h"
+#include "statespace.h"
+#include "util_custatevec.h"
+#include "util_custatevecex.h"
+#include "vectorspace_custatevecex.h"
+
+namespace qsim {
+
+namespace detail {
+
+// Sets each of the first `size` complex amplitudes stored at `state` to
+// (v, 0). The buffer is addressed as interleaved (real, imaginary) pairs of
+// FP values; each thread handles the amplitude whose index equals its
+// global thread index and does nothing if that index is >= size.
+template
+__global__ void SetStateKernel(FP v, uint64_t size, void* state) {
+  uint64_t k = uint64_t{blockIdx.x} * blockDim.x + threadIdx.x;
+
+  if (k < size) {
+    ((FP*) state)[2 * k] = v;
+    ((FP*) state)[2 * k + 1] = 0;
+  }
+}
+
+}  // namespace detail
+
+/**
+ * Object containing context and routines for cuStateVec state-vector
+ * manipulations. It is not recommended to use `GetAmpl` and `SetAmpl`.
+ */
+template
+class StateSpaceCuStateVecEx :
+    public StateSpace, VectorSpaceCuStateVecEx, FP> {
+ private:
+  using Base =
+      StateSpace, VectorSpaceCuStateVecEx, FP>;
+
+ public:
+  using State = typename Base::State;
+  using fp_type = typename Base::fp_type;
+  using Parameter = typename Base::Parameter;
+
+  static constexpr auto kStateDataType = Base::kStateDataType;
+  static constexpr auto kMatrixDataType = kStateDataType;
+  static constexpr auto kExMatrixType = CUSTATEVEC_EX_MATRIX_DENSE;
+  static constexpr auto kMatrixLayout = CUSTATEVEC_MATRIX_LAYOUT_ROW;
+
+  // Forwards `param` and the multi-process context `mp` to the base class.
+  explicit StateSpaceCuStateVecEx(const MultiProcessCuStateVecEx& mp,
+                                  Parameter param = Parameter{})
+      : Base(param, mp) {}
+
+  // Returns 2 * 2^num_qubits, i.e. two values (real and imaginary parts)
+  // for each of the 2^num_qubits amplitudes.
+  static uint64_t MinSize(unsigned num_qubits) {
+    return 2 * (uint64_t{1} << num_qubits);
+  };
+
+  // Delegates to State::to_normal_order().
+  void InternalToNormalOrder(State& state) const {
+    state.to_normal_order();
+  }
+
+  // Intentionally empty: nothing is done when going back from the normal
+  // order.
+  void NormalToInternalOrder(State& state) const {
+  }
+
+  // Sets every amplitude to zero. `size` is the number of amplitudes held by
+  // one substate; the callback is run by State::assign for each local
+  // substate and launches SetStateKernel on that substate's device pointer.
+  void SetAllZeros(State& state) const {
+    uint64_t size = (uint64_t{1} << state.num_qubits()) / state.num_substates();
+
+    auto f = [&size](unsigned i, const auto& r) {
+      // At most 256 threads per block; a substate smaller than that uses a
+      // single block with `size` threads.
+      unsigned threads = size < 256 ? size : 256;
+      unsigned blocks = size / threads;
+      fp_type zero = 0.0;
+      detail::SetStateKernel<<>>(zero, size, r.device_ptr);
+    };
+
+    state.assign(f);
+  }
+
+  // Uniform superposition.
+  void SetStateUniform(State& state) const {
+    uint64_t size = uint64_t{1} << state.num_qubits();
+    // Every amplitude gets the real value 1 / sqrt(2^num_qubits). The value
+    // is computed from the full state size before `size` is reduced to the
+    // per-substate amplitude count.
+    fp_type v = double{1} / std::sqrt(size);
+    size /= state.num_substates();
+
+    auto f = [&size, &v](unsigned i, const auto& r) {
+      unsigned threads = size < 256 ? size : 256;
+      unsigned blocks = size / threads;
+      detail::SetStateKernel<<>>(v, size, r.device_ptr);
+    };
+
+    state.assign(f);
+  }
+
+  // |0> state.
+  void SetStateZero(State& state) const {
+    ErrorCheck((custatevecExStateVectorSetZeroState(state.get())));
+  }
+
+  // It is not recommended to use this function.
+  // Returns the amplitude with index `i`. The index is first translated to
+  // the state's current wire ordering; in multi-process mode only the rank
+  // that owns the translated index reads it, and the value is then broadcast
+  // from that rank to all processes.
+  std::complex GetAmpl(const State& state, uint64_t i) const {
+    fp_type buf[2] = {0, 0};
+
+    // Bit j of k is bit wire_ordering[j] of i.
+    uint64_t k = 0;
+    const auto& wire_ordering = state.get_wire_ordering();
+    for (unsigned j = 0; j < state.num_qubits(); ++j) {
+      k |= ((i >> wire_ordering[j]) & 1) << j;
+    }
+
+    // `size` is the number of amplitudes per substate, so k / size is the
+    // index of the substate (rank) that holds amplitude k.
+    uint64_t size = (uint64_t{1} << state.num_qubits()) / state.num_substates();
+    unsigned required_rank = k / size;
+
+    if (state.distr_type() != Base::kMultiProcess
+        || Base::mp.rank() == required_rank) {
+      ErrorCheck(custatevecExStateVectorGetState(
+          state.get(), buf, kStateDataType, k, k + 1, 1));
+    }
+
+    ErrorCheck(custatevecExStateVectorSynchronize(state.get()));
+
+    if (state.distr_type() == Base::kMultiProcess) {
+      auto cuda_type = GetCudaType>();
+      auto comm = Base::mp.communicator();
+      ErrorCheck(comm->intf->bcast(comm, buf, 1, cuda_type, required_rank));
+    }
+
+    return {buf[0], buf[1]};
+  }
+
+  // It is not recommended to use this function.
+  void SetAmpl(
+      State& state, uint64_t i, const std::complex& ampl) const {
+    // Same operation as the (re, im) overload below; delegate to it instead
+    // of repeating the index translation and rank selection.
+    SetAmpl(state, i, std::real(ampl), std::imag(ampl));
+  }
+
+  // It is not recommended to use this function.
+  // Sets the amplitude with index `i` to (re, im). The index is first
+  // translated to the state's current wire ordering; in multi-process mode
+  // only the rank that owns the translated index performs the write. Every
+  // caller synchronizes the state vector afterwards.
+  void SetAmpl(State& state, uint64_t i, fp_type re, fp_type im) const {
+    fp_type buf[2] = {re, im};
+
+    // Bit j of k is bit wire_ordering[j] of i.
+    uint64_t k = 0;
+    const auto& wire_ordering = state.get_wire_ordering();
+    for (unsigned j = 0; j < state.num_qubits(); ++j) {
+      k |= ((i >> wire_ordering[j]) & 1) << j;
+    }
+
+    // `size` is the number of amplitudes per substate, so k / size is the
+    // index of the substate (rank) that holds amplitude k.
+    uint64_t size = (uint64_t{1} << state.num_qubits()) / state.num_substates();
+    unsigned required_rank = k / size;
+
+    if (state.distr_type() != Base::kMultiProcess
+        || Base::mp.rank() == required_rank) {
+      ErrorCheck(custatevecExStateVectorSetState(
+          state.get(), buf, kStateDataType, k, k + 1, 1));
+    }
+
+    ErrorCheck(custatevecExStateVectorSynchronize(state.get()));
+  }
+
+  // Sets state[i] = complex(re, im) where (i & mask) == bits.
+  // if `exclude` is true then the criteria becomes (i & mask) != bits.
+  // NOTE: this backend does not implement the operation; the call returns
+  // without touching `state`.
+  static void BulkSetAmpl(State& state, uint64_t mask, uint64_t bits,
+                          const std::complex& val,
+                          bool exclude = false) {
+    // Not implemented.
+  }
+
+  // Sets state[i] = complex(re, im) where (i & mask) == bits.
+  // if `exclude` is true then the criteria becomes (i & mask) != bits.
+  // NOTE: this backend does not implement the operation; the call returns
+  // without touching `state`.
+  static void BulkSetAmpl(State& state, uint64_t mask, uint64_t bits, fp_type re,
+                          fp_type im, bool exclude = false) {
+    // Not implemented.
+  }
+
+  // Does the equivalent of dest += src elementwise.
+  // Returns false (leaving dest untouched) if the two states have different
+  // numbers of qubits, true otherwise.
+  bool Add(const State& src, State& dest) const {
+    if (src.num_qubits() != dest.num_qubits()) {
+      return false;
+    }
+
+    // Number of amplitudes held by one substate.
+    uint64_t size = (uint64_t{1} << src.num_qubits()) / src.num_substates();
+
+    // Runs once per local substate: rd / rs are the resources of the
+    // destination and source substates. A cuBLAS handle bound to the
+    // destination stream is created for the call and destroyed afterwards.
+    auto f = [&size](unsigned i, const auto& rd, const auto& rs) {
+      cublasHandle_t cublas_handle;
+      ErrorCheck(cublasCreate(&cublas_handle));
+      ErrorCheck(cublasSetStream(cublas_handle, rd.stream));
+
+      // axpy with a = 1: rd <- 1 * rs + rd.
+      if (Base::is_float) {
+        cuComplex a = {1.0, 0.0};
+        auto p1 = (const cuComplex*) rs.device_ptr;
+        auto p2 = (cuComplex*) rd.device_ptr;
+        ErrorCheck(cublasCaxpy(cublas_handle, size, &a, p1, 1, p2, 1));
+      } else {
+        cuDoubleComplex a = {1.0, 0.0};
+        auto p1 = (const cuDoubleComplex*) rs.device_ptr;
+        auto p2 = (cuDoubleComplex*) rd.device_ptr;
+        ErrorCheck(cublasZaxpy(cublas_handle, size, &a, p1, 1, p2, 1));
+      }
+
+      ErrorCheck(cudaStreamSynchronize(rd.stream));
+      ErrorCheck(cublasDestroy(cublas_handle));
+    };
+
+    dest.assign(src, f);
+
+    return true;
+  }
+
+  // Does the equivalent of state *= a elementwise.
+  static void Multiply(fp_type a, State& state) {
+    // Number of amplitudes held by one substate.
+    uint64_t size = (uint64_t{1} << state.num_qubits()) / state.num_substates();
+
+    // Runs once per local substate; scales the complex vector by the real
+    // factor `a` using a cuBLAS handle bound to the substate's stream.
+    auto f = [&a, &size](unsigned i, const auto& r) {
+      cublasHandle_t cublas_handle;
+      ErrorCheck(cublasCreate(&cublas_handle));
+      ErrorCheck(cublasSetStream(cublas_handle, r.stream));
+
+      if (Base::is_float) {
+        float a1 = a;
+        auto p = (cuComplex*) r.device_ptr;
+        ErrorCheck(cublasCsscal(cublas_handle, size, &a1, p, 1));
+      } else {
+        double a1 = a;
+        auto p = (cuDoubleComplex*) r.device_ptr;
+        ErrorCheck(cublasZdscal(cublas_handle, size, &a1, p, 1));
+      }
+
+      ErrorCheck(cudaStreamSynchronize(r.stream));
+      ErrorCheck(cublasDestroy(cublas_handle));
+    };
+
+    return state.assign(f);
+  }
+
+  // Does the equivalent of state *= (re + i im) elementwise.
+  static void Multiply(fp_type re, fp_type im, State& state) {
+    // Number of amplitudes held by one substate.
+    uint64_t size = (uint64_t{1} << state.num_qubits()) / state.num_substates();
+
+    // Runs once per local substate; scales the complex vector by the complex
+    // factor (re, im) using a cuBLAS handle bound to the substate's stream.
+    auto f = [&re, &im, &size](unsigned i, const auto& r) {
+      cublasHandle_t cublas_handle;
+      ErrorCheck(cublasCreate(&cublas_handle));
+      ErrorCheck(cublasSetStream(cublas_handle, r.stream));
+
+      if (Base::is_float) {
+        cuComplex a = {float(re), float(im)};
+        auto p = (cuComplex*) r.device_ptr;
+        ErrorCheck(cublasCscal(cublas_handle, size, &a, p, 1));
+      } else {
+        cuDoubleComplex a = {re, im};
+        auto p = (cuDoubleComplex*) r.device_ptr;
+        ErrorCheck(cublasZscal(cublas_handle, size, &a, p, 1));
+      }
+
+      ErrorCheck(cudaStreamSynchronize(r.stream));
+      ErrorCheck(cublasDestroy(cublas_handle));
+    };
+
+    return state.assign(f);
+  }
+
+  // Returns the inner product of state1 and state2, computed per substate
+  // with the cuBLAS conjugated dot product and combined by State::reduce.
+  // Returns NaN if the states have different numbers of qubits.
+  static std::complex InnerProduct(
+      const State& state1, const State& state2) {
+    if (state1.num_qubits() != state2.num_qubits()) {
+      return std::nan("");
+    }
+
+    // Number of amplitudes held by one substate.
+    uint64_t size =
+        (uint64_t{1} << state1.num_qubits()) / state1.num_substates();
+
+    auto f = [&size](unsigned i, const auto& r1, const auto& r2) {
+      cublasHandle_t cublas_handle;
+      ErrorCheck(cublasCreate(&cublas_handle));
+      ErrorCheck(cublasSetStream(cublas_handle, r1.stream));
+
+      // Each branch synchronizes the stream and destroys the cuBLAS handle
+      // before returning. Doing this after the if/else would be unreachable
+      // (both branches return) and would leak one handle per call.
+      if (Base::is_float) {
+        cuComplex result;
+        auto p1 = (const cuComplex*) r1.device_ptr;
+        auto p2 = (const cuComplex*) r2.device_ptr;
+        ErrorCheck(cublasCdotc(cublas_handle, size, p1, 1, p2, 1, &result));
+        ErrorCheck(cudaStreamSynchronize(r1.stream));
+        ErrorCheck(cublasDestroy(cublas_handle));
+        return std::complex{cuCrealf(result), cuCimagf(result)};
+      } else {
+        cuDoubleComplex result;
+        auto p1 = (const cuDoubleComplex*) r1.device_ptr;
+        auto p2 = (const cuDoubleComplex*) r2.device_ptr;
+        ErrorCheck(cublasZdotc(cublas_handle, size, p1, 1, p2, 1, &result));
+        ErrorCheck(cudaStreamSynchronize(r1.stream));
+        ErrorCheck(cublasDestroy(cublas_handle));
+        return std::complex{cuCreal(result), cuCimag(result)};
+      }
+    };
+
+    return state1.reduce(state2, f);
+  }
+
+  // Real part of InnerProduct(state1, state2).
+  double RealInnerProduct(const State& state1, const State& state2) const {
+    return std::real(InnerProduct(state1, state2));
+  }
+
+  // Returns the value computed by custatevecExAbs2SumArray for `state`,
+  // after synchronizing the state vector.
+  double Norm(const State& state) const {
+    double norm;
+
+    ErrorCheck(custatevecExAbs2SumArray(
+        state.get(), &norm, nullptr, 0, nullptr, nullptr, 0));
+    ErrorCheck(custatevecExStateVectorSynchronize(state.get()));
+
+    return norm;
+  }
+
+  // Draws `num_samples` bitstrings from `state` over all of its qubits,
+  // using random values generated from `seed`. Returns an empty vector if
+  // num_samples is 0.
+  template
+  std::vector Sample(
+      const State& state, uint64_t num_samples, unsigned seed) const {
+    std::vector bitstrings;
+
+    if (num_samples > 0) {
+      auto rs = GenerateRandomValues(num_samples, seed, 1.0);
+
+      std::vector bitstrings0(num_samples);
+
+      // Sample every wire 0 .. num_qubits - 1. The vector has to be
+      // resized, not merely reserved: writing wires[i] past size() is
+      // undefined behaviour.
+      std::vector wires;
+      wires.resize(state.num_qubits());
+      for (unsigned i = 0; i < state.num_qubits(); ++i) {
+        wires[i] = i;
+      }
+
+      ErrorCheck(custatevecExSample(
+          state.get(), bitstrings0.data(), wires.data(), state.num_qubits(),
+          rs.data(), num_samples, CUSTATEVEC_SAMPLER_OUTPUT_RANDNUM_ORDER,
+          nullptr));
+      ErrorCheck(custatevecExStateVectorSynchronize(state.get()));
+
+      bitstrings.reserve(num_samples);
+      // 64-bit index: num_samples is uint64_t, so an `unsigned` counter
+      // could never reach a bound above its own maximum.
+      for (uint64_t i = 0; i < num_samples; ++i) {
+        bitstrings.push_back(bitstrings0[i]);
+      }
+    }
+
+    return bitstrings;
+  }
+
+  using MeasurementResult = typename Base::MeasurementResult;
+
+  // Measures `qubits` in `state` using one random value drawn from `rgen`.
+  // The state is collapsed unless `no_collapse` is true. If any qubit index
+  // is out of range the result is returned with valid == false and the
+  // state is not touched.
+  template
+  MeasurementResult Measure(const std::vector& qubits,
+                            RGen& rgen, State& state,
+                            bool no_collapse = false) const {
+    auto r = RandomValue(rgen, 1.0);
+
+    MeasurementResult result;
+
+    result.valid = true;
+    result.mask = 0;
+    result.bits = 0;
+    result.bitstring.resize(qubits.size(), 0);
+
+    for (auto q : qubits) {
+      if (q >= state.num_qubits()) {
+        result.valid = false;
+        return result;
+      }
+
+      result.mask |= uint64_t{1} << q;
+    }
+
+    auto collapse = no_collapse ?
+        CUSTATEVEC_COLLAPSE_NONE : CUSTATEVEC_COLLAPSE_NORMALIZE_AND_ZERO;
+
+    custatevecIndex_t bits;
+
+    ErrorCheck(custatevecExMeasure(
+        state.get(), &bits, reinterpret_cast(qubits.data()),
+        qubits.size(), r, collapse, nullptr));
+    ErrorCheck(custatevecExStateVectorSynchronize(state.get()));
+
+    // Bit i of `bits` is the outcome for qubits[i]; store it per qubit and
+    // also at the qubit's own position in result.bits.
+    for (std::size_t i = 0; i < qubits.size(); ++i) {
+      uint64_t bit = (bits >> i) & 1;
+      result.bitstring[i] = bit;
+      result.bits |= bit << qubits[i];
+    }
+
+    return result;
+  }
+
+  // Measure() with no_collapse == true.
+  template
+  MeasurementResult VirtualMeasure(const std::vector& qubits,
+                                   RGen& rgen, const State& state) const {
+    return Measure(qubits, rgen, const_cast(state), true);
+  }
+
+  // NOTE: this backend does not implement the operation; the call returns
+  // without touching `state`.
+  void Collapse(const MeasurementResult& mr, State& state) const {
+    // Not implemented.
+  }
+};
+
+}  // namespace qsim
+
+#endif  // STATESPACE_CUSTATEVECEX_H_
diff --git a/lib/util_cuda.h b/lib/util_cuda.h
index 5d8cb5df3..b34292753 100644
--- a/lib/util_cuda.h
+++ b/lib/util_cuda.h
@@ -22,6 +22,7 @@
 #endif
 
 #include
+#include
 
 #include "io.h"
 
@@ -31,11 +32,27 @@ namespace qsim {
 
 inline void ErrorAssert(cudaError_t code, const char* file, unsigned line) {
   if (code != cudaSuccess) {
-    IO::errorf("CUDA error: %s %s %d\n", cudaGetErrorString(code), file, line);
+    IO::errorf(
+        "CUDA error: %s at %s %d\n", cudaGetErrorString(code), file, line);
     exit(code);
   }
 }
 
+template
+inline auto GetCudaType() {
+  if (std::is_same_v) {
+    return CUDA_R_32F;
+  } else if (std::is_same_v) {
+    return CUDA_R_64F;
+  } else if (std::is_same_v>) {
+    return CUDA_C_32F;
+  } else if (std::is_same_v>) {
+    return CUDA_C_64F;
+  }
+
+  return CUDA_C_64F;
+}
+
 template
 struct Complex {
   __host__ __device__ __forceinline__ Complex() {}
diff --git a/lib/util_custatevec.h b/lib/util_custatevec.h
index 36f29efab..d37858b40 100644
--- a/lib/util_custatevec.h
+++ b/lib/util_custatevec.h
@@ -25,7 +25,7 @@ namespace qsim {
 
 inline void ErrorAssert(cublasStatus_t code, const char* file, unsigned line) {
   if (code != CUBLAS_STATUS_SUCCESS) {
-    IO::errorf("cuBLAS error %i: %s %d\n", code, file, line);
+    IO::errorf("cuBLAS error %d at %s %d\n", code, file, line);
     exit(code);
   }
 }
@@ -33,7 +33,7 @@ inline void ErrorAssert(cublasStatus_t code, const char* file, unsigned line) {
 inline void ErrorAssert(
     custatevecStatus_t code, const char* file, unsigned line) {
   if (code != CUSTATEVEC_STATUS_SUCCESS) {
-    IO::errorf("custatevec error: %s %s %d\n",
+    IO::errorf("cuStateVec error: %s at %s %d\n",
                custatevecGetErrorString(code), file, line);
     exit(code);
   }
diff --git a/lib/util_custatevecex.h b/lib/util_custatevecex.h
new file mode 100644
index 000000000..ab57d7e85
--- /dev/null
+++ b/lib/util_custatevecex.h
@@ -0,0 +1,46 @@
+// Copyright 2025 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef UTIL_CUSTATEVECEX_H_
+#define UTIL_CUSTATEVECEX_H_
+
+#include
+#include
+
+#include "io.h"
+#include "util_cuda.h"
+
+namespace qsim {
+
+// Reports a cuStateVecEx communicator failure: if `code` is not the success
+// status, prints the numeric code together with the file and line passed in
+// and terminates the process with `code` as the exit status.
+inline void ErrorAssert(
+    custatevecExCommunicatorStatus_t code, const char* file, unsigned line) {
+  if (code != CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS) {
+    IO::errorf(
+        "cuStateVecEx communicator error %d at %s %d\n", code, file, line);
+    exit(code);
+  }
+}
+
+// Returns floor(log2(num_devices)): the number of times num_devices can be
+// halved before reaching zero, minus one. Returns 0 for num_devices equal to
+// 0 or 1; a count that is not a power of two is effectively rounded down
+// (e.g. 3 -> 1, 8 -> 3).
+inline unsigned get_num_global_qubits(unsigned num_devices) {
+  unsigned num_global_qubits = 0;
+  while ((num_devices >>= 1) > 0) {
+    ++num_global_qubits;
+  }
+
+  return num_global_qubits;
+}
+
+}  // namespace qsim
+
+#endif  // UTIL_CUSTATEVECEX_H_
diff --git a/lib/vectorspace_custatevecex.h b/lib/vectorspace_custatevecex.h
new file mode 100644
index 000000000..3fa26a931
--- /dev/null
+++ b/lib/vectorspace_custatevecex.h
@@ -0,0 +1,610 @@
+// Copyright 2025 Google LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef VECTORSPACE_CUSTATEVECEX_H_
+#define VECTORSPACE_CUSTATEVECEX_H_
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "io.h"
+#include "multiprocess_custatevecex.h"
+#include "util_cuda.h"
+#include "util_custatevec.h"
+#include "util_custatevecex.h"
+
+namespace qsim {
+
+namespace detail {
+
+// No-op: does nothing with `ptr`.
+inline void free(void* ptr) {}
+
+}  // namespace detail
+
+// Routines for vector manipulations.
+template
+class VectorSpaceCuStateVecEx {
+ public:
+  using fp_type = FP;
+
+  static constexpr auto is_float = std::is_same::value;
+  static constexpr auto kStateDataType = is_float ? CUDA_C_32F : CUDA_C_64F;
+
+  // How the amplitudes of a Vector are distributed.
+  enum DistributionType {
+    kNoDistr,
+    kSingleDevice,
+    kMultiDevice,
+    kMultiProcess,
+  };
+
+  enum DeviceNetworkType {
+    kSwitch = 0,
+    kFullMesh = 1,
+  };
+
+  struct Parameter {
+    // Number of devices to use in single-process mode; 0 means "use the
+    // device count reported by cudaGetDeviceCount" (see Create()).
+    unsigned num_devices = 0;
+    DeviceNetworkType device_network_type = kSwitch;
+    // Values above 2 make Create() print the selected mode.
+    unsigned verbosity = 0;
+  };
+
+  // Move-only owner of a cuStateVecEx state-vector descriptor. The
+  // descriptor is destroyed when the Vector is destroyed or move-assigned
+  // over, unless release() was called first.
+  class Vector {
+   public:
+    // Per-substate resources as reported by
+    // custatevecExStateVectorGetResourcesFromDeviceSubSV.
+    struct CuStateVecResources {
+      int32_t device_id = -1;
+      void* device_ptr = nullptr;
+      cudaStream_t stream = nullptr;
+      custatevecHandle_t custatevec_handle = nullptr;
+    };
+
+    Vector(const Vector&) = delete;
+    Vector& operator=(const Vector&) = delete;
+
+    Vector() : mp_(nullptr), ptr_(nullptr),
+               num_qubits_(0), num_substates_(0), distr_type_(kNoDistr) {}
+
+    Vector(const MultiProcessCuStateVecEx* mp,
+           custatevecExStateVectorDescriptor_t ptr, unsigned num_qubits,
+           unsigned num_substates, DistributionType distr_type)
+        : mp_(mp), ptr_(ptr), wire_ordering_(num_qubits),
+          num_qubits_(num_qubits), num_substates_(num_substates),
+          distr_type_(distr_type) {}
+
+    // Takes over r's descriptor and leaves r empty.
+    Vector(Vector&& r) : mp_(r.mp_), ptr_(r.ptr_),
+                         wire_ordering_(std::move(r.wire_ordering_)),
+                         num_qubits_(r.num_qubits_), num_substates_(r.num_substates_),
+                         distr_type_(r.distr_type_) {
+      r.mp_ = nullptr;
+      r.ptr_ = nullptr;
+      r.num_qubits_ = 0;
+      r.num_substates_ = 0;
+      r.distr_type_ = kNoDistr;
+    }
+
+    ~Vector() {
+      if (ptr_ != nullptr) {
+        ErrorCheck(custatevecExStateVectorDestroy(ptr_));
+      }
+    }
+
+    // Takes over r's descriptor and leaves r empty.
+    Vector& operator=(Vector&& r) {
+      if (this != &r) {
+        // Destroy the state vector currently owned by this object before
+        // taking over r's; otherwise the old descriptor would be leaked.
+        if (ptr_ != nullptr) {
+          ErrorCheck(custatevecExStateVectorDestroy(ptr_));
+        }
+
+        mp_ = r.mp_;
+        ptr_ = r.ptr_;
+        wire_ordering_ = std::move(r.wire_ordering_);
+        num_qubits_ = r.num_qubits_;
+        num_substates_ = r.num_substates_;
+        distr_type_ = r.distr_type_;
+
+        r.mp_ = nullptr;
+        r.ptr_ = nullptr;
+        r.num_qubits_ = 0;
+        r.num_substates_ = 0;
+        r.distr_type_ = kNoDistr;
+      }
+
+      return *this;
+    }
+
+    auto get() {
+      return ptr_;
+    }
+
+    const auto get() const {
+      return ptr_;
+    }
+
+    // Gives up ownership: returns the descriptor and resets this object so
+    // that the destructor no longer destroys it.
+    custatevecExStateVectorDescriptor_t release() {
+      auto ptr = ptr_;
+
+      mp_ = nullptr;
+      ptr_ = nullptr;
+      num_qubits_ = 0;
+      num_substates_ = 0;
+      distr_type_ = kNoDistr;
+
+      return ptr;
+    }
+
+    unsigned num_qubits() const {
+      return num_qubits_;
+    }
+
+    unsigned num_substates() const {
+      return num_substates_;
+    }
+
+    DistributionType distr_type() const {
+      return distr_type_;
+    }
+
+    static constexpr bool requires_copy_to_host() {
+      return true;
+    }
+
+    // Queries the current wire ordering from cuStateVecEx into the cached
+    // buffer and returns a reference to that buffer. The buffer is
+    // overwritten by every call.
+    const auto& get_wire_ordering() const {
+      ErrorCheck(custatevecExStateVectorGetProperty(
+          ptr_, CUSTATEVEC_EX_SV_PROP_WIRE_ORDERING,
+          wire_ordering_.data(), sizeof(int32_t) * num_qubits_));
+
+      return wire_ordering_;
+    }
+
+    // Permutes the index bits of the state vector according to its current
+    // wire ordering (scatter permutation).
+    void to_normal_order() const {
+      const auto& wire_ordering = get_wire_ordering();
+
+      ErrorCheck(custatevecExStateVectorPermuteIndexBits(
+          ptr_, wire_ordering.data(), num_qubits_,
+          CUSTATEVEC_EX_PERMUTATION_SCATTER));
+    }
+
+    // Returns device id, device pointer, stream and cuStateVec handle of the
+    // substate with the given index.
+    CuStateVecResources get_resources(unsigned substate_index) const {
+      CuStateVecResources r;
+
+      ErrorCheck(custatevecExStateVectorGetResourcesFromDeviceSubSV(
+          ptr_, substate_index, &r.device_id, &r.device_ptr, &r.stream,
+          &r.custatevec_handle));
+
+      return r;
+    }
+
+    // Calls callback(substate_index, resources) for every substate held by
+    // this process: the single local substate in multi-process mode,
+    // substate 0 when there is only one substate, otherwise each substate in
+    // turn after making its device current.
+    template
+    void assign(Callback&& callback) const {
+      if (distr_type_ == kMultiProcess) {
+        // One device (and hence one substate) per process.
+        unsigned num_devices = 1;
+        std::vector substate_indices(num_devices);
+
+        ErrorCheck(custatevecExStateVectorGetProperty(
+            ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+            substate_indices.data(), num_devices * sizeof(int32_t)));
+
+        unsigned k = substate_indices[0];
+        auto res = get_resources(k);
+
+        ErrorCheck(cudaSetDevice(res.device_id));
+
+        callback(k, res);
+      } else {
+        if (num_substates_ == 1) {
+          callback(0, get_resources(0));
+        } else {
+          std::vector substate_indices(num_substates_);
+          ErrorCheck(custatevecExStateVectorGetProperty(
+              ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+              substate_indices.data(), num_substates_ * sizeof(int32_t)));
+
+          for (unsigned i = 0; i < num_substates_; ++i) {
+            unsigned k = substate_indices[i];
+            auto res = get_resources(k);
+
+            ErrorCheck(cudaSetDevice(res.device_id));
+
+            callback(k, res);
+          }
+        }
+      }
+    }
+
+    // Like assign(callback), but combines the callback results: across
+    // processes with the communicator's allreduce in multi-process mode, by
+    // summation over substates in multi-device mode.
+    template
+    auto reduce(Callback&& callback) const {
+      using ResultType = std::invoke_result_t;
+
+      if (distr_type_ == kMultiProcess) {
+        unsigned num_devices = 1;
+        std::vector substate_indices(num_devices);
+
+        ErrorCheck(custatevecExStateVectorGetProperty(
+            ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+            substate_indices.data(), num_devices * sizeof(int32_t)));
+
+        unsigned k = substate_indices[0];
+        auto res = get_resources(k);
+
+        ErrorCheck(cudaSetDevice(res.device_id));
+
+        ResultType r;
+        ResultType local_r = callback(k, res);
+
+        auto cuda_type = GetCudaType();
+        auto comm = mp_->communicator();
+        ErrorCheck(comm->intf->allreduce(comm, &local_r, &r, 1, cuda_type));
+
+        return r;
+      } else {
+        if (num_substates_ == 1) {
+          return callback(0, get_resources(0));
+        } else {
+          std::vector substate_indices(num_substates_);
+          ErrorCheck(custatevecExStateVectorGetProperty(
+              ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+              substate_indices.data(), num_substates_ * sizeof(int32_t)));
+
+          ResultType r = 0;
+
+          for (unsigned i = 0; i < num_substates_; ++i) {
+            unsigned k = substate_indices[i];
+            auto res = get_resources(k);
+
+            ErrorCheck(cudaSetDevice(res.device_id));
+
+            r += callback(k, res);
+          }
+
+          return r;
+        }
+      }
+    }
+
+    // Two-vector form of assign: calls callback(substate_index, res1, res2)
+    // where res1 belongs to this vector and res2 to `vec`, using the same
+    // substate index for both.
+    template
+    void assign(const Vector& vec, Callback&& callback) const {
+      if (distr_type_ == kMultiProcess) {
+        unsigned num_devices = 1;
+        std::vector substate_indices(num_devices);
+
+        ErrorCheck(custatevecExStateVectorGetProperty(
+            ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+            substate_indices.data(), num_devices * sizeof(int32_t)));
+
+        unsigned k = substate_indices[0];
+        auto res1 = get_resources(k);
+        auto res2 = vec.get_resources(k);
+
+        ErrorCheck(cudaSetDevice(res1.device_id));
+
+        callback(k, res1, res2);
+      } else {
+        if (num_substates_ == 1) {
+          callback(0, get_resources(0), vec.get_resources(0));
+        } else {
+          std::vector substate_indices(num_substates_);
+          ErrorCheck(custatevecExStateVectorGetProperty(
+              ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+              substate_indices.data(), num_substates_ * sizeof(int32_t)));
+
+          for (unsigned i = 0; i < num_substates_; ++i) {
+            unsigned k = substate_indices[i];
+            auto res1 = get_resources(k);
+            auto res2 = vec.get_resources(k);
+
+            ErrorCheck(cudaSetDevice(res1.device_id));
+
+            callback(k, res1, res2);
+          }
+        }
+      }
+    }
+
+    // Two-vector form of reduce. Except in the single-substate case, the
+    // stream of `vec`'s substate is synchronized before the callback runs.
+    template
+    auto reduce(const Vector& vec, Callback&& callback) const {
+      using ResultType = std::invoke_result_t;
+
+      if (distr_type_ == kMultiProcess) {
+        unsigned num_devices = 1;
+        std::vector substate_indices(num_devices);
+
+        ErrorCheck(custatevecExStateVectorGetProperty(
+            ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+            substate_indices.data(), num_devices * sizeof(int32_t)));
+
+        unsigned k = substate_indices[0];
+        auto res1 = get_resources(k);
+        auto res2 = vec.get_resources(k);
+
+        ErrorCheck(cudaSetDevice(res2.device_id));
+        ErrorCheck(cudaStreamSynchronize(res2.stream));
+
+        ResultType r;
+        ResultType local_r = callback(k, res1, res2);
+
+        auto cuda_type = GetCudaType();
+        auto comm = mp_->communicator();
+        ErrorCheck(comm->intf->allreduce(comm, &local_r, &r, 1, cuda_type));
+
+        return r;
+      } else {
+        if (num_substates_ == 1) {
+          return callback(0, get_resources(0), vec.get_resources(0));
+        } else {
+          std::vector substate_indices(num_substates_);
+          ErrorCheck(custatevecExStateVectorGetProperty(
+              ptr_, CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES,
+              substate_indices.data(), num_substates_ * sizeof(int32_t)));
+
+          ResultType r = 0;
+
+          for (unsigned i = 0; i < num_substates_; ++i) {
+            unsigned k = substate_indices[i];
+            auto res1 = get_resources(k);
+            auto res2 = vec.get_resources(k);
+
+            ErrorCheck(cudaSetDevice(res2.device_id));
+            ErrorCheck(cudaStreamSynchronize(res2.stream));
+
+            r += callback(k, res1, res2);
+          }
+
+          return r;
+        }
+      }
+    }
+
+   private:
+    const MultiProcessCuStateVecEx* mp_;
+    custatevecExStateVectorDescriptor_t ptr_;
+    // Scratch buffer refreshed by get_wire_ordering(); mutable so that the
+    // query can be made on a const Vector.
+    mutable std::vector wire_ordering_;
+    unsigned num_qubits_;
+    unsigned num_substates_;
+    DistributionType distr_type_;
+  };
+
+  VectorSpaceCuStateVecEx(const Parameter& param,
+                          const MultiProcessCuStateVecEx& mp)
+      : param(param), mp(mp) {}
+
+  // Creates a state vector for `num_qubits` qubits. If the multi-process
+  // context supplies a configuration, a multi-process state vector is
+  // created; otherwise a single-process one on one or several devices,
+  // depending on Parameter::num_devices, the number of visible devices and
+  // the number of qubits.
+  Vector Create(unsigned num_qubits) const {
+    custatevecExStateVectorDescriptor_t state_vec;
+    custatevecExDictionaryDescriptor_t sv_config
+        = mp.create_sv_config(num_qubits, kStateDataType);
+
+    unsigned num_substates = 1;
+    DistributionType distr_type = kNoDistr;
+
+    if (sv_config != nullptr) {
+      ErrorCheck(custatevecExStateVectorCreateMultiProcess(
+          &state_vec, sv_config, nullptr, mp.communicator(), nullptr));
+
+      num_substates = mp.num_processes();
+      distr_type = kMultiProcess;
+
+      if (param.verbosity > 2) {
+        unsigned num_global_qubits = get_num_global_qubits(num_substates);
+        IO::messagef("multi-process mode: %u %u.\n",
+                     num_qubits, num_global_qubits);
+      }
+    } else {
+      num_substates = param.num_devices;
+
+      if (num_qubits < 3) {
+        num_substates = 1;
+      } else if (num_substates == 0) {
+        int count = 0;
+        ErrorCheck(cudaGetDeviceCount(&count));
+        num_substates = count;
+      }
+
+      if (num_substates == 1) {
+        ErrorCheck(custatevecExConfigureStateVectorSingleDevice(
+            &sv_config, kStateDataType, num_qubits, num_qubits, -1, 0));
+
+        distr_type = kSingleDevice;
+
+        if (param.verbosity > 2) {
+          IO::messagef("single device mode.\n");
+        }
+      } else {
+        unsigned num_global_qubits = get_num_global_qubits(num_substates);
+
+        // get_num_global_qubits() rounds down to a power of two. Round the
+        // device count the same way so that
+        // num_substates == 2^num_global_qubits always holds (3 devices ->
+        // 2, 0 devices -> 1); all per-substate sizes are computed as
+        // 2^num_qubits / num_substates.
+        num_substates = 1u << num_global_qubits;
+
+        // Keep at least two local qubits per substate.
+        while (num_global_qubits + 2 > num_qubits && num_substates > 1) {
+          num_substates /= 2;
+          --num_global_qubits;
+        }
+
+        if (num_substates == 1) {
+          ErrorCheck(custatevecExConfigureStateVectorSingleDevice(
+              &sv_config, kStateDataType, num_qubits, num_qubits, -1, 0));
+
+          distr_type = kSingleDevice;
+
+          if (param.verbosity > 2) {
+            IO::messagef("single-device mode (too few qubits).\n");
+          }
+        } else {
+          // Use devices 0 .. num_substates - 1.
+          std::vector device_ids(num_substates);
+          for (unsigned i = 0; i < num_substates; ++i) {
+            device_ids[i] = i;
+          }
+
+          unsigned num_local_qubits = num_qubits - num_global_qubits;
+
+          auto device_network_type =
+              get_device_network_type(param.device_network_type);
+
+          ErrorCheck(custatevecExConfigureStateVectorMultiDevice(
+              &sv_config, kStateDataType, num_qubits, num_local_qubits,
+              device_ids.data(), num_substates, device_network_type, 0));
+
+          distr_type = kMultiDevice;
+
+          if (param.verbosity > 2) {
+            IO::messagef("multi-device mode: %u %u.\n",
+                         num_qubits, num_global_qubits);
+          }
+        }
+      }
+
+      ErrorCheck(custatevecExStateVectorCreateSingleProcess(
+          &state_vec, sv_config, nullptr, 0, nullptr));
+    }
+
+    ErrorCheck(custatevecExDictionaryDestroy(sv_config));
+
+    return Vector{&mp, state_vec, num_qubits, num_substates, distr_type};
+  }
+
+  // Returns a Vector that owns no state vector.
+  static Vector Null() {
+    return Vector{nullptr, nullptr, 0, 0, kNoDistr};
+  }
+
+  static bool IsNull(const Vector& vector) {
+    return vector.get() == nullptr;
+  }
+
+  // Copies src to dest substate by substate with a device-to-device
+  // cudaMemcpy. Returns false if the numbers of qubits differ.
+  bool Copy(const Vector& src, Vector& dest) const {
+    if (src.num_qubits() != dest.num_qubits()) {
+      return false;
+    }
+
+    // Number of amplitudes held by one substate; each amplitude is two
+    // fp_type values.
+    uint64_t size = (uint64_t{1} << src.num_qubits()) / src.num_substates();
+
+    auto f = [&size](unsigned i, const auto& rd, const auto& rs) {
+      ErrorCheck(cudaMemcpy(
+          rd.device_ptr, rs.device_ptr, 2 * sizeof(fp_type) * size,
+          cudaMemcpyDeviceToDevice));
+    };
+
+    dest.assign(src, f);
+
+    return true;
+  }
+
+  // It is the client's responsibility to make sure that dest has at least
+  // 2^src.num_qubits() elements.
+  bool Copy(const Vector& src, fp_type* dest) const {
+    if (src.distr_type() == kMultiProcess) {
+      // Each process reads its own contiguous slice of `size` amplitudes
+      // into the matching position of `dest` (two fp_type values per
+      // amplitude), then the slices are exchanged with allgather.
+      uint64_t size = (uint64_t{1} << src.num_qubits()) / src.num_substates();
+      uint64_t offset = size * mp.rank();
+
+      ErrorCheck(custatevecExStateVectorGetState(
+          src.get(), dest + 2 * offset, kStateDataType,
+          offset, offset + size, 1));
+      ErrorCheck(custatevecExStateVectorSynchronize(src.get()));
+
+      auto cuda_type = GetCudaType>();
+      auto comm = mp.communicator();
+      ErrorCheck(comm->intf->allgather(
+          comm, dest + 2 * offset, dest, size, cuda_type));
+    } else {
+      // Single process: read all 2^num_qubits amplitudes in one call.
+      uint64_t size = uint64_t{1} << src.num_qubits();
+      ErrorCheck(custatevecExStateVectorGetState(
+          src.get(), dest, kStateDataType, 0, size, 1));
+      ErrorCheck(custatevecExStateVectorSynchronize(src.get()));
+    }
+
+    return true;
+  }
+
+  // It is the client's responsibility to make sure that src has at least
+  // 2^dest.num_qubits() elements.
+  bool Copy(const fp_type* src, Vector& dest) const {
+    if (dest.distr_type() == kMultiProcess) {
+      // Each process writes only its own slice of `size` amplitudes, taken
+      // from the matching position of `src`.
+      uint64_t size = (uint64_t{1} << dest.num_qubits()) / dest.num_substates();
+      uint64_t offset = size * mp.rank();
+
+      ErrorCheck(custatevecExStateVectorSetState(
+          dest.get(), src + 2 * offset, kStateDataType,
+          offset, offset + size, 1));
+    } else {
+      uint64_t size = uint64_t{1} << dest.num_qubits();
+      ErrorCheck(custatevecExStateVectorSetState(
+          dest.get(), src, kStateDataType, 0, size, 1));
+    }
+
+    ErrorCheck(custatevecExStateVectorSynchronize(dest.get()));
+
+    // TODO: do we need that?
+    dest.to_normal_order();
+
+    return true;
+  }
+
+  // It is the client's responsibility to make sure that src has at least
+  // 2^dest.num_qubits() elements.
+ bool Copy(const fp_type* src, uint64_t size, Vector& dest) const { + size = size / 2; + + if (size != (uint64_t{1} << dest.num_qubits())) { + IO::errorf("wrong size in VectorSpaceCuStateVecEx::Copy.\n"); + return false; + } + + if (dest.distr_type() == kMultiProcess) { + size /= dest.num_substates(); + uint64_t offset = size * mp.rank(); + + ErrorCheck(custatevecExStateVectorSetState( + dest.get(), src + 2 * offset, kStateDataType, + offset, offset + size, 1)); + } else { + ErrorCheck(custatevecExStateVectorSetState( + dest.get(), src, kStateDataType, 0, size, 1)); + } + + ErrorCheck(custatevecExStateVectorSynchronize(dest.get())); + + // TODO: do we need that? + dest.to_normal_order(); + + return true; + } + + static void DeviceSync() { + ErrorCheck(cudaDeviceSynchronize()); + } + + protected: + Parameter param; + const MultiProcessCuStateVecEx& mp; + + private: + static custatevecDeviceNetworkType_t get_device_network_type( + DeviceNetworkType id) { + custatevecDeviceNetworkType_t device_network_type = + CUSTATEVEC_DEVICE_NETWORK_TYPE_SWITCH; + + switch (id) { + case kSwitch: + device_network_type = CUSTATEVEC_DEVICE_NETWORK_TYPE_SWITCH; + break; + case kFullMesh: + device_network_type = CUSTATEVEC_DEVICE_NETWORK_TYPE_FULLMESH; + break; + } + + return device_network_type; + } +}; + +} // namespace qsim + +#endif // VECTORSPACE_CUSTATEVECEX_H_ diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index 9bcc54b63..f9693c82c 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -21,6 +21,7 @@ QSIMLIB_AVX2 = ../qsimcirq/qsim_avx2$(SUFFIX) QSIMLIB_AVX512 = ../qsimcirq/qsim_avx512$(SUFFIX) QSIMLIB_CUDA = ../qsimcirq/qsim_cuda$(SUFFIX) QSIMLIB_CUSTATEVEC = ../qsimcirq/qsim_custatevec$(SUFFIX) +QSIMLIB_CUSTATEVECEX = ../qsimcirq/qsim_custatevecex$(SUFFIX) QSIMLIB_HIP = ../qsimcirq/qsim_hip$(SUFFIX) QSIMLIB_DECIDE = ../qsimcirq/qsim_decide$(SUFFIX) @@ -66,7 +67,7 @@ else ifeq ($(CUQUANTUM_ROOT),) pybind: pybind-cpu pybind-cuda 
decide-cuda else -pybind: pybind-cpu pybind-cuda pybind-custatevec decide-custatevec +pybind: pybind-cpu pybind-cuda pybind-custatevec pybind-custatevecex decide-custatevec endif endif @@ -94,9 +95,13 @@ decide-cuda: pybind-custatevec: $(NVCC) custatevec/pybind_main_custatevec.cpp -o $(QSIMLIB_CUSTATEVEC) $(NVCCFLAGS) $(PYBINDFLAGS_CUSTATEVEC) +.PHONY: pybind-custatevecex +pybind-custatevecex: + $(NVCC) custatevecex/pybind_main_custatevecex.cpp -o $(QSIMLIB_CUSTATEVECEX) $(NVCCFLAGS) $(PYBINDFLAGS_CUSTATEVEC) + .PHONY: decide-custatevec decide-custatevec: - $(NVCC) decide/decide.cpp -D__CUSTATEVEC__ -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) + $(NVCC) decide/decide.cpp -D__CUSTATEVEC__ -D__CUSTATEVECEX__ -o $(QSIMLIB_DECIDE) $(NVCCFLAGS) $(PYBINDFLAGS_CUDA) .PHONY: pybind-hip pybind-hip: @@ -119,4 +124,5 @@ clean: -rm -f ./cuda/*.x ./cuda/*.a ./cuda/*.so ./cuda/*.mod $(QSIMLIB_CUDA) -rm -f ./hip/*.x ./hip/*.a ./hip/*.so ./hip/*.mod $(QSIMLIB_HIP) -rm -f ./custatevec/*.x ./custatevec/*.a ./custatevec/*.so ./custatevec/*.mod $(QSIMLIB_CUSTATEVEC) + -rm -f ./custatevecex/*.x ./custatevecex/*.a ./custatevecex/*.so ./custatevecex/*.mod $(QSIMLIB_CUSTATEVECEX) -rm -f ./decide/*.x ./decide/*.a ./decide/*.so ./decide/*.mod $(QSIMLIB_DECIDE) diff --git a/pybind_interface/cuda/CMakeLists.txt b/pybind_interface/cuda/CMakeLists.txt index 6ef6be3cf..39d0b8bba 100644 --- a/pybind_interface/cuda/CMakeLists.txt +++ b/pybind_interface/cuda/CMakeLists.txt @@ -22,7 +22,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. 
add_compile_options($<$>:/O2>) else() - add_compile_options(-O3 -flto=auto) + add_compile_options(-O3 -fno-lto) endif() if(APPLE) diff --git a/pybind_interface/custatevec/CMakeLists.txt b/pybind_interface/custatevec/CMakeLists.txt index 2bdd34c12..e4a5f808c 100644 --- a/pybind_interface/custatevec/CMakeLists.txt +++ b/pybind_interface/custatevec/CMakeLists.txt @@ -21,7 +21,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. add_compile_options($<$>:/O2>) else() - add_compile_options(-O3 -flto=auto) + add_compile_options(-O3 -fno-lto) endif() if(APPLE) diff --git a/pybind_interface/custatevecex/CMakeLists.txt b/pybind_interface/custatevecex/CMakeLists.txt new file mode 100644 index 000000000..f25216af4 --- /dev/null +++ b/pybind_interface/custatevecex/CMakeLists.txt @@ -0,0 +1,59 @@ +# Copyright 2025 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.28) +project(qsim LANGUAGES CXX CUDA) + +if(WIN32) + add_compile_options(/openmp) + # Add /O2 to any configuration that is NOT Debug. + # This prevents a conflict with /RTC1 in DEBUG builds. 
+ add_compile_options($<$>:/O2>) +else() + add_compile_options(-O3 -fno-lto) +endif() + +if(APPLE) + include_directories( + "/usr/local/include" + "/usr/local/opt/llvm/include" + "/opt/homebrew/include" + "/opt/homebrew/opt/llvm@19/include" + ) + link_directories( + "/usr/local/lib" + "/usr/local/opt/llvm/lib" + "/opt/homebrew/lib" + "/opt/homebrew/opt/llvm@19/lib" + ) +endif() + +include(../GetPybind11.cmake) +find_package(Python3 3.10 REQUIRED) + +include_directories(${pybind11_INCLUDE_DIRS}) + +include_directories($ENV{CUQUANTUM_ROOT}/include) +link_directories($ENV{CUQUANTUM_ROOT}/lib $ENV{CUQUANTUM_ROOT}/lib64) + +add_library(qsim_custatevecex MODULE pybind_main_custatevecex.cpp) +target_link_libraries(qsim_custatevecex -lcustatevec -lcublas) + +set_target_properties(qsim_custatevecex PROPERTIES + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" +) +set_source_files_properties(pybind_main_custatevecex.cpp PROPERTIES LANGUAGE CUDA) + +target_link_libraries(qsim_custatevecex OpenMP::OpenMP_CXX) diff --git a/pybind_interface/custatevecex/pybind_main_custatevecex.cpp b/pybind_interface/custatevecex/pybind_main_custatevecex.cpp new file mode 100644 index 000000000..c29a608a6 --- /dev/null +++ b/pybind_interface/custatevecex/pybind_main_custatevecex.cpp @@ -0,0 +1,74 @@ +// Copyright 2025 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "pybind_main_custatevecex.h" + +#include "../../lib/fuser_mqubit.h" +#include "../../lib/gates_cirq.h" +#include "../../lib/io.h" +#include "../../lib/multiprocess_custatevecex.h" +#include "../../lib/run_custatevecex.h" +#include "../../lib/simulator_custatevecex.h" + +namespace { + +qsim::MultiProcessCuStateVecEx mp; + +} // namespace { + +namespace qsim { + using Simulator = SimulatorCuStateVecEx; + + struct Factory { + // num_sim_threads, num_state_threads and num_dblocks are unused, but kept + // for consistency with other factories. + Factory(unsigned num_sim_threads, + unsigned num_state_threads, + unsigned num_dblocks) { + if (!mp.initialized()) { + mp.initialize(); + } + } + + using Simulator = qsim::Simulator; + using StateSpace = Simulator::StateSpace; + + using Gate = Cirq::GateCirq; + using Runner = CuStateVecExRunner; + struct RunnerParameter : public Runner::Parameter { + // max_fused_size is not used, but kept for consistency. + unsigned max_fused_size = 2; + }; + using NoisyRunner = qsim::QuantumTrajectorySimulator; + struct NoisyRunnerParameter : public NoisyRunner::Parameter { + // max_fused_size is not used, but kept for consistency. + unsigned max_fused_size = 2; + }; + + StateSpace CreateStateSpace() const { + return StateSpace{mp}; + } + + Simulator CreateSimulator() const { + return Simulator{}; + } + }; + + inline void SetFlushToZeroAndDenormalsAreZeros() {} + inline void ClearFlushToZeroAndDenormalsAreZeros() {} +} + +#include "../pybind_main.cpp" diff --git a/pybind_interface/custatevecex/pybind_main_custatevecex.h b/pybind_interface/custatevecex/pybind_main_custatevecex.h new file mode 100644 index 000000000..06290dda5 --- /dev/null +++ b/pybind_interface/custatevecex/pybind_main_custatevecex.h @@ -0,0 +1,17 @@ +// Copyright 2025 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "../pybind_main.h" + +PYBIND11_MODULE(qsim_custatevecex, m) { GPU_MODULE_BINDINGS } diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 0c8b8d941..b48e0e587 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -24,7 +24,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. add_compile_options($<$>:/O2>) else() - add_compile_options(-O3 -flto=auto) + add_compile_options(-O3 -fno-lto) endif() if(APPLE) @@ -52,6 +52,9 @@ if(CMAKE_CUDA_COMPILER) target_compile_options(qsim_decide PRIVATE $<$:-D__CUSTATEVEC__> ) + target_compile_options(qsim_decide PRIVATE + $<$:-D__CUSTATEVECEX__> + ) endif() find_package(Python3 3.10 REQUIRED COMPONENTS Interpreter Development) include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) diff --git a/pybind_interface/decide/decide.cpp b/pybind_interface/decide/decide.cpp index b40f6975e..51cd52425 100644 --- a/pybind_interface/decide/decide.cpp +++ b/pybind_interface/decide/decide.cpp @@ -61,7 +61,8 @@ int detect_instructions() { } enum GPUCapabilities { - CUDA = 0, CUSTATEVEC = 1, HIP = 2, NO_GPU = 10, NO_CUSTATEVEC = 11 }; + CUDA = 0, CUSTATEVEC = 1, CUSTATEVECEX = 2, HIP = 3, NO_GPU = 10, + NO_CUSTATEVEC = 11, NO_CUSTATEVECEX = 12 }; // For now, GPU detection is performed at compile time, as our wheels are // generated on Github Actions runners which do not have GPU support. 
@@ -93,6 +94,20 @@ int detect_custatevec() { return gpu; } +// For now, cuStateVecEx detection is performed at compile time, as our wheels +// are generated on Github Actions runners which do not have GPU support. +// +// Users wishing to use qsim with cuStateVecEx will need to compile locally on +// a device which has the necessary CUDA toolkit and cuStateVecEx library. +int detect_custatevecex() { + #if defined(__NVCC__) && defined(__CUSTATEVECEX__) + GPUCapabilities gpu = CUSTATEVECEX; + #else + GPUCapabilities gpu = NO_CUSTATEVECEX; + #endif + return gpu; +} + PYBIND11_MODULE(qsim_decide, m) { m.doc() = "pybind11 plugin"; // optional module docstring @@ -104,4 +119,7 @@ PYBIND11_MODULE(qsim_decide, m) { // Detect cuStateVec. m.def("detect_custatevec", &detect_custatevec, "Detect cuStateVec"); + + // Detect cuStateVecEx. + m.def("detect_custatevecex", &detect_custatevecex, "Detect cuStateVecEx"); } diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index 56f0cd0e6..4cad3230c 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -21,7 +21,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds. 
add_compile_options($<$>:/O2>) else() - add_compile_options(-O3 -flto=auto) + add_compile_options(-O3 -fno-lto) endif() include(../GetPybind11.cmake) diff --git a/qsimcirq/__init__.py b/qsimcirq/__init__.py index 7de9cc506..4a91936c6 100644 --- a/qsimcirq/__init__.py +++ b/qsimcirq/__init__.py @@ -35,7 +35,7 @@ def _load_qsim_gpu(): instr = qsim_decide.detect_gpu() if instr == 0: qsim_gpu = importlib.import_module("qsimcirq.qsim_cuda") - elif instr == 2: + elif instr == 3: qsim_gpu = importlib.import_module("qsimcirq.qsim_hip") else: qsim_gpu = None @@ -51,9 +51,19 @@ def _load_qsim_custatevec(): return qsim_custatevec +def _load_qsim_custatevecex(): + instr = qsim_decide.detect_custatevecex() + if instr == 2: + qsim_custatevecex = importlib.import_module("qsimcirq.qsim_custatevecex") + else: + qsim_custatevecex = None + return qsim_custatevecex + + qsim = _load_simd_qsim() qsim_gpu = _load_qsim_gpu() qsim_custatevec = _load_qsim_custatevec() +qsim_custatevecex = _load_qsim_custatevecex() # Note: the following imports must remain at the bottom of this file. diff --git a/qsimcirq/qsim_simulator.py b/qsimcirq/qsim_simulator.py index b3c0106fc..240715639 100644 --- a/qsimcirq/qsim_simulator.py +++ b/qsimcirq/qsim_simulator.py @@ -21,7 +21,7 @@ import qsimcirq.qsim_circuit as qsimc -from . import qsim, qsim_custatevec, qsim_gpu +from . import qsim, qsim_custatevec, qsim_custatevecex, qsim_gpu # This should probably live in Cirq... @@ -60,9 +60,10 @@ class QSimOptions: simulation modes. use_gpu: whether to use GPU instead of CPU for simulation. The "gpu_*" arguments below are only considered if this is set to True. - gpu_mode: use CUDA if set to 0 (default value) or use the NVIDIA - cuStateVec library if set to any other value. The "gpu_*" - arguments below are only considered if this is set to 0. + gpu_mode: use CUDA if set to 0 (default value), use the NVIDIA + cuStateVec library if set to 1 or use the NVIDIA cuStateVecEx + library if set to any other value. 
The "gpu_*" arguments below are + only considered if this is set to 0. gpu_state_threads: number of threads per CUDA block to use for the GPU StateSpace. This must be a power of 2 in the range [32, 1024]. gpu_data_blocks: number of data blocks to use for the GPU StateSpace. @@ -180,16 +181,26 @@ def __init__( ) else: self._sim_module = qsim_gpu - else: + elif self.qsim_options["gmode"] == 1: if qsim_custatevec is None: raise ValueError( "cuStateVec GPU execution requested, but not " "supported. If your device has GPU support and the " - "NVIDIA cuStateVec library is installed, you may need " - "to compile qsim locally." + "NVIDIA cuStateVec library is installed, you may " + "need to compile qsim locally." ) else: self._sim_module = qsim_custatevec + else: + if qsim_custatevecex is None: + raise ValueError( + "cuStateVecEx GPU execution requested, but not " + "supported. If your device has GPU support and the " + "NVIDIA cuStateVecEx library is installed, you may " + "need to compile qsim locally." + ) + else: + self._sim_module = qsim_custatevecex else: self._sim_module = qsim diff --git a/qsimcirq_tests/qsimcirq_test.py b/qsimcirq_tests/qsimcirq_test.py index 37573b9c7..0b5cdbc8e 100644 --- a/qsimcirq_tests/qsimcirq_test.py +++ b/qsimcirq_tests/qsimcirq_test.py @@ -1533,6 +1533,115 @@ def test_qsim_custatevec_input_state(): assert cirq.approx_eq(state_vector[i], 0, atol=1e-6) +def test_cirq_qsim_custatevecex_amplitudes(): + if qsimcirq.qsim_custatevecex is None: + pytest.skip("cuStateVecEx library is not available for testing.") + # Pick qubits. + a, b = [cirq.GridQubit(0, 0), cirq.GridQubit(0, 1)] + + # Create a circuit + cirq_circuit = cirq.Circuit(cirq.CNOT(a, b), cirq.CNOT(b, a), cirq.X(a)) + + # Enable GPU acceleration. 
+ custatevecex_options = qsimcirq.QSimOptions(use_gpu=True, gpu_mode=2) + qsimGpuSim = qsimcirq.QSimSimulator(qsim_options=custatevecex_options) + result = qsimGpuSim.compute_amplitudes( + cirq_circuit, bitstrings=[0b00, 0b01, 0b10, 0b11] + ) + assert np.allclose(result, [0j, 0j, (1 + 0j), 0j]) + + +def test_cirq_qsim_custatevecex_simulate(): + if qsimcirq.qsim_custatevecex is None: + pytest.skip("cuStateVecEx library is not available for testing.") + # Pick qubits. + a, b = [cirq.GridQubit(0, 0), cirq.GridQubit(0, 1)] + + # Create a circuit + cirq_circuit = cirq.Circuit(cirq.H(a), cirq.CNOT(a, b), cirq.X(b)) + + # Enable GPU acceleration. + custatevecex_options = qsimcirq.QSimOptions(use_gpu=True, gpu_mode=2) + qsimGpuSim = qsimcirq.QSimSimulator(qsim_options=custatevecex_options) + result = qsimGpuSim.simulate(cirq_circuit) + assert result.state_vector().shape == (4,) + + cirqSim = cirq.Simulator() + cirq_result = cirqSim.simulate(cirq_circuit) + assert cirq.linalg.allclose_up_to_global_phase( + result.state_vector(), cirq_result.state_vector(), atol=1.0e-6 + ) + + +def test_cirq_qsim_custatevecex_expectation_values(): + if qsimcirq.qsim_custatevecex is None: + pytest.skip("cuStateVecEx library is not available for testing.") + # Pick qubits. + a, b = [cirq.GridQubit(0, 0), cirq.GridQubit(0, 1)] + + # Create a circuit + cirq_circuit = cirq.Circuit(cirq.H(a), cirq.CNOT(a, b), cirq.X(b)) + obs = [cirq.Z(a) * cirq.Z(b)] + + # Enable GPU acceleration. 
+ custatevecex_options = qsimcirq.QSimOptions(use_gpu=True, gpu_mode=2) + qsimGpuSim = qsimcirq.QSimSimulator(qsim_options=custatevecex_options) + result = qsimGpuSim.simulate_expectation_values(cirq_circuit, obs) + + cirqSim = cirq.Simulator() + cirq_result = cirqSim.simulate_expectation_values(cirq_circuit, obs) + assert np.allclose(result, cirq_result) + + +def test_cirq_qsim_custatevecex_input_state(): + if qsimcirq.qsim_custatevecex is None: + pytest.skip("cuStateVecEx library is not available for testing.") + # Pick qubits. + a, b = [cirq.GridQubit(0, 0), cirq.GridQubit(0, 1)] + + # Create a circuit + cirq_circuit = cirq.Circuit(cirq.H(a), cirq.CNOT(a, b), cirq.X(b)) + + # Enable GPU acceleration. + custatevecex_options = qsimcirq.QSimOptions(use_gpu=True, gpu_mode=2) + qsimGpuSim = qsimcirq.QSimSimulator(qsim_options=custatevecex_options) + initial_state = np.asarray([0.5] * 4, dtype=np.complex64) + result = qsimGpuSim.simulate(cirq_circuit, initial_state=initial_state) + assert result.state_vector().shape == (4,) + + cirqSim = cirq.Simulator() + cirq_result = cirqSim.simulate(cirq_circuit, initial_state=initial_state) + assert cirq.linalg.allclose_up_to_global_phase( + result.state_vector(), cirq_result.state_vector(), atol=1.0e-6 + ) + + +def test_qsim_custatevecex_input_state(): + if qsimcirq.qsim_custatevecex is None: + pytest.skip("cuStateVecEx library is not available for testing.") + + for num_qubits in range(1, 8): + size = 2**num_qubits + qubits = cirq.LineQubit.range(num_qubits) + circuit = cirq.Circuit() + + for k in range(num_qubits): + circuit.append(cirq.H(qubits[k])) + + # Enable GPU acceleration. 
+ custatevecex_options = qsimcirq.QSimOptions(use_gpu=True, gpu_mode=2) + qsimGpuSim = qsimcirq.QSimSimulator(qsim_options=custatevecex_options) + initial_state = np.asarray([np.sqrt(1.0 / size)] * size, dtype=np.complex64) + result = qsimGpuSim.simulate(circuit, initial_state=initial_state) + state_vector = result.state_vector() + + assert result.state_vector().shape == (size,) + assert cirq.approx_eq(state_vector[0], 1, atol=1e-6) + + for i in range(1, size): + assert cirq.approx_eq(state_vector[i], 0, atol=1e-6) + + def test_cirq_qsim_old_options(): old_options = {"f": 3, "t": 4, "r": 100, "v": 1} old_sim = qsimcirq.QSimSimulator(qsim_options=old_options) diff --git a/setup.py b/setup.py index d3ccfa54c..2ac063ee8 100644 --- a/setup.py +++ b/setup.py @@ -136,6 +136,7 @@ def build_extension(self, ext): CMakeExtension("qsimcirq/qsim_basic"), CMakeExtension("qsimcirq/qsim_cuda"), CMakeExtension("qsimcirq/qsim_custatevec"), + CMakeExtension("qsimcirq/qsim_custatevecex"), CMakeExtension("qsimcirq/qsim_decide"), CMakeExtension("qsimcirq/qsim_hip"), ], diff --git a/tests/Makefile b/tests/Makefile index e09b5fc4b..46f35e492 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -47,6 +47,9 @@ CUDA_TARGETS := $(CUDA_FILES:%cuda_test.cu=%cuda_test.x) CUSTATEVEC_FILES := $(wildcard *custatevec_test.cu) CUSTATEVEC_TARGETS := $(CUSTATEVEC_FILES:%custatevec_test.cu=%custatevec_test.x) +CUSTATEVECEX_FILES := $(wildcard *custatevecex_test.cu) +CUSTATEVECEX_TARGETS := $(CUSTATEVECEX_FILES:%custatevecex_test.cu=%custatevecex_test.x) + HIP_FILES := $(wildcard *cuda_test.cu) HIP_TARGETS := $(HIP_FILES:%cuda_test.cu=%hip_test.x) @@ -66,6 +69,9 @@ cuda-tests: $(CUDA_TARGETS) .PHONY: custatevec-tests custatevec-tests: $(CUSTATEVEC_TARGETS) +.PHONY: custatevecex-tests +custatevecex-tests: $(CUSTATEVECEX_TARGETS) + .PHONY: hip-tests hip-tests: $(HIP_TARGETS) @@ -81,6 +87,14 @@ run-cuda-tests: | $(GTEST_DIR)/build cuda-tests run-custatevec-tests: | $(GTEST_DIR)/build custatevec-tests for 
exe in $(CUSTATEVEC_TARGETS); do if ! ./$$exe; then exit 1; fi; done +.PHONY: run-custatevecex-tests +run-custatevecex-tests: | $(GTEST_DIR)/build custatevecex-tests + for exe in $(CUSTATEVECEX_TARGETS); do if ! ./$$exe; then exit 1; fi; done + +.PHONY: run-custatevecex-mpi-tests +run-custatevecex-mpi-tests: | $(GTEST_DIR)/build custatevecex-tests + for exe in $(CUSTATEVECEX_TARGETS); do if ! mpirun -np 2 ./$$exe; then exit 1; fi; done + .PHONY: run-hip-tests run-hip-tests: | $(GTEST_DIR)/build hip-tests for exe in $(HIP_TARGETS); do if ! ./$$exe; then exit 1; fi; done @@ -100,6 +114,9 @@ $(GTEST_DIR)/build: %custatevec_test.x: %custatevec_test.cu $(GTEST_DIR)/build $(NVCC) -o ./$@ $< $(TESTFLAGS) $(NVCCFLAGS) $(CUSTATEVECFLAGS) +%custatevecex_test.x: %custatevecex_test.cu $(GTEST_DIR)/build + $(NVCC) -o ./$@ $< $(TESTFLAGS) $(NVCCFLAGS) $(CUSTATEVECFLAGS) + %hip_test.x: %cuda_test.cu $(GTEST_DIR)/build $(HIPCC) -o ./$@ $< $(TESTFLAGS) $(HIPCCFLAGS) diff --git a/tests/hybrid_custatevecex_test.cu b/tests/hybrid_custatevecex_test.cu new file mode 100644 index 000000000..a0c75b031 --- /dev/null +++ b/tests/hybrid_custatevecex_test.cu @@ -0,0 +1,59 @@ +// Copyright 2025 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "hybrid_testfixture.h" + +#include + +#include "gtest/gtest.h" + +#include "../lib/multiprocess_custatevecex.h" +#include "../lib/simulator_custatevecex.h" + +namespace qsim { + +MultiProcessCuStateVecEx mp; + +template +struct Factory { + using fp_type = FP; + using Simulator = qsim::SimulatorCuStateVecEx; + using StateSpace = typename Simulator::StateSpace; + + StateSpace CreateStateSpace() const { + return StateSpace{mp}; + } + + Simulator CreateSimulator() const { + return Simulator{}; + } +}; + +TEST(HybridCuStateVecExTest, Hybrid2) { + TestHybrid2(qsim::Factory()); +} + +TEST(HybridCuStateVecExTest, Hybrid4) { + TestHybrid4(qsim::Factory()); +} + +} // namespace qsim + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + qsim::mp.initialize(); + + return RUN_ALL_TESTS(); +} diff --git a/tests/qtrajectory_custatevecex_test.cu b/tests/qtrajectory_custatevecex_test.cu new file mode 100644 index 000000000..8d70bfc00 --- /dev/null +++ b/tests/qtrajectory_custatevecex_test.cu @@ -0,0 +1,88 @@ +// Copyright 2025 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "qtrajectory_testfixture.h" + +#include + +#include "gtest/gtest.h" + +#include "../lib/io.h" +#include "../lib/multiprocess_custatevecex.h" +#include "../lib/run_custatevecex.h" +#include "../lib/simulator_custatevecex.h" + +namespace qsim { + +MultiProcessCuStateVecEx mp; + +template +struct Factory { + using fp_type = FP; + using Simulator = qsim::SimulatorCuStateVecEx; + using StateSpace = typename Simulator::StateSpace; + + StateSpace CreateStateSpace() const { + return StateSpace{mp}; + } + + Simulator CreateSimulator() const { + return Simulator{}; + } +}; + +TEST(QTrajectoryCuStateVecExTest, BitFlip) { + using Runner = CuStateVecExRunner>; + TestBitFlip(Factory()); +} + +TEST(QTrajectoryCuStateVecExTest, GenDump) { + using Runner = CuStateVecExRunner>; + TestGenDump(Factory()); +} + +TEST(QTrajectoryCuStateVecExTest, ReusingResults) { + using Runner = CuStateVecExRunner>; + TestReusingResults(Factory()); +} + +TEST(QTrajectoryCuStateVecExTest, CollectKopStat) { + using Runner = CuStateVecExRunner>; + TestCollectKopStat(Factory()); +} + +TEST(QTrajectoryCuStateVecExTest, CleanCircuit) { + using Runner = CuStateVecExRunner>; + TestCleanCircuit(Factory()); +} + +TEST(QTrajectoryCuStateVecExTest, InitialState) { + using Runner = CuStateVecExRunner>; + TestInitialState(Factory()); +} + +TEST(QTrajectoryCuStateVecExTest, UncomputeFinalState) { + using Runner = CuStateVecExRunner>; + TestUncomputeFinalState(Factory()); +} + +} // namespace qsim + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + qsim::mp.initialize(); + + return RUN_ALL_TESTS(); +} diff --git a/tests/run_custatevecex_test.cu b/tests/run_custatevecex_test.cu new file mode 100644 index 000000000..079fd2696 --- /dev/null +++ b/tests/run_custatevecex_test.cu @@ -0,0 +1,262 @@ +// Copyright 2025 Google LLC. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "gates_cirq_testfixture.h" + +#include + +#include "gtest/gtest.h" + +#include "../lib/circuit_qsim_parser.h" +#include "../lib/gates_qsim.h" +#include "../lib/io.h" +#include "../lib/multiprocess_custatevecex.h" +#include "../lib/run_custatevecex.h" +#include "../lib/simulator_custatevecex.h" + +namespace qsim { + +constexpr char provider[] = "run_custatevecex_test"; + +constexpr char circuit_string[] = +R"(4 +0 h 0 +0 h 1 +0 h 2 +0 h 3 +1 cz 0 1 +1 cz 2 3 +2 t 0 +2 x 1 +2 y 2 +2 t 3 +3 y 0 +3 cz 1 2 +3 x 3 +4 t 1 +4 t 2 +5 cz 1 2 +6 x 1 +6 y 2 +7 cz 1 2 +8 t 1 +8 t 2 +9 cz 0 1 +9 cz 2 3 +10 h 0 +10 h 1 +10 h 2 +10 h 3 +)"; + +MultiProcessCuStateVecEx mp; + +struct Factory { + using Simulator = qsim::SimulatorCuStateVecEx; + using StateSpace = typename Simulator::StateSpace; + + StateSpace CreateStateSpace() const { + return StateSpace{mp}; + } + + Simulator CreateSimulator() const { + return Simulator{}; + } +}; + +TEST(RunQSimTest, QSimRunner1) { + std::stringstream ss(circuit_string); + Circuit> circuit; + + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); + EXPECT_EQ(circuit.num_qubits, 4); + EXPECT_EQ(circuit.gates.size(), 27); + + using Simulator = Factory::Simulator; + using StateSpace = Simulator::StateSpace; + using State = StateSpace::State; + using Runner = CuStateVecExRunner; + + float entropy = 0; + + auto measure = 
[&entropy]( + unsigned k, const StateSpace& state_space, const State& state) { + // Calculate entropy. + + entropy = 0; + auto size = uint64_t{1} << state.num_qubits(); + + for (uint64_t i = 0; i < size; ++i) { + auto ampl = state_space.GetAmpl(state, i); + float p = std::norm(ampl); + entropy -= p * std::log(p); + } + }; + + Runner::Parameter param; + param.seed = 1; + param.verbosity = 0; + + EXPECT_TRUE(Runner::Run(param, Factory(), circuit, measure)); + + EXPECT_NEAR(entropy, 2.2192848, 1e-6); +} + +TEST(RunQSimTest, QSimRunner2) { + std::stringstream ss(circuit_string); + Circuit> circuit; + + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); + EXPECT_EQ(circuit.num_qubits, 4); + EXPECT_EQ(circuit.gates.size(), 27); + + using Simulator = Factory::Simulator; + using StateSpace = Simulator::StateSpace; + using State = StateSpace::State; + using Runner = CuStateVecExRunner; + + Factory factory; + StateSpace state_space = factory.CreateStateSpace(); + State state = state_space.Create(circuit.num_qubits); + + EXPECT_FALSE(state_space.IsNull(state)); + + state_space.SetStateZero(state); + + Runner::Parameter param; + param.seed = 1; + param.verbosity = 0; + + EXPECT_TRUE(Runner::Run(param, Factory(), circuit, state)); + + // Calculate entropy. 
+ + float entropy = 0; + auto size = uint64_t{1} << circuit.num_qubits; + + for (uint64_t i = 0; i < size; ++i) { + auto ampl = state_space.GetAmpl(state, i); + float p = std::norm(ampl); + entropy -= p * std::log(p); + } + + EXPECT_NEAR(entropy, 2.2192848, 1e-6); +} + +constexpr char sample_circuit_string[] = +R"(2 +0 h 0 +0 x 1 +1 m 1 +2 cx 0 1 +3 m 0 1 +4 m 0 +5 cx 1 0 +6 m 0 +7 x 0 +7 h 1 +8 m 0 1 +)"; + +TEST(RunQSimTest, QSimSampler) { + std::stringstream ss(sample_circuit_string); + Circuit> circuit; + + EXPECT_TRUE(CircuitQsimParser::FromStream(99, provider, ss, circuit)); + EXPECT_EQ(circuit.num_qubits, 2); + EXPECT_EQ(circuit.gates.size(), 11); + + using Simulator = Factory::Simulator; + using StateSpace = Simulator::StateSpace; + using Result = StateSpace::MeasurementResult; + using State = StateSpace::State; + using Runner = CuStateVecExRunner; + + Factory factory; + StateSpace state_space = factory.CreateStateSpace(); + State state = state_space.Create(circuit.num_qubits); + + EXPECT_FALSE(state_space.IsNull(state)); + + state_space.SetStateZero(state); + + std::vector results; + + Runner::Parameter param; + param.seed = 1; + param.verbosity = 0; + + EXPECT_TRUE(Runner::Run(param, Factory(), circuit, state, results)); + + // Results should contain (qubit @ time): + // (1 @ 1) - should be |01) + EXPECT_TRUE(results[0].bitstring[0]); + // (0 @ 3), (1 @ 3) - either |01) or |10) + EXPECT_EQ(results[1].bitstring[0], !results[1].bitstring[1]); + // (0 @ 4) - should match (0 @ 3) + EXPECT_EQ(results[1].bitstring[0], results[2].bitstring[0]); + // (0 @ 6) - either |11) or |10) + EXPECT_TRUE(results[3].bitstring[0]); + // (0 @ 8), (1 @ 8) - should be |00) + EXPECT_FALSE(results[4].bitstring[0]); + EXPECT_FALSE(results[4].bitstring[1]); +} + +TEST(RunQSimTest, CirqGates) { + auto circuit = CirqCircuit1::GetCircuit(false); + const auto& expected_results = CirqCircuit1::expected_results0; + + using Simulator = Factory::Simulator; + using StateSpace = 
Simulator::StateSpace; + using State = StateSpace::State; + using Runner = CuStateVecExRunner; + + Factory factory; + StateSpace state_space = factory.CreateStateSpace(); + State state = state_space.Create(circuit.num_qubits); + + auto size = uint64_t{1} << circuit.num_qubits; + + EXPECT_FALSE(state_space.IsNull(state)); + EXPECT_EQ(size, expected_results.size()); + + state_space.SetStateZero(state); + + Runner::Parameter param; + param.seed = 1; + param.verbosity = 0; + + EXPECT_TRUE(Runner::Run(param, Factory(), circuit, state)); + + for (uint64_t i = 0; i < size; ++i) { + auto ampl = state_space.GetAmpl(state, i); + EXPECT_NEAR(std::real(ampl), std::real(expected_results[i]), 2e-6); + EXPECT_NEAR(std::imag(ampl), std::imag(expected_results[i]), 2e-6); + } +} + +} // namespace qsim + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + qsim::mp.initialize(); + + return RUN_ALL_TESTS(); +} diff --git a/tests/simulator_custatevecex_test.cu b/tests/simulator_custatevecex_test.cu new file mode 100644 index 000000000..dcf9eaf65 --- /dev/null +++ b/tests/simulator_custatevecex_test.cu @@ -0,0 +1,105 @@ +// Copyright 2025 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "simulator_testfixture.h" + +#include + +#include + +#include "gtest/gtest.h" + +#include "../lib/multiprocess_custatevecex.h" +#include "../lib/simulator_custatevecex.h" + +namespace qsim { + +template +class SimulatorCuStateVecExTest : public testing::Test {}; + +//using fp_impl = ::testing::Types; +using fp_impl = ::testing::Types; + +TYPED_TEST_SUITE(SimulatorCuStateVecExTest, fp_impl); + +MultiProcessCuStateVecEx mp; + +template +struct Factory { + using Simulator = qsim::SimulatorCuStateVecEx; + using StateSpace = typename Simulator::StateSpace; + + StateSpace CreateStateSpace() const { + return StateSpace{mp}; + } + + Simulator CreateSimulator() const { + return Simulator{}; + } +}; + +TYPED_TEST(SimulatorCuStateVecExTest, ApplyGate1) { + TestApplyGate1(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, ApplyGate2) { + TestApplyGate2(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, ApplyGate3) { + TestApplyGate3(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, ApplyGate5) { + TestApplyGate5(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, CircuitWithControlledGates) { + TestCircuitWithControlledGates(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, CircuitWithControlledGatesDagger) { + TestCircuitWithControlledGatesDagger(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, MultiQubitGates) { + TestMultiQubitGates(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, ControlledGates) { + bool high_precision = std::is_same::value; + TestControlledGates(qsim::Factory(), high_precision, true); +} + +TYPED_TEST(SimulatorCuStateVecExTest, GlobalPhaseGate) { + TestGlobalPhaseGate(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, ExpectationValue1) { + TestExpectationValue1(qsim::Factory()); +} + +TYPED_TEST(SimulatorCuStateVecExTest, ExpectationValue2) { + TestExpectationValue2(qsim::Factory()); +} + +} // namespace qsim + +int main(int argc, char** argv) { + 
::testing::InitGoogleTest(&argc, argv); + + qsim::mp.initialize(); + + return RUN_ALL_TESTS(); +} diff --git a/tests/simulator_testfixture.h b/tests/simulator_testfixture.h index 2e15c287f..bf46353a7 100644 --- a/tests/simulator_testfixture.h +++ b/tests/simulator_testfixture.h @@ -362,7 +362,8 @@ void TestCircuitWithControlledGates(const Factory& factory) { using fp_type = typename StateSpace::fp_type; using Gate = GateQSim; - unsigned num_qubits = 6; + unsigned num_qubits = 7; + unsigned size = 1 << (num_qubits - 1); std::vector gates; gates.reserve(128); @@ -722,10 +723,8 @@ if __name__ == '__main__': {-0.18774915, 0.12311842}, }; - unsigned size = 1 << num_qubits; - for (unsigned i = 0; i < size; ++i) { - auto a = StateSpace::GetAmpl(state1, i); + auto a = state_space.GetAmpl(state1, i); EXPECT_NEAR(std::real(a), expected_results[i][0], 1e-6); EXPECT_NEAR(std::imag(a), expected_results[i][1], 1e-6); } @@ -740,8 +739,8 @@ if __name__ == '__main__': } for (unsigned i = 0; i < size; ++i) { - auto a1 = StateSpace::GetAmpl(state1, i); - auto a2 = StateSpace::GetAmpl(state2, i); + auto a1 = state_space.GetAmpl(state1, i); + auto a2 = state_space.GetAmpl(state2, i); EXPECT_EQ(std::real(a1), std::real(a2)); EXPECT_EQ(std::imag(a1), std::imag(a2)); } @@ -756,8 +755,8 @@ if __name__ == '__main__': } for (unsigned i = 0; i < size; ++i) { - auto a1 = StateSpace::GetAmpl(state1, i); - auto a2 = StateSpace::GetAmpl(state3, i); + auto a1 = state_space.GetAmpl(state1, i); + auto a2 = state_space.GetAmpl(state3, i); EXPECT_EQ(std::real(a1), std::real(a2)); EXPECT_EQ(std::imag(a1), std::imag(a2)); } @@ -770,8 +769,8 @@ void TestCircuitWithControlledGatesDagger(const Factory& factory) { using fp_type = typename StateSpace::fp_type; using Gate = GateQSim; - unsigned num_qubits = 6; - unsigned size = 1 << num_qubits; + unsigned num_qubits = 7; + unsigned size = 1 << (num_qubits - 1); std::vector gates; gates.reserve(128); @@ -1133,10 +1132,10 @@ if __name__ == '__main__': */ - 
EXPECT_NEAR(std::real(StateSpace::GetAmpl(state, 0)), 1, 1e-6); - EXPECT_NEAR(std::imag(StateSpace::GetAmpl(state, 0)), 0, 1e-6); + EXPECT_NEAR(std::real(state_space.GetAmpl(state, 0)), 1, 1e-6); + EXPECT_NEAR(std::imag(state_space.GetAmpl(state, 0)), 0, 1e-6); for (unsigned i = 1; i < size; ++i) { - auto a = StateSpace::GetAmpl(state, i); + auto a = state_space.GetAmpl(state, i); EXPECT_NEAR(std::real(a), 0, 1e-6); EXPECT_NEAR(std::imag(a), 0, 1e-6); } @@ -1162,14 +1161,14 @@ void TestMultiQubitGates(const Factory& factory) { std::vector vec(state_space.MinSize(max_num_qubits)); - for (unsigned num_qubits = 1; num_qubits <= max_num_qubits; ++num_qubits) { + for (unsigned num_qubits = 2; num_qubits <= max_num_qubits; ++num_qubits) { auto state = state_space.Create(num_qubits); unsigned size = 1 << num_qubits; fp_type inorm = std::sqrt(1.0 / (1 << num_qubits)); - unsigned max_gate_qubits2 = std::min(max_gate_qubits, num_qubits); + unsigned max_gate_qubits2 = std::min(max_gate_qubits, num_qubits - 1); - for (unsigned q = 0; q <= max_gate_qubits2; ++q) { + for (unsigned q = 1; q <= max_gate_qubits2; ++q) { unsigned size1 = 1 << q; unsigned size2 = size1 * size1; @@ -1432,10 +1431,10 @@ void TestExpectationValue1(const Factory& factory) { std::vector vec(state_space.MinSize(max_num_qubits)); - for (unsigned num_qubits = 1; num_qubits <= max_num_qubits; ++num_qubits) { + for (unsigned num_qubits = 2; num_qubits <= max_num_qubits; ++num_qubits) { auto state = state_space.Create(num_qubits); - unsigned max_gate_qubits2 = std::min(max_gate_qubits, num_qubits); + unsigned max_gate_qubits2 = std::min(max_gate_qubits, num_qubits - 1); for (unsigned q = 1; q <= max_gate_qubits2; ++q) { unsigned size1 = 1 << q; diff --git a/tests/statespace_custatevecex_test.cu b/tests/statespace_custatevecex_test.cu new file mode 100644 index 000000000..db840d7c9 --- /dev/null +++ b/tests/statespace_custatevecex_test.cu @@ -0,0 +1,119 @@ +// Copyright 2019 Google LLC. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "statespace_testfixture.h" + +#include + +#include "gtest/gtest.h" + +#include "../lib/multiprocess_custatevecex.h" +#include "../lib/simulator_custatevecex.h" +#include "../lib/statespace_custatevecex.h" + +namespace qsim { + +template +class StateSpaceCuStateVecExTest : public testing::Test {}; + +using fp_impl = ::testing::Types; + +TYPED_TEST_SUITE(StateSpaceCuStateVecExTest, fp_impl); + +MultiProcessCuStateVecEx mp; + +template +struct Factory { + using Simulator = qsim::SimulatorCuStateVecEx; + using StateSpace = typename Simulator::StateSpace; + + StateSpace CreateStateSpace() const { + return StateSpace{mp}; + } + + Simulator CreateSimulator() const { + return Simulator{}; + } +}; + +TYPED_TEST(StateSpaceCuStateVecExTest, Add) { + TestAdd(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, NormSmall) { + TestNormSmall(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, NormAndInnerProductSmall) { + TestNormAndInnerProductSmall(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, NormAndInnerProduct) { + TestNormAndInnerProduct(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, SamplingSmall) { + TestSamplingSmall(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, SamplingCrossEntropyDifference) { + TestSamplingCrossEntropyDifference(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, Ordering) { + 
TestOrdering(qsim::Factory()); +} + +TEST(StateSpaceCuStateVecExTest, MeasurementSmall) { + TestMeasurementSmall(qsim::Factory(), true); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, MeasurementLarge) { +// This test fails. +// TestMeasurementLarge(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, Collapse) { +// Not implemented. +// TestCollapse(qsim::Factory()); +} + +TEST(StateSpaceCuStateVecExTest, InvalidStateSize) { + TestInvalidStateSize(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, BulkSetAmpl) { +// Not implemented. +// TestBulkSetAmplitude(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, BulkSetAmplExclusion) { +// Not implemented. +// TestBulkSetAmplitudeExclusion(qsim::Factory()); +} + +TYPED_TEST(StateSpaceCuStateVecExTest, BulkSetAmplDefault) { +// Not implemented. +// TestBulkSetAmplitudeDefault(factory); +} + +} // namespace qsim + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + + qsim::mp.initialize(); + + return RUN_ALL_TESTS(); +} From 6b95f3aba3be3b3fce21b3b700cd9cf636113c27 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 09:21:30 -0800 Subject: [PATCH 12/33] Make use of larger GitHub job runners (#971) This updates some of the jobs in the CI workflow to use the larger runners we've configured for our org, so that CI runs faster. --- .github/workflows/ci.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e0968df6a..6185674a0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -221,10 +221,10 @@ jobs: # tests the endpoints of the supported range. The wheel build process # (in a separate workflow) *does* use all the versions. 
os: - - ubuntu-24.04 + - ubuntu-24.04-x64-8-core - macos-14 - macos-15 - - windows-2025 + - windows-2025-x64-8-core python_version: - '3.10' - '3.13' @@ -257,13 +257,13 @@ jobs: with: debug: ${{inputs.debug}} - - if: matrix.os != 'windows-2025' + - if: ${{! startsWith(matrix.os, 'windows-')}} name: Build the qsim C++ library and run tests (non-Windows case) run: | alias bazel=bazelisk dev_tools/test_libs.sh ${{inputs.debug && '--config=verbose'}} - - if: matrix.os == 'windows-2025' + - if: ${{startsWith(matrix.os, 'windows-')}} name: Build the qsim C++ library and run tests (Windows case) # On GitHub Windows runners, Bazel ends up finding a different "python3" # binary than what's installed by setup-python unless we tell Bazel what @@ -311,7 +311,7 @@ jobs: - python-checks - shell-lint - yaml-lint - runs-on: ubuntu-24.04 + runs-on: ubuntu-24.04-x64-16-core timeout-minutes: 60 strategy: matrix: @@ -371,7 +371,7 @@ jobs: - python-checks - shell-lint - yaml-lint - runs-on: ubuntu-24.04 + runs-on: ubuntu-24.04-x64-16-core timeout-minutes: 60 env: common_args: >- @@ -431,7 +431,7 @@ jobs: - python-checks - shell-lint - yaml-lint - runs-on: ubuntu-24.04 + runs-on: ubuntu-24.04-x64-16-core timeout-minutes: 60 env: # The next environment variable is used by Docker. From 567fb255d4e13dc4224828960625194d72b103a5 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 09:23:14 -0800 Subject: [PATCH 13/33] Add check/pylint (#978) This is a simple script to help contributors easily run pylint recursively on the right directories. It also passes `--jobs=0` to make it as fast as possible by default. 
--- check/pylint | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 check/pylint diff --git a/check/pylint b/check/pylint new file mode 100755 index 000000000..5fd9d9cdd --- /dev/null +++ b/check/pylint @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Summary: make it easy to run pylint on directories that contain Python files +# and avoid tests/googletest/. + +set -e + +declare -r usage="Usage: ${0} [-h | --help] [args ...] + +If the first argument on the command line is the option --help or -h, this +program prints usage information and then exits. Otherwise, it runs Pylint on +the Python files of this project. It passes all command-line arguments (other +than -h, --help, or help) to Pylint along with the project source directories." + +# Exit early if the user requested help. +if [[ "${1}" == "-h" || "${1}" == "--help" || "${1}" == "help" ]]; then + echo "$usage" + exit 0 +fi + +# Go to the project root. +thisdir=$(dirname "${BASH_SOURCE[0]:?}") +repo_dir=$(git -C "${thisdir}" rev-parse --show-toplevel) +cd "${repo_dir}" + +pylint --jobs=0 --ignore-paths=tests/googletest "$@" . 
From 59e15b8518b2ed78c9e11be27aa870098553697d Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 09:34:41 -0800 Subject: [PATCH 14/33] Use GHA "working-directory" instead of cd'ing (#969) A better practice for jobs where the first step is to `cd` into a directory is to use the field `working-directory` in the step definition and skip the `cd`. --- .github/workflows/ci.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6185674a0..c574625f4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -461,9 +461,8 @@ jobs: run: docker run --rm qsim-base:latest -c /qsim/circuits/circuit_q24 - name: Test installation process - run: | - cd install/tests - docker compose build + working-directory: install/tests + run: docker compose build report-results: name: CI From 2fe8c97b83404d4a3f91a41446a9fc68061e1d1a Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 09:35:16 -0800 Subject: [PATCH 15/33] Detect missing cpuinfo package when running test-libs.sh and let the user know (#967) Due to the fact that the package name is not the same as the module name (`py-cpuinfo` vs `cpuinfo`), the error that results from trying to run this script when `py-cpuinfo` is not installed can be confusing. Let's help users & developers by testing for the package and pointing them in the right direction. Also, slightly improve the clarity of the usage message text in this script. --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- dev_tools/test_libs.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dev_tools/test_libs.sh b/dev_tools/test_libs.sh index c640f91a8..7efa58f73 100755 --- a/dev_tools/test_libs.sh +++ b/dev_tools/test_libs.sh @@ -16,11 +16,15 @@ set -eo pipefail -o errtrace declare -r usage="Usage: ${0##*/} [-h | --help | help] [bazel options ...] 
-Run the programs in tests/, and on Linux, also build the programs in apps/. + +Invokes Bazel to run the programs in tests/, and on Linux, also build the +sample programs in apps/. If the first option on the command line is -h, --help, or help, this help text will be printed and the program will exit. Any other options on the command -line are passed directly to Bazel." +line are passed directly to Bazel. + +This script makes use of the Python package 'py-cpuinfo'." # Exit early if the user requested help. if [[ "$1" == "-h" || "$1" == "--help" || "$1" == "help" ]]; then @@ -28,6 +32,11 @@ if [[ "$1" == "-h" || "$1" == "--help" || "$1" == "help" ]]; then exit 0 fi +if ! python -m pip show -qq py-cpuinfo 2>/dev/null; then + echo "Error: missing 'py-cpuinfo'. Please install dev-requirements.txt." >&2 + exit 1 +fi + # Look for AVX and SSE in the processor's feature flags. declare features="" declare filters="" From 464a2663e999b6328706c00ffb46c49aba855e19 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 11:20:03 -0800 Subject: [PATCH 16/33] In tests/Makefile, send grep output to /dev/null (#988) On systems where there is no /proc/cpuinfo, you get an error message. We don't need to see it; we just need the result of the grep. --- tests/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 46f35e492..4fef3e3fe 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,9 +13,9 @@ # limitations under the License. # Determine the hardware features available in this CPU. -HAVE_SSE := $(shell grep -q sse /proc/cpuinfo && echo "true") -HAVE_AVX2 := $(shell grep -q avx2 /proc/cpuinfo && echo "true") -HAVE_AVX512 := $(shell grep -q avx512f /proc/cpuinfo && echo "true") +HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true") +HAVE_AVX2 := $(shell grep -qs avx2 /proc/cpuinfo && echo "true") +HAVE_AVX512 := $(shell grep -qs avx512f /proc/cpuinfo && echo "true") # Default targets. Always built. 
BASIC_FILES := $(shell ls *.cc | egrep -v '_avx|_sse') From 34ccd4f49864c9f039f97a9e308c321f73a75f4b Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 18 Jan 2026 07:31:00 +0000 Subject: [PATCH 17/33] Merge differences --- setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2ac063ee8..3b4fe7272 100644 --- a/setup.py +++ b/setup.py @@ -116,11 +116,14 @@ def build_extension(self, ext): env = os.environ.copy() cxxflags = env.get("CXXFLAGS", "") - env["CXXFLAGS"] = f'{cxxflags} -DVERSION_INFO=\\"{__version__}\\"' + env["CXXFLAGS"] = ( + f'{cxxflags} -DVERSION_INFO=\\"{self.distribution.get_version()}\\"' + ) if not os.path.exists(self.build_temp): os.makedirs(self.build_temp) subprocess.check_call( - ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env + ["cmake", ext.sourcedir] + cmake_args, + cwd=self.build_temp, ) subprocess.check_call( ["cmake", "--build", ".", "--verbose"] + build_args, From f2bdcc60b3e622a761e4cb5703c22a600af113f5 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 11:53:37 -0800 Subject: [PATCH 18/33] Set HOMEBREW_NO_AUTO_UPDATE in CI (#973) Tell Homebrew not to auto-update when running. Auto-updating is not necessary here and just wastes time when it happens. 
--- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c574625f4..f71a0fe19 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -276,6 +276,8 @@ jobs: - name: Install LLVM and OpenMP on macOS if: startsWith(matrix.os, 'macos') + env: + HOMEBREW_NO_AUTO_UPDATE: 1 run: | brew install -q libomp llvm@19 brew unlink libomp From 22a3eed33bbfa0856393f8a73facb1fb3dd2431f Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 11:55:28 -0800 Subject: [PATCH 19/33] Add `.gitattributes` file for project (#974) IMHO it's useful to see Markdown files as one of the types of files listed on the GitHub front page in the file statistics section, alongside other files. It gives a sense for the amount of documentation present in a project. On the other hand, some configuration files are not useful to count, such as .md files in a `.gemini/` subdirectory and configuration files for tools like git. --- .gitattributes | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..fe3dad05f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,22 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Include Markdown files in GitHub's file statistics for this repo. 
+*.md linguist-detectable + +# Exclude config files from GitHub's file statistics. GitHub's Linguist already +# recognizes most config files as data; this adds some it doesn't. +.gemini/** linguist-documentation +.git-blame-ignore-revs linguist-documentation +.markdownlintrc linguist-documentation From 2037e80e1dfb5c4b1dd6790e2aed0778abdfb437 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 11:56:36 -0800 Subject: [PATCH 20/33] Add configuration for isort to `pyproject.toml` (#987) `isort` is used in `check/format-incremental`, but there is no configuration for it. This adds a configuration based on what Cirq currently uses. --- pyproject.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index a4f3e4613..bd1c5102c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,3 +150,11 @@ skip = "*musllinux*" [tool.black] target-version = ['py310', 'py311', 'py312', 'py313'] extend-exclude = 'third_party' + +[tool.isort] +profile = 'black' +order_by_type = false # Sort alphabetically, irrespective of case. +skip_gitignore = true +combine_as_imports = true +known_first_party = ["qsimcirq*"] +extend_skip = ["__init__.py"] From e0d71a4eeb25b93b5025efc0b6f8f7cb9fa7f44a Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Mon, 5 Jan 2026 23:21:32 -0800 Subject: [PATCH 21/33] Fix #929 by updating Eigen to version 3.4.1 (#934) The warning described in https://github.com/quantumlib/qsim/issues/929 seems to be the result of using an older version of Eigen. Updating the version makes the warning go away. --- Makefile | 10 +++++----- WORKSPACE | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 4b37317c2..b59563d5d 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Version info for the copy of Eigen we will download and build locally.
-EIGEN_PREFIX = "3bb6a48d8c171cf20b5f8e48bfb4e424fbd4f79e" +# If this is changed, updated the value in ./WORKSPACE too. +EIGEN_COMMIT = "b66188b5dfd147265bfa9ec47595ca0db72d21f5" EIGEN_URL = "https://gitlab.com/libeigen/eigen/-/archive/" # Default build targets. Additional may be added conditionally below. @@ -197,9 +197,9 @@ check-cuquantum-root-set: eigen: -rm -rf eigen - wget $(EIGEN_URL)/$(EIGEN_PREFIX)/eigen-$(EIGEN_PREFIX).tar.gz - tar -xzf eigen-$(EIGEN_PREFIX).tar.gz && mv eigen-$(EIGEN_PREFIX) eigen - rm eigen-$(EIGEN_PREFIX).tar.gz + wget $(EIGEN_URL)/$(EIGEN_COMMIT)/eigen-$(EIGEN_COMMIT).tar.gz + tar -xzf eigen-$(EIGEN_COMMIT).tar.gz && mv eigen-$(EIGEN_COMMIT) eigen + rm eigen-$(EIGEN_COMMIT).tar.gz .PHONY: clean clean: diff --git a/WORKSPACE b/WORKSPACE index 6f064c52e..0499366fd 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -55,9 +55,10 @@ load("@org_tensorflow//tensorflow:workspace0.bzl", "tf_workspace0") tf_workspace0() -EIGEN_COMMIT = "d71c30c47858effcbd39967097a2d99ee48db464" # 3.4.1 +# https://gitlab.com/libeigen/eigen/-/releases/3.4.1 +EIGEN_COMMIT = "b66188b5dfd147265bfa9ec47595ca0db72d21f5" -EIGEN_SHA256 = "f1d28c2205d015490a685b1e5a171c434da87f757746724de3cb85e69621dec2" +EIGEN_SHA256 = "2c167ff09e88a5261111bc2aa7f18ae2e78d73fd42339387532937b0c2629829" http_archive( name = "eigen", From 7926ba541dcd2714f6e80776f9d55c92aad0c7a2 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Tue, 6 Jan 2026 07:15:03 -0800 Subject: [PATCH 22/33] Fix Makefile handling of user CXXFLAGS & also add a DEBUG flag (#990) The way `CXXFLAGS` and other flags were being set meant that users had to add the default values (`-std=c++17 -fopenmp -O3` etc) if they set the flags at all, or else lose the defaults. This was error-prone and suboptimal. The change here makes it so that the user values are appended at the end of the defaults, so that they can override values. This PR also adds a DEBUG flag that switches between using `-g -O0` and the regular `-O3` option. 
Developers can invoke it using, e.g., `make DEBUG=1`. --- Makefile | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index b59563d5d..187f4f9b0 100644 --- a/Makefile +++ b/Makefile @@ -16,11 +16,12 @@ EIGEN_COMMIT = "b66188b5dfd147265bfa9ec47595ca0db72d21f5" EIGEN_URL = "https://gitlab.com/libeigen/eigen/-/archive/" -# Default build targets. Additional may be added conditionally below. +# Default build targets. Additional ones are added conditionally below. TARGETS = qsim TESTS = run-cxx-tests # By default, we also build the pybind11-based Python interface. +# Can be overriden via env variables or command-line flags PYBIND11 ?= true ifeq ($(PYBIND11), true) @@ -31,14 +32,36 @@ endif # Default options for Pytest (only used if the pybind interface is built). PYTESTFLAGS ?= -v -# Default C++ compilers and compiler flags. Can be overriden via env variables. +# Default compilers and compiler flags. +# Can be overriden via env variables or command-line flags. CXX ?= g++ NVCC ?= nvcc HIPCC ?= hipcc -CXXFLAGS ?= -O3 -std=c++17 -fopenmp -flto=auto -NVCCFLAGS ?= -O3 --std c++17 -Wno-deprecated-gpu-targets -HIPCCFLAGS ?= -O3 +BASE_CXXFLAGS := -std=c++17 -fopenmp +BASE_NVCCFLAGS := -std c++17 -Wno-deprecated-gpu-targets +BASE_HIPCCFLAGS := + +CXXFLAGS := $(BASE_CXXFLAGS) $(CXXFLAGS) +NVCCFLAGS := $(BASE_NVCCFLAGS) $(NVCCFLAGS) +HIPCCFLAGS := $(BASE_HIPCCFLAGS) $(HIPCCFLAGS) + +LTO_FLAGS := -flto=auto +USING_CLANG := $(shell $(CXX) --version | grep -isq clang && echo "true") +ifeq ($(USING_CLANG),"true") + LTO_FLAGS := -flto +endif + +ifdef DEBUG + DEBUG_FLAGS := -g -O0 + CXXFLAGS += $(DEBUG_FLAGS) + NVCCFLAGS += $(DEBUG_FLAGS) + HIPCCFLAGS += $(DEBUG_FLAGS) +else + CXXFLAGS += -O3 $(LTO_FLAGS) + NVCCFLAGS += -O3 + HIPCCFLAGS += -O3 +endif # For compatibility with CMake, if $CUDAARCHS is set, use it to set the # architecture options to nvcc. 
Otherwise, default to the "native" option, From e478fb17b1788d98324228840e979490220abee9 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Tue, 6 Jan 2026 07:16:00 -0800 Subject: [PATCH 23/33] Fix #892: use -msse4 rather than -msse4.1 in Makefiles (#991) There was an inconsistency in the Makefiles versus `tests/BUILD` and `tests/make.sh`, in that a couple of the Makefiles used `-msse4.1` while the BUILD file and `tests/make.sh` used `-msse4`. In addition, it seems that `-msse4` subsumes `-msse4.1`, and so overall, it appears better to use `-msse4`. Fixes #892. --- pybind_interface/Makefile | 2 +- pybind_interface/sse/CMakeLists.txt | 2 +- tests/Makefile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pybind_interface/Makefile b/pybind_interface/Makefile index f9693c82c..2c30f678e 100644 --- a/pybind_interface/Makefile +++ b/pybind_interface/Makefile @@ -41,7 +41,7 @@ endif # The flags for the compilation of the simd-specific Pybind11 interfaces PYBINDFLAGS_BASIC = $(PYBINDFLAGS) -PYBINDFLAGS_SSE = -msse4.1 $(PYBINDFLAGS) +PYBINDFLAGS_SSE = -msse4 $(PYBINDFLAGS) PYBINDFLAGS_AVX2 = -mavx2 -mfma $(PYBINDFLAGS) PYBINDFLAGS_AVX512 = -mavx512f -mbmi2 $(PYBINDFLAGS) diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index fee561006..195af28c3 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -21,7 +21,7 @@ if(WIN32) # This prevents a conflict with /RTC1 in DEBUG builds.
add_compile_options($<$>:/O2>) else() - add_compile_options(-msse4.1 -O3 -flto=auto) + add_compile_options(-msse4 -O3 -flto=auto) endif() if(APPLE) diff --git a/tests/Makefile b/tests/Makefile index 4fef3e3fe..6b5eee410 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -25,7 +25,7 @@ SSE_FILES = AVX2_FILES = AVX512_FILES = ifneq (,$(HAVE_SSE)) - SSE_FLAGS ?= -msse4.1 + SSE_FLAGS ?= -msse4 SSE_FILES := $(wildcard *_sse_test.cc) endif ifneq (,$(HAVE_AVX2)) From 8dfe69e5deba0914d3e65d7fb477db59dcb8ffcf Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Tue, 6 Jan 2026 10:33:57 -0800 Subject: [PATCH 24/33] Fix #904: test that repo owner is Quantumlib (#968) This adds checks to the GitHub Actions workflows for the owner being Quantumlib, so that users who fork the repo will not be surprised by the workflows running in their forks. --- .github/workflows/ci.yaml | 11 ++++++++++- .github/workflows/cirq_compatibility.yml | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f71a0fe19..365b316a8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -97,6 +97,7 @@ jobs: docker-lint: name: Dockerfile lint checks + if: github.repository_owner == 'quantumlib' # ubuntu-slim runners don't have docker installed. 
runs-on: ubuntu-24.04 timeout-minutes: 15 @@ -119,6 +120,7 @@ jobs: shell-lint: name: Shell script lint checks + if: github.repository_owner == 'quantumlib' runs-on: ubuntu-slim timeout-minutes: 15 steps: @@ -141,6 +143,7 @@ jobs: yaml-lint: name: YAML lint checks + if: github.repository_owner == 'quantumlib' runs-on: ubuntu-slim timeout-minutes: 15 steps: @@ -165,6 +168,7 @@ jobs: bazel-lint: name: Bazel build lint checks + if: github.repository_owner == 'quantumlib' runs-on: ubuntu-slim timeout-minutes: 15 steps: @@ -188,6 +192,7 @@ jobs: action-lint: name: GitHub Actions lint checks + if: github.repository_owner == 'quantumlib' runs-on: ubuntu-slim timeout-minutes: 15 steps: @@ -206,6 +211,7 @@ jobs: library-tests: name: Library tests + if: github.repository_owner == 'quantumlib' needs: - action-lint - bazel-lint @@ -307,6 +313,7 @@ jobs: options-tests: name: Options tests + if: github.repository_owner == 'quantumlib' needs: - action-lint - bazel-lint @@ -367,6 +374,7 @@ jobs: memory-tests: name: Malloc/asan/msan tests + if: github.repository_owner == 'quantumlib' needs: - action-lint - bazel-lint @@ -427,6 +435,7 @@ jobs: docker-tests: name: Docker build tests + if: github.repository_owner == 'quantumlib' needs: - action-lint - docker-lint @@ -468,7 +477,7 @@ jobs: report-results: name: CI - if: always() + if: always() && github.repository_owner == 'quantumlib' needs: - action-lint - bazel-lint diff --git a/.github/workflows/cirq_compatibility.yml b/.github/workflows/cirq_compatibility.yml index 18d974bef..12b396a18 100644 --- a/.github/workflows/cirq_compatibility.yml +++ b/.github/workflows/cirq_compatibility.yml @@ -35,6 +35,7 @@ concurrency: jobs: test-compatibility: + if: github.repository_owner == 'quantumlib' name: Test Cirq compatibility runs-on: ubuntu-24.04 timeout-minutes: 30 From bd95d8691504be9e17aca840455604c706e2729d Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Wed, 14 Jan 2026 09:27:50 -0800 Subject: [PATCH 25/33] Add `-mbmi2` flag when 
`-mavx2` is used in Makefiles (#992) It looks like it's possible to use the `-mbmi2` flag when `-mavx2` is used, based on documentation and on testing locally. This was done in some Makefiles but not consistently. This PR adds the flag to Makefiles where it was missing, plus also in one BUILD file. --- pybind_interface/avx2/CMakeLists.txt | 9 +++++++- tests/Makefile | 34 ++++++++++++++++++---------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index cbd6ea2d3..b8a989d2b 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -20,8 +20,15 @@ if(WIN32) # Add /O2 to any configuration that is NOT Debug. # This prevents a conflict with /RTC1 in DEBUG builds. add_compile_options($<$>:/O2>) -else() +elseif(LINUX) add_compile_options(-mavx2 -mfma -O3 -flto=auto) + execute_process( + COMMAND bash --noprofile -c "grep -qs bmi2 /proc/cpuinfo" + RESULT_VARIABLE _EXIT_CODE + ) + if(_EXIT_CODE EQUAL 0) + add_compile_options("-mbmi2") + endif() endif() if(APPLE) diff --git a/tests/Makefile b/tests/Makefile index 6b5eee410..c02f470fd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,33 +13,34 @@ # limitations under the License. # Determine the hardware features available in this CPU. -HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true") HAVE_AVX2 := $(shell grep -qs avx2 /proc/cpuinfo && echo "true") HAVE_AVX512 := $(shell grep -qs avx512f /proc/cpuinfo && echo "true") +HAVE_BMI2 := $(shell grep -qs bmi2 /proc/cpuinfo && echo "true") +HAVE_SSE := $(shell grep -qs sse /proc/cpuinfo && echo "true") # Default targets. Always built. BASIC_FILES := $(shell ls *.cc | egrep -v '_avx|_sse') # Additional flags and targets for non-CUDA cases. 
-SSE_FILES = -AVX2_FILES = -AVX512_FILES = -ifneq (,$(HAVE_SSE)) - SSE_FLAGS ?= -msse4 - SSE_FILES := $(wildcard *_sse_test.cc) -endif ifneq (,$(HAVE_AVX2)) AVX2_FLAGS ?= -mavx2 -mfma AVX2_FILES := $(wildcard *_avx_test.cc) endif ifneq (,$(HAVE_AVX512)) - AVX512_FLAGS ?= -mavx512f -mbmi2 + AVX512_FLAGS ?= -mavx512f AVX512_FILES := $(wildcard *_avx512_test.cc) endif +ifneq (,$(HAVE_BMI2)) + BMI2_FLAGS ?= -mbmi2 +endif +ifneq (,$(HAVE_SSE)) + SSE_FLAGS ?= -msse4 + SSE_FILES := $(wildcard *_sse_test.cc) +endif CXX_FILES := $(BASIC_FILES) $(SSE_FILES) $(AVX2_FILES) $(AVX512_FILES) CXX_TARGETS := $(CXX_FILES:%.cc=%.x) -CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) +CXXFLAGS := $(CXXFLAGS) $(SSE_FLAGS) $(AVX2_FLAGS) $(AVX512_FLAGS) $(BMI2_FLAGS) CUDA_FILES := $(wildcard *cuda_test.cu) CUDA_TARGETS := $(CUDA_FILES:%cuda_test.cu=%cuda_test.x) @@ -125,8 +126,17 @@ clean: -rm -f ./*.x ./*.a ./*.so ./*.mod rm -rf $(GTEST_DIR)/build -LOCAL_VARS = HAVE_SSE HAVE_AVX2 HAVE_AVX512 SSE_FLAGS AVX2_FLAGS $\ - AVX512_FLAGS CXXFLAGS CXX_TARGETS TEST_FLAGS +LOCAL_VARS = BASIC_FILES CXX_FILES CXX_TARGETS CXXFLAGS $\ + CUDA_FILES CUDA_TARGETS $\ + CUSTATEVEC_FILES CUSTATEVEC_FLAGS $\ + CUSTATEVECEX_FILES CUSTATEVECEX_FLAGS $\ + HAVE_AVX2 AVX2_FILES AVX2_FLAGS $\ + HAVE_AVX512 AVX512_FILES AVX512_FLAGS $\ + HAVE_BMI2 BMI2_FLAGS $\ + HAVE_SSE SSE_FILES SSE_FLAGS $\ + HIP_FILES HIP_TARGETS $\ + GMOCK_DIR GTEST_DIR $\ + TESTFLAGS .PHONY: print-vars print-vars: ; @$(foreach n,$(sort $(LOCAL_VARS)),echo $n=$($n);) From 603288098ec33f52f1ba7214834b135fc504c127 Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Thu, 15 Jan 2026 07:21:45 -0800 Subject: [PATCH 26/33] In `setup.py`, don't limit CMake version test to only Windows (#995) The `CMakeBuild` class' `run()` method tests the version of CMake; however, it did the test only when on Windows. It seems like this is a test worth doing everywhere, so I removed test for Windows. 
In addition, while at it, I slightly updated the way the process output is captured, and expanded the range of exceptions tested in order to provide more specific feedback to users. --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- setup.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 3b4fe7272..84adc51a8 100644 --- a/setup.py +++ b/setup.py @@ -38,21 +38,32 @@ def __init__(self, name, sourcedir=""): class CMakeBuild(build_ext): def run(self): try: - out = subprocess.check_output(["cmake", "--version"]) - except OSError: + out = subprocess.check_output(["cmake", "--version"], text=True, timeout=15) + + from packaging.version import parse + + cmake_version = parse(re.search(r"version\s*([\d.]+)", out).group(1)) + if cmake_version < parse("3.28.0"): + raise RuntimeError( + f"CMake reports its version is {cmake_version}, but qsim needs " + "version >= 3.28.0." + ) + except FileNotFoundError: raise RuntimeError( "CMake must be installed to build the following extensions: " + ", ".join(e.name for e in self.extensions) ) - - if platform.system() == "Windows": - from packaging.version import parse - - cmake_version = parse( - re.search(r"version\s*([\d.]+)", out.decode()).group(1) + except subprocess.CalledProcessError as e: + raise RuntimeError( + f"Command '{e.cmd}' returned status {e.returncode}. 
" + f"Output: {e.output}" + ) + except subprocess.TimeoutExpired as e: + raise RuntimeError(f"Command timed out: {e}") + except OSError as e: + raise RuntimeError( + f"An OS error occurred when trying to run 'cmake --version': {e}" ) - if cmake_version < parse("3.28.0"): - raise RuntimeError("CMake >= 3.28.0 is required on Windows") for ext in self.extensions: self.build_extension(ext) From 9d25d30023fca0a9b007e41724dbc68efcb924d4 Mon Sep 17 00:00:00 2001 From: mhucka Date: Sat, 17 Jan 2026 00:12:50 +0000 Subject: [PATCH 27/33] Do pip install requirements & group dev in one command --- .github/workflows/ci.yaml | 16 ++++------------ .github/workflows/cirq_compatibility.yml | 4 +--- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 365b316a8..53b1894f8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -79,9 +79,7 @@ jobs: run: python -m pip install --upgrade pip - name: Install dependencies - run: | - pip install -r requirements.txt - pip install --group dev + run: pip install -r requirements.txt --group dev - name: Check format continue-on-error: ${{inputs.soft-linting == 'true'}} @@ -254,9 +252,7 @@ jobs: run: python -m pip install --upgrade pip - name: Install dependencies - run: | - pip install -r requirements.txt - pip install --group dev + run: pip install -r requirements.txt --group dev - name: Set up Bazel uses: './.github/actions/set-up-bazel' @@ -347,9 +343,7 @@ jobs: run: python -m pip install --upgrade pip - name: Install dependencies - run: | - pip install -r requirements.txt - pip install --group dev + run: pip install -r requirements.txt --group dev - name: Set up Bazel uses: './.github/actions/set-up-bazel' @@ -408,9 +402,7 @@ jobs: run: python -m pip install --upgrade pip - name: Install dependencies - run: | - pip install -r requirements.txt - pip install --group dev + run: pip install -r requirements.txt --group dev - name: Set up Bazel uses: 
'./.github/actions/set-up-bazel' diff --git a/.github/workflows/cirq_compatibility.yml b/.github/workflows/cirq_compatibility.yml index 12b396a18..41830adc6 100644 --- a/.github/workflows/cirq_compatibility.yml +++ b/.github/workflows/cirq_compatibility.yml @@ -59,9 +59,7 @@ jobs: run: pip install --upgrade cirq~=1.0.dev - name: Install qsim dev requirements - run: | - pip install -r requirements.txt - pip install --group dev + run: pip install -r requirements.txt --group dev - name: Run Python tests env: From 8f66380c21f828658299e2f06093cbf94d63e479 Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 18 Jan 2026 06:46:58 +0000 Subject: [PATCH 28/33] Move cmake & pybind from requirements.txt to pyproject.toml As pointed out by Pavol in the review comments, CMake should only be needed for building qsim, and not a run-time installation dependency. --- pyproject.toml | 33 +++++++++++++++++++-------------- requirements.txt | 9 ++------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bd1c5102c..c1f85c3ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,14 +14,13 @@ # Note: there are altogether 3 types of dependencies listed in this file: # -# [build-system].requires: the packages needed for the build system. This list -# is not stored in the package metadata. +# [build-system].requires: the packages needed by the build backend to build +# the project from source. This list is not stored in the package metadata. # -# [project].dependencies: other packages are minimally needed to be able to -# install and run qsimcirq. These are things like Cirq, NumPy, etc. Equivalent -# to "install_requires" in setuptools' setup.py. The list gets stored in the -# metadata of the package; when the project is installed by pip, this is the -# specification that is used to install its dependencies. +# [project].dependencies: core packages needed to be able to run qsimcirq. +# Equivalent to "install_requires" in setuptools' setup.py. 
The list is stored +# in the metadata of the package; when the project is installed by pip, this is +# the specification that is used to install its dependencies. # # [dependency-groups].dev: the development dependencies; i.e., what a # developer needs in order to run unit tests, linters, and formatters. The @@ -32,8 +31,10 @@ [build-system] build-backend = "setuptools.build_meta" requires = [ - "setuptools>=78.1.1", + "cmake~=3.28.1", + "pybind11[global]", "setuptools-scm[toml]>=6.2", + "setuptools>=78.1.1", "wheel", ] @@ -69,12 +70,10 @@ classifiers = [ "Typing :: Typed", ] keywords = [ - "algorithms", "cirq", "nisq", "quantum algorithm development", "quantum circuit simulator", - "quantum computer simulator", "quantum computing", "quantum programming", "quantum simulation", @@ -94,11 +93,17 @@ source = "https://github.com/quantumlib/qsim" [dependency-groups] # Development dependencies. Install these with "pip install --group dev". dev = [ - "black~=25.9.0", + # The following repeats [build-system].requires b/c pyproject.toml has no + # mechanism to reference that list. Keep these versions in sync with above. + "cmake~=3.28.1", + "pybind11[global]", + "setuptools>=78.1.1; python_version >= '3.12'", + + # Other build, packaging, and distribution utilities. "cibuildwheel", - # Distutils was removed from Python in 3.12. - "setuptools; python_version >= '3.12'", - "flynt~=1.0", + + # Linters, formatters, and test utilities. + "black~=25.9.0", "isort[colors]~=6.0.1", "py-cpuinfo", "pylint~=4.0.2", diff --git a/requirements.txt b/requirements.txt index a927b3363..40ff6bc94 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,11 @@ -# Installation and run-time dependencies for qsimcirq. This file is read -# by pyproject.toml. +# Run-time dependencies for qsimcirq. This file is read from pyproject.toml. 
+# Core dependencies: absl-py cirq-core~=1.0 numpy>=1.26.0,<2.0; python_version < '3.11' numpy>=2.0; python_version >= '3.11' -# These are needed because installing qsimcirq in some environments may require -# pip to compile Pybind for that specific platform: -cmake~=3.28.1 -pybind11[global] - # These are transitive dependencies we need to constrain to avoid unresolvable # installation conflicts due to them requiring higher Python versions: scipy<1.16; python_version < '3.11' From 238b1710627f6453ebbe7b644f405e5199d282ef Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 18 Jan 2026 06:47:12 +0000 Subject: [PATCH 29/33] Remove mention of dev-requirements.txt --- dev_tools/test_libs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev_tools/test_libs.sh b/dev_tools/test_libs.sh index 7efa58f73..16f4417d1 100755 --- a/dev_tools/test_libs.sh +++ b/dev_tools/test_libs.sh @@ -33,7 +33,7 @@ if [[ "$1" == "-h" || "$1" == "--help" || "$1" == "help" ]]; then fi if ! python -m pip show -qq py-cpuinfo 2>/dev/null; then - echo "Error: missing 'py-cpuinfo'. Please install dev-requirements.txt." >&2 + echo "Error: missing package 'py-cpuinfo'." >&2 exit 1 fi From 2f9320f53588c0107613c39f044b6c75e239394f Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 18 Jan 2026 06:49:53 +0000 Subject: [PATCH 30/33] Adjust `Dockerfile`s for changes in requirements scheme Move the installation of the dev dependencies to the top-level Dockerfile because things fail to build otherwise. --- Dockerfile | 2 +- pybind_interface/Dockerfile | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 100c68e92..1e6ae80a9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,7 +52,7 @@ ENV PATH="/test_env/bin:$PATH" # Install qsim requirements. 
# hadolint ignore=DL3013 RUN python3 -m pip install --no-cache-dir --upgrade pip && \ - python3 -m pip install --no-cache-dir -r requirements.txt + python3 -m pip install --no-cache-dir -r requirements.txt --group dev # Compile qsim. RUN make -j qsim diff --git a/pybind_interface/Dockerfile b/pybind_interface/Dockerfile index 2b1cb4fd0..54a4ff511 100644 --- a/pybind_interface/Dockerfile +++ b/pybind_interface/Dockerfile @@ -26,9 +26,5 @@ WORKDIR /qsim/ # Build pybind code early to cache the results RUN make -j -C /qsim/ pybind -# Install Python development dependencies. -# hadolint ignore=DL3013 -RUN pip install --no-cache-dir --group dev - # Compile and run qsim tests ENTRYPOINT ["make", "-C", "/qsim/", "run-py-tests"] From e2682a97a7a02b21379378f1faaa330907273621 Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 18 Jan 2026 07:02:54 +0000 Subject: [PATCH 31/33] Don't need to update pip after venv creation As noted by Pavol in https://github.com/quantumlib/qsim/pull/985/changes#r2678037600, it's not necessary to update pip when `--upgrade-deps` is used in the venv creation. --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1e6ae80a9..8a3642cb9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,9 +50,7 @@ WORKDIR /qsim/ ENV PATH="/test_env/bin:$PATH" # Install qsim requirements. -# hadolint ignore=DL3013 -RUN python3 -m pip install --no-cache-dir --upgrade pip && \ - python3 -m pip install --no-cache-dir -r requirements.txt --group dev +RUN python3 -m pip install --no-cache-dir -r requirements.txt --group dev # Compile qsim. RUN make -j qsim From 996f6f914e17c7e484cb08d53aa62937528326c4 Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 18 Jan 2026 07:12:37 +0000 Subject: [PATCH 32/33] Remove unnecessary venv activation As pointed out by Pavol in review comments, the venv activation command was pointless.
--- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8a3642cb9..366ffbee1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,8 +41,7 @@ COPY ./requirements.txt /qsim/requirements.txt COPY ./pyproject.toml /qsim/pyproject.toml # Create venv to avoid collision between system packages and what we install. -RUN python3 -m venv --upgrade-deps test_env && \ - . test_env/bin/activate +RUN python3 -m venv --upgrade-deps test_env WORKDIR /qsim/ From 74e6c5813626ea23f5515927dbbaa90f4e4654cb Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 18 Jan 2026 07:14:40 +0000 Subject: [PATCH 33/33] Rearrange order of commands in Dockerfile Setting the workdir makes more sense to do before creating the venv. --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 366ffbee1..87155d2f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,13 +40,13 @@ COPY ./qsimcirq_tests/ /qsim/qsimcirq_tests/ COPY ./requirements.txt /qsim/requirements.txt COPY ./pyproject.toml /qsim/pyproject.toml +WORKDIR /qsim/ + # Create venv to avoid collision between system packages and what we install. RUN python3 -m venv --upgrade-deps test_env -WORKDIR /qsim/ - # Activate venv. -ENV PATH="/test_env/bin:$PATH" +ENV PATH="/qsim/test_env/bin:$PATH" # Install qsim requirements. RUN python3 -m pip install --no-cache-dir -r requirements.txt --group dev