From 9fe6b0ed12306dc3ec26034856c9af084ddba06a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 15:39:54 +0100 Subject: [PATCH 01/19] POC: [Python] Testing scikit-build-core as build backend for PyArrow --- python/pyproject.toml | 28 +-- python/setup.py | 480 ------------------------------------------ 2 files changed, 15 insertions(+), 493 deletions(-) delete mode 100755 python/setup.py diff --git a/python/pyproject.toml b/python/pyproject.toml index 217dba81b87..ad5b5126d55 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -17,16 +17,14 @@ [build-system] requires = [ + "scikit-build-core", "cython >= 3.1", # Needed for build-time stub docstring extraction "libcst>=1.8.6", "numpy>=1.25", - # configuring setuptools_scm in pyproject.toml requires - # versions released after 2022 "setuptools_scm[toml]>=8", - "setuptools>=77", ] -build-backend = "setuptools.build_meta" +build-backend = "scikit_build_core.build" [project] name = "pyarrow" @@ -81,16 +79,20 @@ exclude = [ '\._.*$', ] -[tool.setuptools] -zip-safe=false -include-package-data=true +[tool.scikit-build] +cmake.build-type = "Release" +metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" +sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] +wheel.packages = ["pyarrow"] +# CMakeLists.txt installs to DESTINATION ".". 
This maps that into pyarrow/ +# (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) +wheel.install-dir = "pyarrow" -[tool.setuptools.packages.find] -include = ["pyarrow"] -namespaces = false - -[tool.setuptools.package-data] -pyarrow = ["*.pxd", "*.pyi", "*.pyx", "includes/*.pxd", "py.typed"] +[tool.scikit-build.cmake.define] +PYARROW_BUNDLE_ARROW_CPP = {env = "PYARROW_BUNDLE_ARROW_CPP", default = "OFF"} +PYARROW_BUNDLE_CYTHON_CPP = {env = "PYARROW_BUNDLE_CYTHON_CPP", default = "OFF"} +PYARROW_GENERATE_COVERAGE = {env = "PYARROW_GENERATE_COVERAGE", default = "OFF"} +PYARROW_CXXFLAGS = {env = "PYARROW_CXXFLAGS", default = ""} [tool.setuptools_scm] root = '..' diff --git a/python/setup.py b/python/setup.py deleted file mode 100755 index 4f2bf7585e1..00000000000 --- a/python/setup.py +++ /dev/null @@ -1,480 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import contextlib -import os -import os.path -from os.path import join as pjoin -import re -import shlex -import shutil -import sys -import warnings - -if sys.version_info >= (3, 10): - import sysconfig -else: - # Get correct EXT_SUFFIX on Windows (https://bugs.python.org/issue39825) - from distutils import sysconfig - -from setuptools import setup, Extension, Distribution -from setuptools.command.sdist import sdist - -from Cython.Distutils import build_ext as _build_ext -import Cython - -# Check if we're running 64-bit Python -is_64_bit = sys.maxsize > 2**32 - -# We can't use sys.platform in a cross-compiling situation -# as here it may be set to the host not target platform -is_emscripten = ( - sysconfig.get_config_var("SOABI") - and sysconfig.get_config_var("SOABI").find("emscripten") != -1 -) - - -if Cython.__version__ < '3.1': - raise Exception( - 'Please update your Cython version. Supported Cython >= 3.1') - -setup_dir = os.path.abspath(os.path.dirname(__file__)) - -ext_suffix = sysconfig.get_config_var('EXT_SUFFIX') - - -@contextlib.contextmanager -def changed_dir(dirname): - oldcwd = os.getcwd() - os.chdir(dirname) - try: - yield - finally: - os.chdir(oldcwd) - - -def strtobool(val): - """Convert a string representation of truth to true (1) or false (0). - - True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values - are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if - 'val' is anything else. - """ - # Copied from distutils - val = val.lower() - if val in ('y', 'yes', 't', 'true', 'on', '1'): - return 1 - elif val in ('n', 'no', 'f', 'false', 'off', '0'): - return 0 - else: - raise ValueError("invalid truth value %r" % (val,)) - - -MSG_DEPR_SETUP_BUILD_FLAGS = """ - !! - - *********************************************************************** - The '{}' flag is being passed to setup.py, but this is - deprecated. 
- - If a certain component is available in Arrow C++, it will automatically - be enabled for the PyArrow build as well. If you want to force the - build of a certain component, you can still use the - PYARROW_WITH_$COMPONENT environment variable. - *********************************************************************** - - !! -""" - - -class build_ext(_build_ext): - _found_names = () - - def build_extensions(self): - import numpy - numpy_incl = numpy.get_include() - - self.extensions = [ext for ext in self.extensions - if ext.name != '__dummy__'] - - for ext in self.extensions: - if (hasattr(ext, 'include_dirs') and - numpy_incl not in ext.include_dirs): - ext.include_dirs.append(numpy_incl) - _build_ext.build_extensions(self) - - def run(self): - self._run_cmake() - self._update_stubs() - _build_ext.run(self) - - def _update_stubs(self): - """Copy stubs to build directory, then inject docstrings into the copies.""" - stubs_dir = pjoin(setup_dir, 'pyarrow-stubs') - if not os.path.exists(stubs_dir): - return - - build_cmd = self.get_finalized_command('build') - build_lib = os.path.abspath(build_cmd.build_lib) - - # Copy clean stubs to build directory first - self._copy_stubs(stubs_dir, build_lib) - - # Inject docstrings into the build copies (not the source stubs). - # We pass build_lib as stubs_dir since it mirrors the pyarrow-stubs/ - # directory structure (both contain a pyarrow/ subdirectory with .pyi - # files), so the namespace resolution logic works identically. 
- import importlib.util - spec = importlib.util.spec_from_file_location( - "update_stub_docstrings", - pjoin(setup_dir, 'scripts', 'update_stub_docstrings.py')) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - mod.add_docstrings_from_build(build_lib, build_lib) - - def _copy_stubs(self, stubs_dir, build_lib): - """Copy .pyi stub files to the build directory.""" - src_dir = pjoin(stubs_dir, 'pyarrow') - dest_dir = pjoin(build_lib, 'pyarrow') - - if not os.path.exists(src_dir): - return - - print(f"-- Copying stubs: {src_dir} -> {dest_dir}") - for root, dirs, files in os.walk(src_dir): - for fname in files: - if fname.endswith('.pyi'): - src = pjoin(root, fname) - rel_path = os.path.relpath(src, src_dir) - dest = pjoin(dest_dir, rel_path) - os.makedirs(os.path.dirname(dest), exist_ok=True) - shutil.copy2(src, dest) - - # adapted from cmake_build_ext in dynd-python - # github.com/libdynd/dynd-python - - description = "Build the C-extensions for arrow" - user_options = ([('cmake-generator=', None, 'CMake generator'), - ('extra-cmake-args=', None, 'extra arguments for CMake'), - ('build-type=', None, - 'build type (debug or release), default release'), - ('boost-namespace=', None, - 'namespace of boost (default: boost)'), - ('with-cuda', None, 'build the Cuda extension'), - ('with-flight', None, 'build the Flight extension'), - ('with-substrait', None, 'build the Substrait extension'), - ('with-acero', None, 'build the Acero Engine extension'), - ('with-dataset', None, 'build the Dataset extension'), - ('with-parquet', None, 'build the Parquet extension'), - ('with-parquet-encryption', None, - 'build the Parquet encryption extension'), - ('with-azure', None, - 'build the Azure Blob Storage extension'), - ('with-gcs', None, - 'build the Google Cloud Storage (GCS) extension'), - ('with-s3', None, 'build the Amazon S3 extension'), - ('with-static-parquet', None, 'link parquet statically'), - ('with-static-boost', None, 'link boost 
statically'), - ('with-orc', None, 'build the ORC extension'), - ('with-gandiva', None, 'build the Gandiva extension'), - ('generate-coverage', None, - 'enable Cython code coverage'), - ('bundle-boost', None, - 'bundle the (shared) Boost libraries'), - ('bundle-cython-cpp', None, - 'bundle generated Cython C++ code ' - '(used for code coverage)'), - ('bundle-arrow-cpp', None, - 'bundle the Arrow C++ libraries'), - ('bundle-arrow-cpp-headers', None, - 'bundle the Arrow C++ headers')] + - _build_ext.user_options) - - def initialize_options(self): - _build_ext.initialize_options(self) - self.cmake_generator = os.environ.get('PYARROW_CMAKE_GENERATOR') - if not self.cmake_generator and sys.platform == 'win32': - self.cmake_generator = 'Visual Studio 15 2017 Win64' - self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '') - self.build_type = os.environ.get('PYARROW_BUILD_TYPE', - 'release').lower() - - self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '') - - if sys.platform == 'win32': - # Cannot do debug builds in Windows unless Python itself is a debug - # build - if not hasattr(sys, 'gettotalrefcount'): - self.build_type = 'release' - - self.with_azure = None - self.with_gcs = None - self.with_s3 = None - self.with_hdfs = None - self.with_cuda = None - self.with_substrait = None - self.with_flight = None - self.with_acero = None - self.with_dataset = None - self.with_parquet = None - self.with_parquet_encryption = None - self.with_orc = None - self.with_gandiva = None - - self.generate_coverage = strtobool( - os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) - self.bundle_arrow_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0')) - self.bundle_cython_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) - - CYTHON_MODULE_NAMES = [ - 'lib', - '_fs', - '_csv', - '_json', - '_compute', - '_cuda', - '_flight', - '_dataset', - '_dataset_orc', - '_dataset_parquet', - '_acero', - '_feather', - '_parquet', - 
'_parquet_encryption', - '_pyarrow_cpp_tests', - '_orc', - '_azurefs', - '_gcsfs', - '_s3fs', - '_substrait', - '_hdfs', - 'gandiva'] - - def _run_cmake(self): - # check if build_type is correctly passed / set - if self.build_type.lower() not in ('release', 'debug', - 'relwithdebinfo'): - raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to " - "be 'release', 'debug' or 'relwithdebinfo'") - - # The directory containing this setup.py - source = os.path.dirname(os.path.abspath(__file__)) - - # The staging directory for the module being built - build_cmd = self.get_finalized_command('build') - saved_cwd = os.getcwd() - build_temp = pjoin(saved_cwd, build_cmd.build_temp) - build_lib = pjoin(saved_cwd, build_cmd.build_lib) - - if not os.path.isdir(build_temp): - self.mkpath(build_temp) - - if self.inplace: - # a bit hacky - build_lib = saved_cwd - - install_prefix = pjoin(build_lib, "pyarrow") - - # Change to the build directory - with changed_dir(build_temp): - # Detect if we built elsewhere - if os.path.isfile('CMakeCache.txt'): - cachefile = open('CMakeCache.txt', 'r') - cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', - cachefile.read()).group(1) - cachefile.close() - if (cachedir != build_temp): - build_base = pjoin(saved_cwd, build_cmd.build_base) - print(f"-- Skipping build. 
Temp build {build_temp} does " - f"not match cached dir {cachedir}") - print("---- For a clean build you might want to delete " - f"{build_base}.") - return - - cmake_options = [ - f'-DCMAKE_INSTALL_PREFIX={install_prefix}', - f'-DPYTHON_EXECUTABLE={sys.executable}', - f'-DPython3_EXECUTABLE={sys.executable}', - f'-DPYARROW_CXXFLAGS={self.cmake_cxxflags}', - ] - - def append_cmake_bool(value, varname): - cmake_options.append(f'-D{varname}={"on" if value else "off"}') - - def append_cmake_component(flag, varname): - # only pass this to cmake if the user pass the --with-component - # flag to setup.py build_ext - if flag is not None: - flag_name = ( - "--with-" - + varname.removeprefix("PYARROW_").lower().replace("_", "-")) - warnings.warn( - MSG_DEPR_SETUP_BUILD_FLAGS.format(flag_name), - UserWarning, stacklevel=2 - ) - append_cmake_bool(flag, varname) - - if self.cmake_generator: - cmake_options += ['-G', self.cmake_generator] - - append_cmake_component(self.with_cuda, 'PYARROW_CUDA') - append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT') - append_cmake_component(self.with_flight, 'PYARROW_FLIGHT') - append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA') - append_cmake_component(self.with_acero, 'PYARROW_ACERO') - append_cmake_component(self.with_dataset, 'PYARROW_DATASET') - append_cmake_component(self.with_orc, 'PYARROW_ORC') - append_cmake_component(self.with_parquet, 'PYARROW_PARQUET') - append_cmake_component(self.with_parquet_encryption, - 'PYARROW_PARQUET_ENCRYPTION') - append_cmake_component(self.with_azure, 'PYARROW_AZURE') - append_cmake_component(self.with_gcs, 'PYARROW_GCS') - append_cmake_component(self.with_s3, 'PYARROW_S3') - append_cmake_component(self.with_hdfs, 'PYARROW_HDFS') - - append_cmake_bool(self.bundle_arrow_cpp, - 'PYARROW_BUNDLE_ARROW_CPP') - append_cmake_bool(self.bundle_cython_cpp, - 'PYARROW_BUNDLE_CYTHON_CPP') - append_cmake_bool(self.generate_coverage, - 'PYARROW_GENERATE_COVERAGE') - - cmake_options.append( - 
f'-DCMAKE_BUILD_TYPE={self.build_type.lower()}') - - extra_cmake_args = shlex.split(self.extra_cmake_args) - - build_tool_args = [] - if sys.platform == 'win32': - if not is_64_bit: - raise RuntimeError('Not supported on 32-bit Windows') - else: - build_tool_args.append('--') - if os.environ.get('PYARROW_BUILD_VERBOSE', '0') == '1': - cmake_options.append('-DCMAKE_VERBOSE_MAKEFILE=ON') - parallel = os.environ.get('PYARROW_PARALLEL') - if parallel: - build_tool_args.append(f'-j{parallel}') - - # Generate the build files - if is_emscripten: - print("-- Running emcmake cmake for PyArrow on Emscripten") - self.spawn(['emcmake', 'cmake'] + extra_cmake_args + - cmake_options + [source]) - else: - print("-- Running cmake for PyArrow") - self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source]) - - print("-- Finished cmake for PyArrow") - - print("-- Running cmake --build for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - build_tool_args) - print("-- Finished cmake --build for PyArrow") - - print("-- Running cmake --build --target install for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - ['--target', 'install'] + build_tool_args) - print("-- Finished cmake --build --target install for PyArrow") - - self._found_names = [] - for name in self.CYTHON_MODULE_NAMES: - built_path = pjoin(install_prefix, name + ext_suffix) - if os.path.exists(built_path): - self._found_names.append(name) - - def _get_build_dir(self): - # Get the package directory from build_py - build_py = self.get_finalized_command('build_py') - return build_py.get_package_dir('pyarrow') - - def _get_cmake_ext_path(self, name): - # This is the name of the arrow C-extension - filename = name + ext_suffix - return pjoin(self._get_build_dir(), filename) - - def get_ext_generated_cpp_source(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + ".cpp") - else: - return pjoin(name + 
".cpp") - - def get_ext_built_api_header(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + "_api.h") - else: - return pjoin(name + "_api.h") - - def get_names(self): - return self._found_names - - def get_outputs(self): - # Just the C extensions - # regular_exts = _build_ext.get_outputs(self) - return [self._get_cmake_ext_path(name) - for name in self.get_names()] - - -class BinaryDistribution(Distribution): - def has_ext_modules(foo): - return True - - -class CopyLicenseSdist(sdist): - """Custom sdist command that copies license files from parent directory.""" - - def make_release_tree(self, base_dir, files): - # Call parent to do the normal work - super().make_release_tree(base_dir, files) - - # Define source (parent dir) and destination (sdist root) for license files - license_files = [ - ("LICENSE.txt", "../LICENSE.txt"), - ("NOTICE.txt", "../NOTICE.txt"), - ] - - for dest_name, src_path in license_files: - src_full = os.path.join(os.path.dirname(__file__), src_path) - dest_full = os.path.join(base_dir, dest_name) - - # Remove any existing file/symlink at destination - if os.path.exists(dest_full) or os.path.islink(dest_full): - os.unlink(dest_full) - - if not os.path.exists(src_full): - msg = f"Required license file not found: {src_full}" - raise FileNotFoundError(msg) - - shutil.copy2(src_full, dest_full) - print(f"Copied {src_path} to {dest_name} in sdist") - - -setup( - distclass=BinaryDistribution, - # Dummy extension to trigger build_ext - ext_modules=[Extension('__dummy__', sources=[])], - cmdclass={ - 'build_ext': build_ext, - 'sdist': CopyLicenseSdist, - }, -) From 234043f557e06560e5e59c7f36c163e8680e0843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 15:57:00 +0100 Subject: [PATCH 02/19] Add scikit-build-core to requirements-build.txt --- python/requirements-build.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/requirements-build.txt b/python/requirements-build.txt index c3b7aa48eb6..2e8e8e774ec 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,5 +1,5 @@ cython>=3.1 libcst>=1.8.6 numpy>=1.25 +scikit-build-core setuptools_scm>=8 -setuptools>=77 From 8c6b44a47c896c5134cc4f75256916445009e4ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 16:10:59 +0100 Subject: [PATCH 03/19] Try fixing source test tests on dev PR --- .github/workflows/dev.yml | 2 +- dev/release/02-source-test.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 59171ddcaae..d8ee21761a5 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit openssl - pip install "cython>=3.1" setuptools pytest requests setuptools-scm + pip install build "cython>=3.1" pytest requests scikit-build-core setuptools-scm - name: Run Release Test shell: bash run: | diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index 5bd7c717709..fdd1db9c607 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -64,7 +64,7 @@ def test_symbolic_links def test_python_version source Dir.chdir("#{@tag_name_no_rc}/python") do - sh("python3", "setup.py", "sdist") + sh("python", "-m", "build", "--sdist") if on_release_branch? 
pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz" else From c391495d9d030cedf76cb14411b8e61165809b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 16:14:17 +0100 Subject: [PATCH 04/19] Update some more dependencies --- ci/conda_env_python.txt | 2 +- python/requirements-wheel-build.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 33ac193f86e..dd16d66b725 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -29,5 +29,5 @@ numpy>=1.16.6 pytest pytest-faulthandler s3fs>=2023.10.0 -setuptools>=77 +scikit-build-core setuptools_scm>=8 diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt index 6a2c6221243..a3ac1396772 100644 --- a/python/requirements-wheel-build.txt +++ b/python/requirements-wheel-build.txt @@ -3,6 +3,6 @@ cython>=3.1 # Needed for build-time stub docstring extraction libcst>=1.8.6 numpy>=2.0.0 +scikit-build-core setuptools_scm -setuptools>=77 wheel From d2e5d292059593e217f8d0e8eb0fb2ecf7ce9f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 16:42:58 +0100 Subject: [PATCH 05/19] Try creating a wrapper backend to just copy licenses before anything else happens --- python/_build_backend/__init__.py | 41 +++++++++++++++++++++++++++++++ python/pyproject.toml | 5 ++-- 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 python/_build_backend/__init__.py diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py new file mode 100644 index 00000000000..88519e7f34b --- /dev/null +++ b/python/_build_backend/__init__.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Build backend wrapper that copies license files from the repo root +before delegating to scikit-build-core. + +Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level above +python/. The pyproject-metadata validator requires that files listed in +``project.license-files`` exist inside the project directory, so we copy them +in before anything else happens. +""" + +import shutil +from pathlib import Path + +from scikit_build_core.build import * # noqa: F401,F403 + +_PYTHON_DIR = Path(__file__).resolve().parent.parent +_REPO_ROOT = _PYTHON_DIR.parent + +for _name in ("LICENSE.txt", "NOTICE.txt"): + _src = _REPO_ROOT / _name + _dst = _PYTHON_DIR / _name + # If file doesn't exist, example on an sdist, this is just no-op. 
+ if _src.exists() and not _dst.exists(): + shutil.copy2(_src, _dst) diff --git a/python/pyproject.toml b/python/pyproject.toml index ad5b5126d55..9020aadebab 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,7 +24,8 @@ requires = [ "numpy>=1.25", "setuptools_scm[toml]>=8", ] -build-backend = "scikit_build_core.build" +build-backend = "_build_backend" +backend-path = ["."] [project] name = "pyarrow" @@ -82,7 +83,7 @@ exclude = [ [tool.scikit-build] cmake.build-type = "Release" metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" -sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] +sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/", "LICENSE.txt", "NOTICE.txt"] wheel.packages = ["pyarrow"] # CMakeLists.txt installs to DESTINATION ".". This maps that into pyarrow/ # (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) From 416478ab2501d371ec1b5d24a0844045427c5d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 17:02:23 +0100 Subject: [PATCH 06/19] Remove custom build backend and just use links --- python/.gitignore | 4 --- python/LICENSE.txt | 1 + python/NOTICE.txt | 1 + python/_build_backend/__init__.py | 41 ------------------------------- python/pyproject.toml | 5 ++-- 5 files changed, 4 insertions(+), 48 deletions(-) create mode 120000 python/LICENSE.txt create mode 120000 python/NOTICE.txt delete mode 100644 python/_build_backend/__init__.py diff --git a/python/.gitignore b/python/.gitignore index ce97ba4af62..858c983d49c 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -44,7 +44,3 @@ manylinux1/arrow nm_arrow.log visible_symbols.log -# the purpose of the custom SDist class in setup.py is to include these files -# in the sdist tarball, but we don't want to track duplicates -LICENSE.txt -NOTICE.txt diff --git a/python/LICENSE.txt b/python/LICENSE.txt new file mode 120000 index 00000000000..4ab43736a83 --- /dev/null +++ 
b/python/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/python/NOTICE.txt b/python/NOTICE.txt new file mode 120000 index 00000000000..eb9f24e040b --- /dev/null +++ b/python/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py deleted file mode 100644 index 88519e7f34b..00000000000 --- a/python/_build_backend/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Build backend wrapper that copies license files from the repo root -before delegating to scikit-build-core. - -Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level above -python/. The pyproject-metadata validator requires that files listed in -``project.license-files`` exist inside the project directory, so we copy them -in before anything else happens. 
-""" - -import shutil -from pathlib import Path - -from scikit_build_core.build import * # noqa: F401,F403 - -_PYTHON_DIR = Path(__file__).resolve().parent.parent -_REPO_ROOT = _PYTHON_DIR.parent - -for _name in ("LICENSE.txt", "NOTICE.txt"): - _src = _REPO_ROOT / _name - _dst = _PYTHON_DIR / _name - # If file doesn't exist, example on an sdist, this is just no-op. - if _src.exists() and not _dst.exists(): - shutil.copy2(_src, _dst) diff --git a/python/pyproject.toml b/python/pyproject.toml index 9020aadebab..ad5b5126d55 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,8 +24,7 @@ requires = [ "numpy>=1.25", "setuptools_scm[toml]>=8", ] -build-backend = "_build_backend" -backend-path = ["."] +build-backend = "scikit_build_core.build" [project] name = "pyarrow" @@ -83,7 +82,7 @@ exclude = [ [tool.scikit-build] cmake.build-type = "Release" metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" -sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/", "LICENSE.txt", "NOTICE.txt"] +sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] wheel.packages = ["pyarrow"] # CMakeLists.txt installs to DESTINATION ".". 
This maps that into pyarrow/ # (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) From 1f537702cd47427e761f55ab00705e4e363a00c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 13 Feb 2026 10:20:05 +0100 Subject: [PATCH 07/19] Fix populating UPPERCASE_PYBUILD_TYPE for multi-config generators where CMAKE_BUILD_TYPE isn't populated --- python/CMakeLists.txt | 16 +++++++++++++++- python/pyproject.toml | 2 -- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index f99225284a8..31ce2f149ea 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -352,7 +352,21 @@ set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) # Write out compile-time configuration constants -string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_PYBUILD_TYPE) +if(CMAKE_BUILD_TYPE) + string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_PYBUILD_TYPE) +else() + # For multi-config generators (Xcode and Visual Studio), + # CMAKE_BUILD_TYPE is not set at configure time. + # scikit-build-core does the right thing with cmake.build-type and + # adds the corresponding --config but does not populate CMAKE_BUILD_TYPE + # for those. In this specific case, we set the default to "RELEASE" + # as it's the most common build type for users building from source. + # This is mainly relevant for our Windows wheels, which are built with + # Visual Studio and thus use a multi-config generator with Release. + # Note that this is only used to populate config_internal.h.cmake. 
+ set(UPPERCASE_PYBUILD_TYPE "RELEASE") +endif() + configure_file("${PYARROW_CPP_SOURCE_DIR}/config_internal.h.cmake" "${PYARROW_CPP_SOURCE_DIR}/config_internal.h" ESCAPE_QUOTES) diff --git a/python/pyproject.toml b/python/pyproject.toml index ad5b5126d55..0d5e04843ad 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -84,8 +84,6 @@ cmake.build-type = "Release" metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] wheel.packages = ["pyarrow"] -# CMakeLists.txt installs to DESTINATION ".". This maps that into pyarrow/ -# (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) wheel.install-dir = "pyarrow" [tool.scikit-build.cmake.define] From 79b2b639288f298d958185a3856f4606149fc9a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 13 Feb 2026 14:51:52 +0100 Subject: [PATCH 08/19] Add some logging --- .env | 4 ++-- ci/scripts/python_wheel_windows_build.bat | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 6985926772d..d00eefdb8df 100644 --- a/.env +++ b/.env @@ -99,8 +99,8 @@ VCPKG="66c0373dc7fca549e5803087b9487edfe3aca0a1" # 2026.01.16 Release # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-07 -PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-07 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-13 +PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-13 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan". 
# See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index e10766ef37e..1e20d54bf16 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -136,6 +136,10 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel +%PYTHON_CMD% -c "import scikit_build_core; print(scikit_build_core.__version__)" +%PYTHON_CMD% -c "import sys; print(sys.executable); print(sys.path)" +%PYTHON_CMD% -m pip list +%PYTHON_CMD% -m pip show scikit-build-core %PYTHON_CMD% -m build --sdist --wheel . --no-isolation || exit /B 1 @REM Repair the wheel with delvewheel From 9646fc898348a67f1aac24fc84f3d0f2de4f4352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 16 Feb 2026 12:10:17 +0100 Subject: [PATCH 09/19] Remove unnecessary logging --- ci/scripts/python_wheel_windows_build.bat | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 1e20d54bf16..e10766ef37e 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -136,10 +136,6 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -c "import scikit_build_core; print(scikit_build_core.__version__)" -%PYTHON_CMD% -c "import sys; print(sys.executable); print(sys.path)" -%PYTHON_CMD% -m pip list -%PYTHON_CMD% -m pip show scikit-build-core %PYTHON_CMD% -m build --sdist --wheel . 
--no-isolation || exit /B 1 @REM Repair the wheel with delvewheel From d8999bb00619018ce476794da8e2df11b143511b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 16 Feb 2026 12:17:53 +0100 Subject: [PATCH 10/19] Fix sdist --- ci/scripts/python_sdist_build.sh | 2 +- python/requirements-build.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/scripts/python_sdist_build.sh b/ci/scripts/python_sdist_build.sh index dfb99518431..4f7437c423d 100755 --- a/ci/scripts/python_sdist_build.sh +++ b/ci/scripts/python_sdist_build.sh @@ -23,5 +23,5 @@ source_dir=${1}/python pushd "${source_dir}" export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-} -${PYTHON:-python} setup.py sdist +${PYTHON:-python} -m build --sdist popd diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 2e8e8e774ec..fdd3e68a1b1 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,3 +1,4 @@ +build cython>=3.1 libcst>=1.8.6 numpy>=1.25 From 23ffa93e7a8daa377e6ac91a369defd14076dfb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 17 Feb 2026 11:55:22 +0100 Subject: [PATCH 11/19] Add tiny build backend wrapper on top of scikit-build-core to manage licenses --- python/_build_backend/__init__.py | 66 +++++++++++++++++++++++++++++++ python/pyproject.toml | 5 ++- 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 python/_build_backend/__init__.py diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py new file mode 100644 index 00000000000..d074f1699ac --- /dev/null +++ b/python/_build_backend/__init__.py @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Build backend wrapper that resolves license symlinks before delegating +to scikit-build-core. + +Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level +above python/. They are symlinked into python/ so that license-files in +pyproject.toml can reference them; otherwise project metadata fails validation. +This is done before any build backend is invoked, which is why symlinks are necessary. +But when building sdist tarballs, symlinks are not copied and we end up with +broken LICENSE.txt and NOTICE.txt. + +This custom build backend only replaces the symlinks with hardlinks +before calling scikit_build_core.build.build_sdist so +that the sdist contains the actual file content. The symlinks are restored +afterwards so the git working tree stays clean. 
+""" + +import os +from contextlib import contextmanager +from pathlib import Path + +from scikit_build_core.build import build_sdist as scikit_build_sdist + +LICENSE_FILES = ("LICENSE.txt", "NOTICE.txt") +PYTHON_DIR = Path(__file__).resolve().parent.parent + + +@contextmanager +def hardlink_licenses(): + # Temporarily replace symlinks with hardlinks so sdist gets real content + for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + if filepath.is_symlink(): + target = filepath.resolve() + filepath.unlink() + os.link(target, filepath) + try: + yield + finally: + # Copy back the original symlinks so git status is clean + for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + filepath.unlink() + os.symlink(f"../{name}", filepath) + + +def build_sdist(sdist_directory, config_settings=None): + with hardlink_licenses(): + return scikit_build_sdist(sdist_directory, config_settings) diff --git a/python/pyproject.toml b/python/pyproject.toml index 0d5e04843ad..14aa37ed045 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,7 +24,10 @@ requires = [ "numpy>=1.25", "setuptools_scm[toml]>=8", ] -build-backend = "scikit_build_core.build" +# We use a really simple build backend wrapper over scikit-build-core +# to solve licenses to work around links not being included in sdists. 
+build-backend = "_build_backend" +backend-path = ["."] [project] name = "pyarrow" From ce6e4c312b69b8ba48ff8c480822449d7265c5b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 17 Feb 2026 12:28:25 +0100 Subject: [PATCH 12/19] Import all scikit_build_core.build hooks to be available like build_wheel and others --- python/_build_backend/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index d074f1699ac..043ea394531 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -36,6 +36,7 @@ from contextlib import contextmanager from pathlib import Path +from scikit_build_core.build import * # noqa: F401,F403 from scikit_build_core.build import build_sdist as scikit_build_sdist LICENSE_FILES = ("LICENSE.txt", "NOTICE.txt") From 5c96a75d69c79885dda9c55f43345629ea9bafc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 17 Feb 2026 19:07:56 +0100 Subject: [PATCH 13/19] Try to solve Windows license problems with links --- python/_build_backend/__init__.py | 40 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index 043ea394531..e9a6ead8aec 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -32,9 +32,11 @@ afterwards so the git working tree stays clean. """ -import os from contextlib import contextmanager +import os from pathlib import Path +import shutil +import sys from scikit_build_core.build import * # noqa: F401,F403 from scikit_build_core.build import build_sdist as scikit_build_sdist @@ -44,24 +46,34 @@ @contextmanager -def hardlink_licenses(): - # Temporarily replace symlinks with hardlinks so sdist gets real content +def prepare_licenses(): + # Temporarily replace symlinks with hardlinks so sdist gets real content. 
+ # On Windows we just copy the files since hardlinks might not be supported. for name in LICENSE_FILES: - filepath = PYTHON_DIR / name - if filepath.is_symlink(): - target = filepath.resolve() - filepath.unlink() - os.link(target, filepath) + parent_license = PYTHON_DIR.parent / name + pyarrow_license = PYTHON_DIR / name + if sys.platform == "win32": + # For Windows copy the files. + pyarrow_license.unlink(missing_ok=True) + shutil.copy2(parent_license, pyarrow_license) + else: + # For Unix-like systems we replace the symlink with + # a hardlink to avoid copying the file content. + if pyarrow_license.is_symlink(): + target = pyarrow_license.resolve() + pyarrow_license.unlink() + os.link(target, pyarrow_license) try: yield finally: - # Copy back the original symlinks so git status is clean - for name in LICENSE_FILES: - filepath = PYTHON_DIR / name - filepath.unlink() - os.symlink(f"../{name}", filepath) + if sys.platform != "win32": + # Copy back the original symlinks so git status is clean. 
+ for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + filepath.unlink() + os.symlink(f"../{name}", filepath) def build_sdist(sdist_directory, config_settings=None): - with hardlink_licenses(): + with prepare_licenses(): return scikit_build_sdist(sdist_directory, config_settings) From b44b5bb4c5b5f227b530eddbca0bed0baf966af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 12:42:23 +0100 Subject: [PATCH 14/19] Some fixes on documentation, remove remnant setup.py executions and remove usage of PYARROW_CMAKE_GENERATOR --- ci/scripts/python_build.bat | 1 - ci/scripts/python_build.sh | 4 +--- ci/scripts/python_sdist_test.sh | 2 +- ci/scripts/python_wheel_macos_build.sh | 2 +- ci/scripts/python_wheel_windows_build.bat | 1 - ci/scripts/python_wheel_xlinux_build.sh | 2 +- dev/release/verify-release-candidate.bat | 6 +++-- docs/source/developers/documentation.rst | 3 ++- .../guide/step_by_step/building.rst | 2 +- docs/source/developers/python/building.rst | 24 +++++++++---------- python/.gitignore | 4 ++-- python/examples/minimal_build/build_conda.sh | 2 +- python/examples/minimal_build/build_venv.sh | 2 +- 13 files changed, 27 insertions(+), 28 deletions(-) diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index 06f5a637223..fbd44a1c4c2 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -114,7 +114,6 @@ echo "=== Building Python ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% set PYARROW_WITH_DATASET=%ARROW_DATASET% diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index 36dc35a2de8..bc606e3b60a 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -59,7 +59,7 @@ if [ -n "${CONDA_PREFIX}" ]; then conda list fi -export 
PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} +export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} @@ -90,8 +90,6 @@ export DYLD_LIBRARY_PATH=${ARROW_HOME}/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_P rm -rf "${python_build_dir}" cp -aL "${source_dir}" "${python_build_dir}" pushd "${python_build_dir}" -# - Cannot call setup.py as it may install in the wrong directory -# on Debian/Ubuntu (ARROW-15243). # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. ${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index 98a938d970a..26ed2f417d5 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -25,7 +25,7 @@ export ARROW_SOURCE_DIR=${arrow_dir} export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} +export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 0990a842e94..6d4fc180de6 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -145,7 +145,7 @@ popd echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} -export PYARROW_BUNDLE_ARROW_CPP=1 +export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index e10766ef37e..08ba4df7ad7 100644 --- 
a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -116,7 +116,6 @@ echo "=== (%PYTHON%) Building wheel ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% set PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index ceebbc5ad01..aea7bda8750 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -147,7 +147,7 @@ check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} -export PYARROW_BUNDLE_ARROW_CPP=1 +export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 50dfc06e698..c69dab58e76 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -132,15 +132,17 @@ popd @rem Build and import pyarrow pushd !ARROW_SOURCE!\python +pip install build || exit /B 1 pip install -r requirements-test.txt || exit /B 1 -set PYARROW_CMAKE_GENERATOR=%GENERATOR% +set CMAKE_GENERATOR=%GENERATOR% set PYARROW_WITH_FLIGHT=1 set PYARROW_WITH_PARQUET=1 set PYARROW_WITH_PARQUET_ENCRYPTION=1 set PYARROW_WITH_DATASET=1 set PYARROW_TEST_CYTHON=OFF -python setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel || exit /B 1 +set PYARROW_BUNDLE_ARROW_CPP=ON +python -m build --sdist --wheel . 
--no-isolation || exit /B 1 pytest pyarrow -v -s --enable-parquet || exit /B 1 popd diff --git a/docs/source/developers/documentation.rst b/docs/source/developers/documentation.rst index 5f0ebbdb7db..3cdb2f23b00 100644 --- a/docs/source/developers/documentation.rst +++ b/docs/source/developers/documentation.rst @@ -71,8 +71,9 @@ These two steps are mandatory and must be executed in order. this step requires that ``pyarrow`` library is installed in your python environment. One way to accomplish this is to follow the build instructions at :ref:`python-development` - and then run ``python setup.py install`` in arrow/python + and then run ``pip install --no-build-isolation .`` in arrow/python (it is best to do this in a dedicated conda/virtual environment). + Add ``-vv`` to the pip command to get output of the build process. You can still build the documentation without ``pyarrow`` library installed but note that Python part of the documentation diff --git a/docs/source/developers/guide/step_by_step/building.rst b/docs/source/developers/guide/step_by_step/building.rst index 5317cf06c74..7b4f42a04f5 100644 --- a/docs/source/developers/guide/step_by_step/building.rst +++ b/docs/source/developers/guide/step_by_step/building.rst @@ -155,7 +155,7 @@ Building other Arrow libraries .. code:: console - $ python setup.py build_ext --inplace + $ pip install --no-build-isolation -vv . **Recompiling C++** diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index deb6076e44a..539d2f93f45 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -391,7 +391,7 @@ To build PyArrow run: .. code-block:: $ pushd arrow/python - $ python setup.py build_ext --inplace + $ pip install --no-build-isolation -vv . $ popd .. tab-item:: Windows @@ -400,7 +400,7 @@ To build PyArrow run: .. 
code-block:: $ pushd arrow\python - $ python setup.py build_ext --inplace + $ pip install --no-build-isolation -vv . $ popd .. note:: @@ -428,8 +428,8 @@ To build PyArrow run: .. code-block:: - $ set PYARROW_BUNDLE_ARROW_CPP=1 - $ python setup.py build_ext --inplace + $ set PYARROW_BUNDLE_ARROW_CPP=ON + $ pip install --no-build-isolation -vv . Note that bundled Arrow C++ libraries will not be automatically updated when rebuilding Arrow C++. @@ -444,9 +444,9 @@ artifacts before rebuilding. See :ref:`python-dev-env-variables`. By default, PyArrow will be built in release mode even if Arrow C++ has been built in debug mode. To create a debug build of PyArrow, run -``export PYARROW_BUILD_TYPE=debug`` prior to running ``python setup.py -build_ext --inplace`` above. A ``relwithdebinfo`` build can be created -similarly. +``export PYARROW_BUILD_TYPE=debug`` prior to running +``pip install --no-build-isolation -vv .`` above. +A ``relwithdebinfo`` build can be created similarly. Self-Contained Wheel ^^^^^^^^^^^^^^^^^^^^ @@ -457,13 +457,13 @@ libraries). This ensures that all necessary native libraries are bundled inside the wheel, so users can install it without needing to have Arrow or Parquet installed separately on their system. -To do this, pass the ``--bundle-arrow-cpp`` option to the build command: +To do this, set the ``PYARROW_BUNDLE_ARROW_CPP`` environment variable before building ``pyarrow``: .. code-block:: - $ pip install wheel # if not installed - $ python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \ - --bundle-arrow-cpp bdist_wheel + $ set PYARROW_BUNDLE_ARROW_CPP=ON + $ pip install build wheel # if not installed + $ python -m build --sdist --wheel . --no-isolation This option is typically only needed for releases or distribution scenarios, not for local development. 
@@ -558,7 +558,7 @@ PyArrow are: * - ``PYARROW_BUILD_TYPE`` - Build type for PyArrow (release, debug or relwithdebinfo), sets ``CMAKE_BUILD_TYPE`` - ``release`` - * - ``PYARROW_CMAKE_GENERATOR`` + * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` * - ``PYARROW_CMAKE_OPTIONS`` diff --git a/python/.gitignore b/python/.gitignore index 858c983d49c..5ec5fdf0120 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -24,9 +24,9 @@ cython_debug # Bundled headers pyarrow/include -# setup.py working directory +# build directory build -# setup.py dist directory +# dist directory dist # Coverage .coverage diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh index 0b731638cd9..1855869cff1 100755 --- a/python/examples/minimal_build/build_conda.sh +++ b/python/examples/minimal_build/build_conda.sh @@ -95,7 +95,7 @@ rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} export PYARROW_BUILD_TYPE=Debug -export PYARROW_CMAKE_GENERATOR=Ninja +export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh python -m pip install --no-deps --no-build-isolation -vv . diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh index f462c4e9b9d..8b6fa925e39 100755 --- a/python/examples/minimal_build/build_venv.sh +++ b/python/examples/minimal_build/build_venv.sh @@ -67,7 +67,7 @@ rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} export PYARROW_BUILD_TYPE=Debug -export PYARROW_CMAKE_GENERATOR=Ninja +export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh python -m pip install --no-deps --no-build-isolation -vv . 
From 677f4ca007abbb74c5286de1e7ac52680d07858b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 12:43:53 +0100 Subject: [PATCH 15/19] Ooops unsaved files --- ci/scripts/python_wheel_macos_build.sh | 1 - ci/scripts/python_wheel_xlinux_build.sh | 1 - 2 files changed, 2 deletions(-) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 6d4fc180de6..fac0be4fd29 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -146,7 +146,6 @@ popd echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index aea7bda8750..1c1a546c734 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -148,7 +148,6 @@ check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} From d4b90559a06be2c6ad9ef0b5949c3a8bddd195ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 13:02:30 +0100 Subject: [PATCH 16/19] Remove obsolete PYARROW_BUILD_TYPE. 
Use and document new --config-settings cmake.build-type usage --- ci/scripts/python_build.bat | 1 - ci/scripts/python_build.sh | 4 +--- ci/scripts/python_sdist_test.sh | 3 +-- ci/scripts/python_wheel_macos_build.sh | 3 +-- ci/scripts/python_wheel_windows_build.bat | 3 +-- ci/scripts/python_wheel_xlinux_build.sh | 3 +-- docs/source/developers/python/building.rst | 8 ++------ docs/source/developers/python/development.rst | 2 +- python/examples/minimal_build/build_conda.sh | 3 +-- python/examples/minimal_build/build_venv.sh | 3 +-- 10 files changed, 10 insertions(+), 23 deletions(-) diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index fbd44a1c4c2..bf462fce727 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -111,7 +111,6 @@ echo "=== CCACHE Stats after build ===" ccache -sv echo "=== Building Python ===" -set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index bc606e3b60a..7cadf6ca19a 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -60,8 +60,6 @@ if [ -n "${CONDA_PREFIX}" ]; then fi export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} - export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} @@ -92,7 +90,7 @@ cp -aL "${source_dir}" "${python_build_dir}" pushd "${python_build_dir}" # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. -${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . +${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" . 
popd if [ "${BUILD_DOCS_PYTHON}" == "ON" ]; then diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index 26ed2f417d5..52023ff5e7e 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -26,7 +26,6 @@ export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export PYARROW_WITH_S3=${ARROW_S3:-OFF} @@ -64,7 +63,7 @@ if [ -n "${ARROW_PYTHON_VENV:-}" ]; then . "${ARROW_PYTHON_VENV}/bin/activate" fi -${PYTHON:-python} -m pip install "${sdist}" +${PYTHON:-python} -m pip install --config-settings cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" "${sdist}" # shellcheck disable=SC2086 pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index fac0be4fd29..786e0ecaeeb 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -144,7 +144,6 @@ cmake --build . --target install popd echo "=== (${PYTHON_VERSION}) Building wheel ===" -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} @@ -166,7 +165,7 @@ export CMAKE_PREFIX_PATH=${build_dir}/install export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python -python -m build --sdist --wheel . --no-isolation +python -m build --sdist --wheel . 
--no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 08ba4df7ad7..11d3e87acf8 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -113,7 +113,6 @@ cmake --build . --config %CMAKE_BUILD_TYPE% --target install || exit /B 1 popd echo "=== (%PYTHON%) Building wheel ===" -set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" @@ -135,7 +134,7 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -m build --sdist --wheel . --no-isolation || exit /B 1 +%PYTHON_CMD% -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% || exit /B 1 @REM Repair the wheel with delvewheel @REM diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index 1c1a546c734..2cbc855cac1 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -146,7 +146,6 @@ popd check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} @@ -166,7 +165,7 @@ export ARROW_HOME=/tmp/arrow-dist export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python -python -m build --sdist --wheel . --no-isolation +python -m build --sdist --wheel . 
--no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} echo "=== Strip symbols from wheel ===" mkdir -p dist/temp-fix-wheel diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 539d2f93f45..2cb0750f9f6 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -444,8 +444,7 @@ artifacts before rebuilding. See :ref:`python-dev-env-variables`. By default, PyArrow will be built in release mode even if Arrow C++ has been built in debug mode. To create a debug build of PyArrow, run -``export PYARROW_BUILD_TYPE=debug`` prior to running -``pip install --no-build-isolation -vv .`` above. +``pip install --no-build-isolation -vv --config-settings cmake.build-type=Debug .``. A ``relwithdebinfo`` build can be created similarly. Self-Contained Wheel @@ -461,7 +460,7 @@ To do this, set the ``PYARROW_BUNDLE_ARROW_CPP`` environment variable before bui .. code-block:: - $ set PYARROW_BUNDLE_ARROW_CPP=ON + $ export PYARROW_BUNDLE_ARROW_CPP=ON $ pip install build wheel # if not installed $ python -m build --sdist --wheel . --no-isolation @@ -555,9 +554,6 @@ PyArrow are: * - PyArrow environment variable - Description - Default value - * - ``PYARROW_BUILD_TYPE`` - - Build type for PyArrow (release, debug or relwithdebinfo), sets ``CMAKE_BUILD_TYPE`` - - ``release`` * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst index 5529ad25a29..857358a6c31 100644 --- a/docs/source/developers/python/development.rst +++ b/docs/source/developers/python/development.rst @@ -222,7 +222,7 @@ Debug build Since PyArrow depends on the Arrow C++ libraries, debugging can frequently involve crossing between Python and C++ shared libraries. 
For the best experience, make sure you've built both Arrow C++ -(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow (``export PYARROW_BUILD_TYPE=debug``) +(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow (``--config-settings cmake.build-type=Debug``) in debug mode. Using gdb on Linux diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh index 1855869cff1..1565396df3d 100755 --- a/python/examples/minimal_build/build_conda.sh +++ b/python/examples/minimal_build/build_conda.sh @@ -94,11 +94,10 @@ pushd $ARROW_ROOT/python rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} -export PYARROW_BUILD_TYPE=Debug export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh -python -m pip install --no-deps --no-build-isolation -vv . +python -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type=Debug . popd pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh index 8b6fa925e39..d84ca6d55ab 100755 --- a/python/examples/minimal_build/build_venv.sh +++ b/python/examples/minimal_build/build_venv.sh @@ -66,11 +66,10 @@ pushd $ARROW_ROOT/python rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} -export PYARROW_BUILD_TYPE=Debug export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh -python -m pip install --no-deps --no-build-isolation -vv . +python -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type=Debug . 
popd From f0ccac0264169542ae89a248d27eef9fd0f5b7b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 13:30:38 +0100 Subject: [PATCH 17/19] Remove obsolete PYARROW_CMAKE_OPTIONS and document new --config-settings cmake.args usage --- ci/scripts/python_wheel_macos_build.sh | 6 ++++-- ci/scripts/python_wheel_windows_build.bat | 5 +++-- ci/scripts/python_wheel_xlinux_build.sh | 5 +++-- docs/source/developers/python/building.rst | 8 +++++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 786e0ecaeeb..493c2e9595e 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -157,7 +157,6 @@ export PYARROW_WITH_PARQUET=${ARROW_PARQUET} export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_WITH_S3=${ARROW_S3} -export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" export ARROW_HOME=${build_dir}/install # PyArrow build configuration export CMAKE_PREFIX_PATH=${build_dir}/install @@ -165,7 +164,10 @@ export CMAKE_PREFIX_PATH=${build_dir}/install export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python -python -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} +python -m build --sdist --wheel . 
--no-isolation \ + --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-settings cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ + --config-settings cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 11d3e87acf8..ab9bbcc325f 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -115,7 +115,6 @@ popd echo "=== (%PYTHON%) Building wheel ===" set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% set PYARROW_WITH_DATASET=%ARROW_DATASET% @@ -134,7 +133,9 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% || exit /B 1 +%PYTHON_CMD% -m build --sdist --wheel . 
--no-isolation ^ + --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% ^ + --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 @REM Repair the wheel with delvewheel @REM diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index 2cbc855cac1..c5dbcdc618b 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -147,7 +147,6 @@ check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} @@ -165,7 +164,9 @@ export ARROW_HOME=/tmp/arrow-dist export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python -python -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} +python -m build --sdist --wheel . --no-isolation \ + --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" echo "=== Strip symbols from wheel ===" mkdir -p dist/temp-fix-wheel diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 2cb0750f9f6..f1f7f01cfd1 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -557,9 +557,6 @@ PyArrow are: * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` - * - ``PYARROW_CMAKE_OPTIONS`` - - Extra CMake and Arrow options (ex. 
``"-DARROW_SIMD_LEVEL=NONE -DCMAKE_OSX_ARCHITECTURES=x86_64;arm64"``) - - ``''`` * - ``PYARROW_CXXFLAGS`` - Extra C++ compiler flags - ``''`` @@ -579,6 +576,11 @@ PyArrow are: - Number of processes used to compile PyArrow’s C++/Cython components - ``''`` +For extra CMake arguments you can use the ``--config-settings cmake.args=`` +argument when building PyArrow. For example, to build a version of PyArrow +with ``ARROW_SIMD_LEVEL=NONE``, you can run +``pip install --no-build-isolation -vv --config-settings cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. + The components being disabled or enabled when building PyArrow is by default based on how Arrow C++ is build (i.e. it follows the ``ARROW_$COMPONENT`` flags). However, the ``PYARROW_WITH_$COMPONENT`` environment variables can still be used From 1110dbe66a20668a4b0d4d9257e87963695228c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 13:59:39 +0100 Subject: [PATCH 18/19] Remove PYARROW_BUILD_VERBOSE and PYARROW_PARALLEL, document new usages and update usage. 
Also document --config-settings cmake.build-type --- ci/scripts/python_build.bat | 3 +- ci/scripts/python_build.sh | 3 +- ci/scripts/python_wheel_macos_build.sh | 1 + ci/scripts/python_wheel_windows_build.bat | 4 +-- ci/scripts/python_wheel_xlinux_build.sh | 1 + dev/release/verify-release-candidate.sh | 2 +- dev/tasks/python-wheels/github.osx.yml | 1 - docs/source/developers/python/building.rst | 32 ++++++++++++++-------- 8 files changed, 28 insertions(+), 19 deletions(-) diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index bf462fce727..dd59b3008d6 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -111,7 +111,6 @@ echo "=== CCACHE Stats after build ===" ccache -sv echo "=== Building Python ===" -set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% @@ -135,6 +134,6 @@ pushd %SOURCE_DIR%\python %PYTHON_CMD% -m pip install -r requirements-build.txt || exit /B 1 @REM Build PyArrow -%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv . || exit /B 1 +%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv --config-settings build.verbose=true . 
|| exit /B 1 popd diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index 7cadf6ca19a..3da5b2f0934 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -59,6 +59,7 @@ if [ -n "${CONDA_PREFIX}" ]; then conda list fi +export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} @@ -74,8 +75,6 @@ export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} export PYARROW_WITH_S3=${ARROW_S3:-OFF} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} -export PYARROW_PARALLEL=${n_jobs} - : "${CMAKE_PREFIX_PATH:=${ARROW_HOME}}" export CMAKE_PREFIX_PATH export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 493c2e9595e..d2d230201a4 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -165,6 +165,7 @@ export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python python -m build --sdist --wheel . --no-isolation \ + --config-settings build.verbose=true \ --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ --config-settings cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ --config-settings cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index ab9bbcc325f..610e7393881 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -113,7 +113,6 @@ cmake --build . 
--config %CMAKE_BUILD_TYPE% --target install || exit /B 1 popd echo "=== (%PYTHON%) Building wheel ===" -set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% @@ -133,7 +132,8 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -m build --sdist --wheel . --no-isolation ^ +%PYTHON_CMD% -m build --sdist --wheel . --no-isolation -vv ^ + --config-settings build.verbose=true ^ --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% ^ --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index c5dbcdc618b..edfed2905fa 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -165,6 +165,7 @@ export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python python -m build --sdist --wheel . 
--no-isolation \ + --config-settings build.verbose=true \ --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index e6def2ca190..a65ec853c7d 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -543,7 +543,7 @@ test_python() { CMAKE_PREFIX_PATH="${CONDA_BACKUP_CMAKE_PREFIX_PATH}:${CMAKE_PREFIX_PATH}" fi - export PYARROW_PARALLEL=$NPROC + export CMAKE_BUILD_PARALLEL_LEVEL=$NPROC export PYARROW_WITH_DATASET=1 export PYARROW_WITH_HDFS=1 export PYARROW_WITH_ORC=1 diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml index fb57f131ad1..2232f728ad3 100644 --- a/dev/tasks/python-wheels/github.osx.yml +++ b/dev/tasks/python-wheels/github.osx.yml @@ -23,7 +23,6 @@ CMAKE_BUILD_TYPE: release CXX: "clang++" MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}" - PYARROW_BUILD_VERBOSE: 1 PYARROW_VERSION: "{{ arrow.no_rc_version }}" PYTHON_VERSION: "{{ python_version }}" PYTHON_ABI_TAG: "{{ python_abi_tag }}" diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index f1f7f01cfd1..61cd4251de1 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -349,7 +349,7 @@ Optional build components There are several optional components that can be enabled or disabled by setting specific flags to ``ON`` or ``OFF``, respectively. See the list of -:ref:`python-dev-env-variables` below. +:ref:`python-dev-components` below. You may choose between different kinds of C++ build types: @@ -378,7 +378,7 @@ Build PyArrow If you did build one of the optional components in C++, the equivalent components will be enabled by default for building pyarrow. 
This default can be overridden by setting the corresponding ``PYARROW_WITH_$COMPONENT`` environment variable -to 0 or 1, see :ref:`python-dev-env-variables` below. +to 0 or 1, see :ref:`python-dev-components` below. To build PyArrow run: @@ -435,7 +435,7 @@ To build PyArrow run: updated when rebuilding Arrow C++. To set the number of threads used to compile PyArrow's C++/Cython components, -set the ``PYARROW_PARALLEL`` environment variable. +set the ``CMAKE_BUILD_PARALLEL_LEVEL`` environment variable. If you build PyArrow but then make changes to the Arrow C++ or PyArrow code, you can end up with stale build artifacts. This can lead to @@ -541,8 +541,8 @@ described in development section. .. _python-dev-env-variables: -Relevant components and environment variables -============================================= +Relevant environment variables and build options +================================================ List of relevant environment variables that can be used to build PyArrow are: @@ -554,6 +554,9 @@ PyArrow are: * - PyArrow environment variable - Description - Default value + * - ``CMAKE_BUILD_PARALLEL_LEVEL`` + - Number of processes used to compile PyArrow’s C++/Cython components + - ``''`` * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` @@ -569,18 +572,25 @@ PyArrow are: * - ``PYARROW_BUNDLE_CYTHON_CPP`` - Bundle the C++ files generated by Cython - ``0`` (``OFF``) - * - ``PYARROW_BUILD_VERBOSE`` - - Enable verbose output from Makefile builds - - ``0`` (``OFF``) - * - ``PYARROW_PARALLEL`` - - Number of processes used to compile PyArrow’s C++/Cython components - - ``''`` + +To set the build type (e.g. ``Debug``, ``Release``, ``RelWithDebInfo``), pass +``--config-settings cmake.build-type=Debug`` to the ``pip install`` or +``python -m build`` command. For extra CMake arguments you can use the ``--config-settings cmake.args=`` argument when building PyArrow. 
For example, to build a version of PyArrow with ``ARROW_SIMD_LEVEL=NONE``, you can run ``pip install --no-build-isolation -vv --config-settings cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. +To enable verbose output from the build tool, pass +``--config-settings build.verbose=true`` to the ``pip install`` or +``python -m build`` command. + +.. _python-dev-components: + +Relevant components +=================== + The components being disabled or enabled when building PyArrow is by default based on how Arrow C++ is build (i.e. it follows the ``ARROW_$COMPONENT`` flags). However, the ``PYARROW_WITH_$COMPONENT`` environment variables can still be used From 75d2f7f56e59fdd3a3b8b599962c5674c9bc89de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 14:58:27 +0100 Subject: [PATCH 19/19] --config-settings or --config-setting depending on pip or build --- ci/scripts/python_wheel_macos_build.sh | 8 ++++---- ci/scripts/python_wheel_windows_build.bat | 6 +++--- ci/scripts/python_wheel_xlinux_build.sh | 6 +++--- docs/source/developers/python/building.rst | 11 +++++++---- docs/source/developers/python/development.rst | 3 ++- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index d2d230201a4..04c8d74117b 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -165,10 +165,10 @@ export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python python -m build --sdist --wheel . 
--no-isolation \ - --config-settings build.verbose=true \ - --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ - --config-settings cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ - --config-settings cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" + --config-setting build.verbose=true \ + --config-setting cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-setting cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ + --config-setting cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 610e7393881..a08a31c649d 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -133,9 +133,9 @@ pushd C:\arrow\python @REM Build wheel %PYTHON_CMD% -m build --sdist --wheel . --no-isolation -vv ^ - --config-settings build.verbose=true ^ - --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% ^ - --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 + --config-setting build.verbose=true ^ + --config-setting cmake.build-type=%CMAKE_BUILD_TYPE% ^ + --config-setting cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 @REM Repair the wheel with delvewheel @REM diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index edfed2905fa..14b73658ea1 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -165,9 +165,9 @@ export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python python -m build --sdist --wheel . 
--no-isolation \ - --config-settings build.verbose=true \ - --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ - --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" + --config-setting build.verbose=true \ + --config-setting cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-setting cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" echo "=== Strip symbols from wheel ===" mkdir -p dist/temp-fix-wheel diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 61cd4251de1..58ba5349c65 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -573,9 +573,12 @@ PyArrow are: - Bundle the C++ files generated by Cython - ``0`` (``OFF``) +Note that ``pip install`` uses ``--config-settings`` (plural) while +``python -m build`` uses ``--config-setting`` (singular). + To set the build type (e.g. ``Debug``, ``Release``, ``RelWithDebInfo``), pass -``--config-settings cmake.build-type=Debug`` to the ``pip install`` or -``python -m build`` command. +``--config-settings cmake.build-type=Debug`` to ``pip install`` or +``--config-setting cmake.build-type=Debug`` to ``python -m build``. For extra CMake arguments you can use the ``--config-settings cmake.args=`` argument when building PyArrow. For example, to build a version of PyArrow @@ -583,8 +586,8 @@ with ``ARROW_SIMD_LEVEL=NONE``, you can run ``pip install --no-build-isolation -vv --config-settings cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. To enable verbose output from the build tool, pass -``--config-settings build.verbose=true`` to the ``pip install`` or -``python -m build`` command. +``--config-settings build.verbose=true`` to ``pip install`` or +``--config-setting build.verbose=true`` to ``python -m build``. .. 
_python-dev-components: diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst index 857358a6c31..8afc55a173e 100644 --- a/docs/source/developers/python/development.rst +++ b/docs/source/developers/python/development.rst @@ -222,7 +222,8 @@ Debug build Since PyArrow depends on the Arrow C++ libraries, debugging can frequently involve crossing between Python and C++ shared libraries. For the best experience, make sure you've built both Arrow C++ -(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow (``--config-settings cmake.build-type=Debug``) +(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow +(``pip install --no-build-isolation --config-settings cmake.build-type=Debug .``) in debug mode. Using gdb on Linux