diff --git a/.github/workflows/rc.yml b/.github/workflows/rc.yml index 5a82d686e8..666767ff71 100644 --- a/.github/workflows/rc.yml +++ b/.github/workflows/rc.yml @@ -32,8 +32,8 @@ concurrency: permissions: contents: read jobs: - archive: - name: Archive + source: + name: Source runs-on: ubuntu-latest timeout-minutes: 5 steps: @@ -70,15 +70,269 @@ jobs: - name: Audit run: | dev/release/run_rat.sh "${TAR_GZ}" - - uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 + - name: Upload source archive + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: - name: archive + name: release-source path: | apache-arrow-java-* + jni-ubuntu: + name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} + runs-on: ${{ matrix.platform.runs_on }} + needs: + - source + strategy: + fail-fast: false + matrix: + platform: + - runs_on: ubuntu-latest + arch: "x86_64" + archery_arch: "amd64" + env: + # architecture name used for archery build + ARCH: ${{ matrix.platform.archery_arch }} + DOCKER_VOLUME_PREFIX: .docker/ + permissions: + contents: read + packages: write + steps: + - name: Download source archive + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: release-source + - name: Extract source archive + run: | + tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + - name: Download the latest Apache Arrow C++ + run: | + ci/scripts/download_cpp.sh + - name: Checkout apache/arrow-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/arrow-testing + path: arrow/testing + - name: Checkout apache/parquet-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/parquet-testing + path: arrow/cpp/submodules/parquet-testing + - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 + with: + registry: ghcr.io +
username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Cache + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: .docker + key: jni-linux-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} + restore-keys: jni-linux-${{ matrix.platform.arch }}- + - name: Build C++ libraries + run: | + docker compose run vcpkg-jni + - name: Push Docker image + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow-java' && github.ref_name == 'main' + run: | + docker compose push vcpkg-jni + - name: Compress into single artifact to keep directory structure + run: tar -cvzf jni-linux-${{ matrix.platform.arch }}.tar.gz jni/ + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: jni-linux-${{ matrix.platform.arch }} + path: jni-linux-${{ matrix.platform.arch }}.tar.gz + jni-macos: + name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} + runs-on: ${{ matrix.platform.runs_on }} + needs: + - source + strategy: + fail-fast: false + matrix: + platform: + - { runs_on: macos-13, arch: "x86_64"} + - { runs_on: macos-14, arch: "aarch_64" } + env: + MACOSX_DEPLOYMENT_TARGET: "14.0" + steps: + - name: Download source archive + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: release-source + - name: Extract source archive + run: | + tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + # We need 19.0.0 for latest Boost support + - name: Download the latest RC Apache Arrow C++ + run: | + ci/scripts/download_cpp.sh latest-rc + - name: Checkout apache/arrow-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/arrow-testing + path: arrow/testing + - name: Checkout apache/parquet-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository:
apache/parquet-testing + path: arrow/cpp/submodules/parquet-testing + - name: Set up Python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + cache: 'pip' + python-version: "3.12" + - name: Install Archery + run: pip install -e arrow/dev/archery[all] + - name: Install dependencies + run: | + # We want to use llvm@14 to avoid shared z3 + # dependency. llvm@14 doesn't depend on z3 and llvm depends + # on z3. And Homebrew's z3 provides only shared library. It + # doesn't provide static z3 because z3's CMake doesn't accept + # building both shared and static libraries at once. + # See also: Z3_BUILD_LIBZ3_SHARED in + # https://github.com/Z3Prover/z3/blob/master/README-CMake.md + # + # If llvm is installed, Apache Arrow C++ uses llvm rather than + # llvm@14 because llvm is newer than llvm@14. + brew uninstall llvm || : + + # Ensure updating python@XXX with the "--overwrite" option. + # If python@XXX is updated without "--overwrite", it causes + # a conflict error. Because Python 3 installed not by + # Homebrew exists in /usr/local on GitHub Actions. If + # Homebrew's python@XXX is updated without "--overwrite", it + # tries to replace /usr/local/bin/2to3 and so on and causes + # a conflict error. + brew update + for python_package in $(brew list | grep python@); do + brew install --overwrite ${python_package} + done + brew install --overwrite python + + if [ "$(uname -m)" = "arm64" ]; then + # pkg-config formula is deprecated but it's still installed + # in GitHub Actions runner now. We can remove this once + # pkg-config formula is removed from GitHub Actions runner. + brew uninstall pkg-config || : + brew uninstall pkg-config@0.29.2 || : + fi + + brew bundle --file=arrow/cpp/Brewfile + # We want to link aws-sdk-cpp statically but Homebrew's + # aws-sdk-cpp provides only shared library. If we have + # Homebrew's aws-sdk-cpp, our build mixes Homebrew's + # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's + # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
+ brew uninstall aws-sdk-cpp + # We want to use bundled RE2 for static linking. If + # Homebrew's RE2 is installed, its header file may be used. + # We uninstall Homebrew's RE2 to ensure using bundled RE2. + brew uninstall grpc || : # gRPC depends on RE2 + brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too + brew uninstall re2 + # We want to use bundled Protobuf for static linking. If + # Homebrew's Protobuf is installed, its library file may be + # used on test We uninstall Homebrew's Protobuf to ensure using + # bundled Protobuf. + brew uninstall protobuf + + brew bundle --file=Brewfile + - name: Prepare ccache + run: | + echo "CCACHE_DIR=${PWD}/ccache" >> ${GITHUB_ENV} + - name: Cache ccache + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: ccache + key: jni-macos-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} + restore-keys: jni-macos-${{ matrix.platform.arch }}- + - name: Build C++ libraries + run: | + set -e + # make brew Java available to CMake + export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home + ci/scripts/jni_macos_build.sh . arrow build jni + - name: Compress into single artifact to keep directory structure + run: tar -cvzf jni-macos-${{ matrix.platform.arch }}.tar.gz jni/ + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: jni-macos-${{ matrix.platform.arch }} + path: jni-macos-${{ matrix.platform.arch }}.tar.gz + binaries: + name: Binaries + runs-on: ubuntu-latest + needs: + - jni-ubuntu + - jni-macos + steps: + - name: Download artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + path: artifacts + - name: Decompress artifacts + run: | + mv artifacts/*/*.tar.gz . 
+ tar -xf apache-arrow-java-*.tar.gz --strip-components=1 + tar -xvzf jni-linux-x86_64.tar.gz + # tar -xvzf jni-linux-aarch_64.tar.gz + tar -xvzf jni-macos-x86_64.tar.gz + tar -xvzf jni-macos-aarch_64.tar.gz + # tar -xvzf jni-windows.tar.gz + - name: Test that shared libraries exist + run: | + set -x + + test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.so + test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.so + test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.so + test -f jni/gandiva_jni/x86_64/libgandiva_jni.so + + # test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.so + # test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.so + # test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.so + # test -f jni/gandiva_jni/aarch_64/libgandiva_jni.so + + test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib + test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib + test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib + test -f jni/gandiva_jni/x86_64/libgandiva_jni.dylib + + test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib + test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib + test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib + test -f jni/gandiva_jni/aarch_64/libgandiva_jni.dylib + + # test -f jni/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll + # test -f jni/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll + # test -f jni/arrow_orc_jni/x86_64/arrow_orc_jni.dll + - name: Checkout apache/arrow-testing + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: apache/arrow-testing + path: testing + - name: Cache ~/.m2 + uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: ~/.m2 + key: binaries-build-${{ hashFiles('**/*.java', '**/pom.xml') }} + restore-keys: binaries-build- + - name: Build bundled JAR + env: + MAVEN_ARGS: >- + --no-transfer-progress + run: | + ci/scripts/jni_full_build.sh . 
jni binaries + - name: Upload artifacts + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + with: + name: release-binaries + path: binaries/* verify: name: Verify needs: - - archive + - binaries runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -87,15 +341,13 @@ jobs: - macos-latest - ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - submodules: recursive - - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + - name: Download release artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: archive + pattern: release-* - name: Verify run: | + mv release-source/* ./ tar_gz=$(echo apache-arrow-java-*.tar.gz) version=${tar_gz#apache-arrow-java-} version=${version%.tar.gz} @@ -105,9 +357,14 @@ jobs: else rc=$(date +%Y%m%d) fi - VERIFY_DEFAULT=0 \ - VERIFY_SOURCE=1 \ - dev/release/verify_rc.sh "${version}" "${rc}" + tar -xf ${tar_gz} + export VERIFY_DEFAULT=0 + export VERIFY_BINARY=1 + export VERIFY_SOURCE=1 + cd apache-arrow-java-${version} + mv ../${tar_gz}* ./ + mv ../release-binaries binaries + dev/release/verify_rc.sh "${version}" "${rc}" upload: name: Upload if: github.ref_type == 'tag' @@ -117,13 +374,11 @@ jobs: permissions: contents: write steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - submodules: recursive - - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + - name: Download release artifacts + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: - name: archive + pattern: release-* + path: artifacts - name: Upload run: | # GH-499: How to create release notes? 
@@ -133,9 +388,9 @@ jobs: gh release create ${GITHUB_REF_NAME} \ --generate-notes \ --prerelease \ + --repo ${{ github.repository }} \ --title "Apache Arrow Java ${version} RC${rc}" \ --verify-tag \ - apache-arrow-java-*.tar.gz \ - apache-arrow-java-*.tar.gz.sha* + artifacts/*/* env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test_jni.yml b/.github/workflows/test_jni.yml deleted file mode 100644 index 15b535e17f..0000000000 --- a/.github/workflows/test_jni.yml +++ /dev/null @@ -1,267 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: Test (JNI) - -on: - push: - branches: - - '**' - - '!dependabot/**' - tags: - - '**' - pull_request: - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -permissions: - contents: read - -env: - DOCKER_VOLUME_PREFIX: ".docker/" - -jobs: - cpp-ubuntu: - name: Build C++ libraries ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} - runs-on: ${{ matrix.platform.runs_on }} - strategy: - fail-fast: false - matrix: - platform: - - runs_on: ubuntu-latest - arch: "x86_64" - archery_arch: "amd64" - env: - # architecture name used for archery build - ARCH: ${{ matrix.platform.archery_arch }} - permissions: - contents: read - packages: write - steps: - - name: Checkout apache/arrow-java - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - fetch-depth: 0 - submodules: recursive - - name: Checkout apache/arrow - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - repository: apache/arrow - fetch-depth: 0 - path: arrow - submodules: recursive - - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build C++ libraries - run: | - docker compose run vcpkg-jni - - name: Push Docker image - if: success() && github.event_name == 'push' && github.repository == 'apache/arrow-java' && github.ref_name == 'main' - run: | - docker compose push vcpkg-jni - - name: Compress into single artifact to keep directory structure - run: tar -cvzf arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz dist/ - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: ubuntu-shared-lib-${{ matrix.platform.arch }} - path: arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz - - cpp-macos: - name: Build C++ libraries macOS ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} - 
runs-on: ${{ matrix.platform.runs_on }} - strategy: - fail-fast: false - matrix: - platform: - - { runs_on: macos-13, arch: "x86_64"} - - { runs_on: macos-14, arch: "aarch_64" } - env: - MACOSX_DEPLOYMENT_TARGET: "14.0" - steps: - - name: Checkout apache/arrow-java - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - fetch-depth: 0 - submodules: recursive - - name: Checkout apache/arrow - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - repository: apache/arrow - fetch-depth: 0 - path: arrow - submodules: recursive - - name: Set up Python - uses: actions/setup-python@v5 - with: - cache: 'pip' - python-version: 3.12 - - name: Install Archery - run: pip install -e arrow/dev/archery[all] - - name: Install dependencies - run: | - # We want to use llvm@14 to avoid shared z3 - # dependency. llvm@14 doesn't depend on z3 and llvm depends - # on z3. And Homebrew's z3 provides only shared library. It - # doesn't provides static z3 because z3's CMake doesn't accept - # building both shared and static libraries at once. - # See also: Z3_BUILD_LIBZ3_SHARED in - # https://github.com/Z3Prover/z3/blob/master/README-CMake.md - # - # If llvm is installed, Apache Arrow C++ uses llvm rather than - # llvm@14 because llvm is newer than llvm@14. - brew uninstall llvm || : - - # Ensure updating python@XXX with the "--overwrite" option. - # If python@XXX is updated without "--overwrite", it causes - # a conflict error. Because Python 3 installed not by - # Homebrew exists in /usr/local on GitHub Actions. If - # Homebrew's python@XXX is updated without "--overwrite", it - # tries to replace /usr/local/bin/2to3 and so on and causes - # a conflict error. 
- brew update - for python_package in $(brew list | grep python@); do - brew install --overwrite ${python_package} - done - brew install --overwrite python - - if [ "$(uname -m)" = "arm64" ]; then - # pkg-config formula is deprecated but it's still installed - # in GitHub Actions runner now. We can remove this once - # pkg-config formula is removed from GitHub Actions runner. - brew uninstall pkg-config || : - brew uninstall pkg-config@0.29.2 || : - fi - - brew bundle --file=arrow/cpp/Brewfile - # We want to link aws-sdk-cpp statically but Homebrew's - # aws-sdk-cpp provides only shared library. If we have - # Homebrew's aws-sdk-cpp, our build mix Homebrew's - # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's - # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. - brew uninstall aws-sdk-cpp - # We want to use bundled RE2 for static linking. If - # Homebrew's RE2 is installed, its header file may be used. - # We uninstall Homebrew's RE2 to ensure using bundled RE2. - brew uninstall grpc || : # gRPC depends on RE2 - brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too - brew uninstall re2 - # We want to use bundled Protobuf for static linking. If - # Homebrew's Protobuf is installed, its library file may be - # used on test We uninstall Homebrew's Protobuf to ensure using - # bundled Protobuf. 
- brew uninstall protobuf - - brew bundle --file=Brewfile - - name: Build C++ libraries - run: | - set -e - # make brew Java available to CMake - export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home - ./ci/scripts/jni_macos_build.sh \ - $GITHUB_WORKSPACE \ - $GITHUB_WORKSPACE/arrow \ - $GITHUB_WORKSPACE/arrow-java/cpp-build \ - $GITHUB_WORKSPACE/dist - - name: Compress into single artifact to keep directory structure - run: tar -cvzf arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz dist/ - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: macos-shared-lib-${{ matrix.platform.arch }} - path: arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz - - java-jars: - name: Build JAR files - runs-on: ubuntu-latest - needs: - - cpp-ubuntu - - cpp-macos - steps: - - name: Checkout apache/arrow-java - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - fetch-depth: 0 - submodules: recursive - - name: Checkout apache/arrow - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - repository: apache/arrow - fetch-depth: 0 - path: arrow - submodules: recursive - - name: Download Libraries - uses: actions/download-artifact@v4 - with: - path: artifacts - - name: Decompress artifacts - run: | - mv artifacts/*/*.tar.gz . 
- tar -xvzf arrow-shared-libs-linux-x86_64.tar.gz - # tar -xvzf arrow-shared-libs-linux-aarch_64.tar.gz - tar -xvzf arrow-shared-libs-macos-x86_64.tar.gz - tar -xvzf arrow-shared-libs-macos-aarch_64.tar.gz - # tar -xvzf arrow-shared-libs-windows.tar.gz - - name: Test that shared libraries exist - run: | - set -x - - test -f dist/arrow_cdata_jni/x86_64/libarrow_cdata_jni.so - test -f dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.so - test -f dist/arrow_orc_jni/x86_64/libarrow_orc_jni.so - test -f dist/gandiva_jni/x86_64/libgandiva_jni.so - - # test -f dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.so - # test -f dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.so - # test -f dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.so - # test -f dist/gandiva_jni/aarch_64/libgandiva_jni.so - - test -f dist/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib - test -f dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib - test -f dist/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib - test -f dist/gandiva_jni/x86_64/libgandiva_jni.dylib - - test -f dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib - test -f dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib - test -f dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib - test -f dist/gandiva_jni/aarch_64/libgandiva_jni.dylib - - # test -f dist/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll - # test -f dist/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll - # test -f dist/arrow_orc_jni/x86_64/arrow_orc_jni.dll - - name: Build bundled jar - env: - MAVEN_ARGS: >- - --no-transfer-progress - run: | - set -e - # mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} - # mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} -f bom - ./ci/scripts/jni_full_build.sh \ - $GITHUB_WORKSPACE \ - $GITHUB_WORKSPACE/arrow \ - $GITHUB_WORKSPACE/dist - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: java-jars - path: ${{ github.workspace }}/arrow-java/java-dist diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 8b29f37d80..318bd4d10c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,10 @@ add_library(jni INTERFACE IMPORTED) set_target_properties(jni PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${JNI_INCLUDE_DIRS}") include(CTest) -if(BUILD_TESTING) +if(BUILD_TESTING + AND (ARROW_JAVA_JNI_ENABLE_DATASET + OR ARROW_JAVA_JNI_ENABLE_GANDIVA + OR ARROW_JAVA_JNI_ENABLE_ORC)) find_package(ArrowTesting REQUIRED) find_package(GTest REQUIRED) add_library(arrow_java_test INTERFACE IMPORTED) diff --git a/ci/scripts/build.sh b/ci/scripts/build.sh index f3f2023759..309f28126a 100755 --- a/ci/scripts/build.sh +++ b/ci/scripts/build.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information diff --git a/ci/scripts/download_cpp.sh b/ci/scripts/download_cpp.sh new file mode 100755 index 0000000000..4d801a47f9 --- /dev/null +++ b/ci/scripts/download_cpp.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -euxo pipefail + +if [ $# -eq 1 ]; then + version="${1}" +else + version="latest-release" +fi + +url="" + +if [ "${version}" = "latest-release" ]; then + version=$(curl \ + https://raw.githubusercontent.com/apache/arrow-site/refs/heads/main/_data/versions.yml | + grep '^ number:' | + sed -E -e "s/^ number: '|'$//g") +elif [ "${version}" = "latest-rc" ]; then + rc_archive_name=$(curl \ + https://dist.apache.org/repos/dist/dev/arrow/ | + grep -E -o 'apache-arrow-[0-9]+\.[0-9]+\.[0-9]+\-rc[0-9]+' | + sort -V | # version sort so that 19.0.0 > 9.0.0 and rc10 > rc9 + uniq | + tail -n1) + rc_version="${rc_archive_name#apache-arrow-}" + version="${rc_version%-rc*}" + url="https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-${rc_version}/apache-arrow-${version}.tar.gz" +fi + +if [ -z "${url}" ]; then + url="https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${version}/apache-arrow-${version}.tar.gz" +fi +wget --output-document "apache-arrow-${version}.tar.gz" "${url}" +tar xf "apache-arrow-${version}.tar.gz" +mv "apache-arrow-${version}" arrow diff --git a/ci/scripts/jni_build.sh b/ci/scripts/jni_build.sh index 4462646347..a77b0da02a 100755 --- a/ci/scripts/jni_build.sh +++ b/ci/scripts/jni_build.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,9 +17,9 @@ # specific language governing permissions and limitations # under the License.
-set -exo pipefail +set -euxo pipefail -arrow_dir=${1} +source_dir=${1} arrow_install_dir=${2} build_dir=${3}/java_jni # The directory where the final binaries will be stored when scripts finish @@ -30,8 +31,6 @@ echo "=== Clear output directories and leftovers ===" rm -rf "${build_dir}" echo "=== Building Arrow Java C Data Interface native library ===" -mkdir -p "${build_dir}" -pushd "${build_dir}" case "$(uname)" in Linux) @@ -45,10 +44,12 @@ Darwin) ;; esac -: "${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}}" +: "${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-ON}}" : "${CMAKE_BUILD_TYPE:=release}" read -ra EXTRA_CMAKE_OPTIONS <<<"${JAVA_JNI_CMAKE_ARGS:-}" cmake \ + -S "${source_dir}" \ + -B "${build_dir}" \ -DARROW_JAVA_JNI_ENABLE_DATASET="${ARROW_DATASET:-OFF}" \ -DARROW_JAVA_JNI_ENABLE_GANDIVA="${ARROW_GANDIVA:-OFF}" \ -DARROW_JAVA_JNI_ENABLE_ORC="${ARROW_ORC:-OFF}" \ @@ -59,18 +60,16 @@ cmake \ -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD:-OFF}" \ -DProtobuf_USE_STATIC_LIBS=ON \ -GNinja \ - "${EXTRA_CMAKE_OPTIONS[@]}" \ - "${arrow_dir}" -export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs} -cmake --build . --config "${CMAKE_BUILD_TYPE}" + "${EXTRA_CMAKE_OPTIONS[@]}" +cmake --build "${build_dir}" if [ "${ARROW_JAVA_BUILD_TESTS}" = "ON" ]; then ctest \ --output-on-failure \ --parallel "${n_jobs}" \ + --test-dir "${build_dir}" \ --timeout 300 fi -cmake --build . --config "${CMAKE_BUILD_TYPE}" --target install -popd +cmake --build "${build_dir}" --target install mkdir -p "${dist_dir}" # For Windows. *.dll are installed into bin/ on Windows. 
diff --git a/ci/scripts/jni_full_build.sh b/ci/scripts/jni_full_build.sh index 1a39aeb510..15cf72f21b 100755 --- a/ci/scripts/jni_full_build.sh +++ b/ci/scripts/jni_full_build.sh @@ -19,16 +19,13 @@ set -e -arrow_java_dir="${1}" -arrow_dir="${2}" +source_dir="$(cd "${1}" && pwd)" +jni_build_dir="$(cd "${2}" && pwd)" dist_dir="${3}" +mkdir -p "${dist_dir}" +dist_dir="$(cd "${dist_dir}" && pwd)" -export ARROW_TEST_DATA="${arrow_dir}/testing/data" - -pushd "${arrow_java_dir}" - -# Ensure that there is no old jar -# inside the maven repository +# Ensure that there is no old artifacts inside the maven repository maven_repo=~/.m2/repository/org/apache/arrow if [ -d "$maven_repo" ]; then find "$maven_repo" \ @@ -37,7 +34,7 @@ if [ -d "$maven_repo" ]; then -exec rm -rf {} ";" fi -# generate dummy GPG key for -Papache-release. +# Generate dummy GPG key for -Papache-release. # -Papache-release generates signs (*.asc) of artifacts. # We don't use these signs in our release process. ( @@ -49,15 +46,17 @@ fi ) | gpg --full-generate-key --batch +pushd "${source_dir}" # build the entire project mvn clean \ install \ -Papache-release \ -Parrow-c-data \ -Parrow-jni \ - -Darrow.cpp.build.dir="$dist_dir" \ - -Darrow.c.jni.dist.dir="$dist_dir" \ + -Darrow.cpp.build.dir="${jni_build_dir}" \ + -Darrow.c.jni.dist.dir="${jni_build_dir}" \ --no-transfer-progress +popd # copy all jar, zip and pom files to the distribution folder find ~/.m2/repository/org/apache/arrow \ @@ -69,6 +68,4 @@ find ~/.m2/repository/org/apache/arrow \ -name "*.zip" \ ")" \ -exec echo "{}" ";" \ - -exec cp "{}" "$dist_dir" ";" - -popd + -exec cp "{}" "${dist_dir}" ";" diff --git a/ci/scripts/jni_macos_build.sh b/ci/scripts/jni_macos_build.sh index eeabfd1334..5a693031c3 100755 --- a/ci/scripts/jni_macos_build.sh +++ b/ci/scripts/jni_macos_build.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information @@ -21,8 +22,8 @@ set -ex -arrow_java_dir="${1}" -arrow_dir="${2}" +source_dir="$(cd "${1}" && pwd)" +arrow_dir="$(cd "${2}" && pwd)" build_dir="${3}" normalized_arch="$(arch)" case "${normalized_arch}" in @@ -39,6 +40,10 @@ dist_dir="${4}" echo "=== Clear output directories and leftovers ===" # Clear output directories and leftovers rm -rf "${build_dir}" +rm -rf "${dist_dir}" + +mkdir -p "${build_dir}" +build_dir="$(cd "${build_dir}" && pwd)" echo "=== Building Arrow C++ libraries ===" install_dir="${build_dir}/cpp-install" @@ -53,7 +58,7 @@ export ARROW_GANDIVA export ARROW_ORC : "${ARROW_PARQUET:=ON}" : "${ARROW_S3:=ON}" -: "${ARROW_USE_CCACHE:=OFF}" +: "${ARROW_USE_CCACHE:=ON}" : "${CMAKE_BUILD_TYPE:=Release}" : "${CMAKE_UNITY_BUILD:=ON}" @@ -66,10 +71,9 @@ export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE -mkdir -p "${build_dir}/cpp" -pushd "${build_dir}/cpp" - cmake \ + -S "${arrow_dir}/cpp" \ + -B "${build_dir}/cpp" \ -DARROW_ACERO="${ARROW_ACERO}" \ -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \ @@ -84,6 +88,7 @@ cmake \ -DARROW_PARQUET="${ARROW_PARQUET}" \ -DARROW_S3="${ARROW_S3}" \ -DARROW_USE_CCACHE="${ARROW_USE_CCACHE}" \ + -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \ -DCMAKE_INSTALL_PREFIX="${install_dir}" \ -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \ @@ -92,11 +97,10 @@ cmake \ -DPARQUET_BUILD_EXECUTABLES=OFF \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ -Dre2_SOURCE=BUNDLED \ - -GNinja \ - "${arrow_dir}/cpp" -cmake --build . 
--target install + -GNinja +cmake --build "${build_dir}/cpp" --target install -if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then +if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then # MinIO is required exclude_tests="arrow-s3fs-test" # unstable @@ -107,14 +111,13 @@ if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then --label-regex unittest \ --output-on-failure \ --parallel "$(sysctl -n hw.ncpu)" \ + --test-dir "${build_dir}/cpp" \ --timeout 300 fi -popd - export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install" -"${arrow_java_dir}/ci/scripts/jni_build.sh" \ - "${arrow_java_dir}" \ +"${source_dir}/ci/scripts/jni_build.sh" \ + "${source_dir}" \ "${install_dir}" \ "${build_dir}" \ "${dist_dir}" diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh index 2551d67239..0251c71c65 100755 --- a/ci/scripts/jni_manylinux_build.sh +++ b/ci/scripts/jni_manylinux_build.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information @@ -21,8 +22,8 @@ set -exo pipefail -arrow_java_dir="${1}" -arrow_dir="${2}" +source_dir="$(cd "${1}" && pwd)" +arrow_dir="$(cd "${2}" && pwd)" build_dir="${3}" normalized_arch="$(arch)" case "${normalized_arch}" in @@ -41,12 +42,15 @@ echo "=== Clear output directories and leftovers ===" rm -rf "${build_dir}" rm -rf "${dist_dir}" +mkdir -p "${build_dir}" +build_dir="$(cd "${build_dir}" && pwd)" + echo "=== Building Arrow C++ libraries ===" devtoolset_version="$(rpm -qa "devtoolset-*-gcc" --queryformat '%{VERSION}' | grep -o "^[0-9]*")" devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" : "${ARROW_ACERO:=ON}" export ARROW_ACERO -: "${ARROW_BUILD_TESTS:=OFF}" +: "${ARROW_BUILD_TESTS:=ON}" : "${ARROW_DATASET:=ON}" export ARROW_DATASET : "${ARROW_GANDIVA:=ON}" @@ -59,7 +63,7 @@ export ARROW_GANDIVA export ARROW_ORC : "${ARROW_PARQUET:=ON}" : "${ARROW_S3:=ON}" -: "${ARROW_USE_CCACHE:=OFF}" +: "${ARROW_USE_CCACHE:=ON}" : "${CMAKE_BUILD_TYPE:=release}" : "${CMAKE_UNITY_BUILD:=ON}" : "${VCPKG_ROOT:=/opt/vcpkg}" @@ -77,9 +81,11 @@ export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE mkdir -p "${build_dir}/cpp" -pushd "${build_dir}/cpp" +install_dir="${build_dir}/cpp-install" cmake \ + -S "${arrow_dir}/cpp" \ + -B "${build_dir}/cpp" \ -DARROW_ACERO="${ARROW_ACERO}" \ -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \ @@ -100,7 +106,7 @@ cmake \ -DARROW_S3="${ARROW_S3}" \ -DARROW_USE_CCACHE="${ARROW_USE_CCACHE}" \ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \ - -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \ + -DCMAKE_INSTALL_PREFIX="${install_dir}" \ -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \ -DGTest_SOURCE=BUNDLED \ -DORC_SOURCE=BUNDLED \ @@ -110,11 +116,11 @@ cmake \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ -DVCPKG_MANIFEST_MODE=OFF \ 
-DVCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET}" \ - -GNinja \ - "${arrow_dir}/cpp" -ninja install + -GNinja +cmake --build "${build_dir}/cpp" +cmake --install "${build_dir}/cpp" -if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then +if [ "${ARROW_RUN_TESTS:-OFF}" = "ON" ]; then # MinIO is required exclude_tests="arrow-s3fs-test" case $(arch) in @@ -138,17 +144,16 @@ if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then --label-regex unittest \ --output-on-failure \ --parallel "$(nproc)" \ + --test-dir "${build_dir}/cpp" \ --timeout 300 fi -popd - JAVA_JNI_CMAKE_ARGS="-DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}" export JAVA_JNI_CMAKE_ARGS -"${arrow_java_dir}/ci/scripts/jni_build.sh" \ - "${arrow_java_dir}" \ - "${ARROW_HOME}" \ +"${source_dir}/ci/scripts/jni_build.sh" \ + "${source_dir}" \ + "${install_dir}" \ "${build_dir}" \ "${dist_dir}" diff --git a/ci/scripts/test.sh b/ci/scripts/test.sh index 6449bbc2c5..f65f630182 100755 --- a/ci/scripts/test.sh +++ b/ci/scripts/test.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information diff --git a/docker-compose.yml b/docker-compose.yml index c1d270287d..5decbfc86c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,6 +26,8 @@ # $ ARCH=arm64v8 docker compose run java volumes: + ccache-cache: + name: ccache-cache maven-cache: name: maven-cache @@ -101,9 +103,12 @@ services: volumes: - .:/arrow-java:delegated - ${ARROW_REPO_ROOT}:/arrow:delegated + # Same name as the top-level "ccache-cache" volume so that this resolves when DOCKER_VOLUME_PREFIX is empty. + - ${DOCKER_VOLUME_PREFIX}ccache-cache:/ccache:delegated - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated environment: ARROW_JAVA_CDATA: "ON" + CCACHE_DIR: "/ccache" command: ["git config --global --add safe.directory /arrow-java && \ - /arrow-java/ci/scripts/jni_manylinux_build.sh /arrow-java /arrow /build /arrow-java/dist"] + /arrow-java/ci/scripts/jni_manylinux_build.sh /arrow-java /arrow /build /arrow-java/jni"]