Skip to content

Commit ac6bbb9

Browse files
committed
GH-13: Set up JNI build (dataset, etc.)
Fixes #13.
1 parent d650aa0 commit ac6bbb9

File tree

5 files changed

+342
-0
lines changed

5 files changed

+342
-0
lines changed

.env

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ ARCH_SHORT=amd64
4040

4141
# Default repository to pull and push images from
4242
REPO=ghcr.io/apache/arrow-java-dev
43+
ARROW_REPO=apache/arrow-dev
4344

4445
# The setup attempts to generate coredumps by default, in order to disable the
4546
# coredump generation set it to 0
@@ -48,3 +49,9 @@ ULIMIT_CORE=-1
4849
# Default versions for various dependencies
4950
JDK=11
5051
MAVEN=3.9.9
52+
53+
# Versions for various dependencies used to build artifacts
54+
# Keep in sync with apache/arrow
55+
ARROW_REPO_ROOT=./arrow
56+
PYTHON=3.9
57+
VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release

.github/workflows/test_jni.yml

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: Test (JNI)
19+
20+
on:
21+
push:
22+
branches:
23+
- '**'
24+
- '!dependabot/**'
25+
tags:
26+
- '**'
27+
pull_request:
28+
29+
concurrency:
30+
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
31+
cancel-in-progress: true
32+
33+
permissions:
34+
contents: read
35+
36+
env:
37+
DOCKER_VOLUME_PREFIX: ".docker/"
38+
39+
jobs:
40+
cpp-ubuntu:
41+
name: Build C++ libraries ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
42+
runs-on: ${{ matrix.platform.runs_on }}
43+
strategy:
44+
fail-fast: false
45+
matrix:
46+
platform:
47+
- runs_on: ubuntu-latest
48+
arch: "x86_64"
49+
archery_arch: "amd64"
50+
archery_arch_alias: "x86_64"
51+
archery_arch_short: "amd64"
52+
env:
53+
# architecture name used for archery build
54+
ARCH: ${{ matrix.platform.archery_arch }}
55+
ARCH_ALIAS: ${{ matrix.platform.archery_arch_alias }}
56+
ARCH_SHORT: ${{ matrix.platform.archery_arch_short }}
57+
steps:
58+
- name: Checkout apache/arrow-java
59+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
60+
with:
61+
fetch-depth: 0
62+
submodules: recursive
63+
- name: Checkout apache/arrow
64+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
65+
with:
66+
repository: apache/arrow
67+
fetch-depth: 0
68+
path: arrow
69+
submodules: recursive
70+
- name: Build C++ libraries
71+
env:
72+
VCPKG_BINARY_SOURCES: "clear;nuget,GitHub,readwrite"
73+
run: |
74+
# Use the Compose v2 plugin; the standalone docker-compose v1 binary is not available on current GitHub-hosted runners.
docker compose run vcpkg-jni
75+
- name: Compress into single artifact to keep directory structure
76+
run: tar -cvzf arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz dist/
77+
- name: Upload artifacts
78+
uses: actions/upload-artifact@v4
79+
with:
80+
name: ubuntu-shared-lib-${{ matrix.platform.arch }}
81+
path: arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz

ci/docker/vcpkg-jni.dockerfile

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
ARG base
19+
FROM ${base}
20+
21+
# Install the libraries required by Gandiva to run
22+
# Enable llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva
23+
RUN vcpkg install \
24+
--clean-after-build \
25+
--x-install-root=${VCPKG_ROOT}/installed \
26+
--x-manifest-root=/arrow/ci/vcpkg \
27+
--x-feature=dev \
28+
--x-feature=flight \
29+
--x-feature=gcs \
30+
--x-feature=json \
31+
--x-feature=parquet \
32+
--x-feature=gandiva \
33+
--x-feature=s3
34+
35+
# Install Java
36+
# We need Java for JNI headers, but we don't invoke Maven in this build.
37+
ARG java=11
38+
RUN yum install -y java-$java-openjdk-devel && yum clean all
39+
40+
# For ci/scripts/{cpp,java}_*.sh
41+
ENV ARROW_HOME=/tmp/local \
42+
ARROW_JAVA_CDATA=ON \
43+
ARROW_JAVA_JNI=ON \
44+
ARROW_USE_CCACHE=ON
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#!/usr/bin/env bash
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
# This script is like java_jni_build.sh, but is meant for release artifacts
20+
# and hardcodes assumptions about the environment it is being run in.
21+
22+
set -eo pipefail
23+
24+
arrow_dir=${1}
25+
build_dir=${2}
26+
normalized_arch=$(arch)
27+
case ${normalized_arch} in
28+
aarch64)
29+
normalized_arch=aarch_64
30+
;;
31+
esac
32+
# The directory where the final binaries will be stored when scripts finish
33+
dist_dir=${3}
34+
35+
echo "=== Install Archery ==="
36+
pip install -e "${arrow_dir}/dev/archery[all]"
37+
38+
echo "=== Clear output directories and leftovers ==="
39+
# Clear output directories and leftovers
40+
rm -rf "${build_dir}"
41+
rm -rf "${dist_dir}"
42+
43+
echo "=== Building Arrow C++ libraries ==="
44+
devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
45+
grep -o "^[0-9]*")
46+
devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
47+
: ${ARROW_ACERO:=ON}
48+
export ARROW_ACERO
49+
: ${ARROW_BUILD_TESTS:=ON}
50+
: ${ARROW_DATASET:=ON}
51+
export ARROW_DATASET
52+
: ${ARROW_GANDIVA:=ON}
53+
export ARROW_GANDIVA
54+
: ${ARROW_GCS:=ON}
55+
: ${ARROW_JEMALLOC:=ON}
56+
: ${ARROW_RPATH_ORIGIN:=ON}
57+
: ${ARROW_ORC:=ON}
58+
export ARROW_ORC
59+
: ${ARROW_PARQUET:=ON}
60+
: ${ARROW_S3:=ON}
61+
: ${ARROW_USE_CCACHE:=OFF}
62+
: ${CMAKE_BUILD_TYPE:=release}
63+
: ${CMAKE_UNITY_BUILD:=ON}
64+
: ${VCPKG_ROOT:=/opt/vcpkg}
65+
: ${VCPKG_FEATURE_FLAGS:=-manifests}
66+
: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
67+
: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread}
68+
69+
if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
70+
echo "=== ccache statistics before build ==="
71+
ccache -sv 2>/dev/null || ccache -s
72+
fi
73+
74+
export ARROW_TEST_DATA="${arrow_dir}/testing/data"
75+
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
76+
export AWS_EC2_METADATA_DISABLED=TRUE
77+
78+
mkdir -p "${build_dir}/cpp"
79+
pushd "${build_dir}/cpp"
80+
81+
cmake \
82+
-DARROW_ACERO=${ARROW_ACERO} \
83+
-DARROW_BUILD_SHARED=OFF \
84+
-DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
85+
-DARROW_CSV=${ARROW_DATASET} \
86+
-DARROW_DATASET=${ARROW_DATASET} \
87+
-DARROW_SUBSTRAIT=${ARROW_DATASET} \
88+
-DARROW_DEPENDENCY_SOURCE="VCPKG" \
89+
-DARROW_DEPENDENCY_USE_SHARED=OFF \
90+
-DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
91+
-DARROW_GANDIVA=${ARROW_GANDIVA} \
92+
-DARROW_GCS=${ARROW_GCS} \
93+
-DARROW_JEMALLOC=${ARROW_JEMALLOC} \
94+
-DARROW_ORC=${ARROW_ORC} \
95+
-DARROW_PARQUET=${ARROW_PARQUET} \
96+
-DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
97+
-DARROW_S3=${ARROW_S3} \
98+
-DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
99+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
100+
-DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
101+
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
102+
-DGTest_SOURCE=BUNDLED \
103+
-DORC_SOURCE=BUNDLED \
104+
-DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \
105+
-DPARQUET_BUILD_EXAMPLES=OFF \
106+
-DPARQUET_BUILD_EXECUTABLES=OFF \
107+
-DPARQUET_REQUIRE_ENCRYPTION=OFF \
108+
-DVCPKG_MANIFEST_MODE=OFF \
109+
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
110+
-GNinja \
111+
${arrow_dir}/cpp
112+
ninja install
113+
114+
if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
115+
# MinIO is required
116+
exclude_tests="arrow-s3fs-test"
117+
case $(arch) in
118+
aarch64)
119+
# GCS testbench crashes on aarch64:
120+
# ImportError: ../grpc/_cython/cygrpc.cpython-38-aarch64-linux-gnu.so:
121+
# undefined symbol: vtable for std::__cxx11::basic_ostringstream<
122+
# char, std::char_traits<char>, std::allocator<char> >
123+
exclude_tests="${exclude_tests}|arrow-gcsfs-test"
124+
;;
125+
esac
126+
# unstable
127+
exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test"
128+
exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test"
129+
# external dependency
130+
exclude_tests="${exclude_tests}|arrow-gcsfs-test"
131+
# strptime
132+
exclude_tests="${exclude_tests}|arrow-utility-test"
133+
ctest \
134+
--exclude-regex "${exclude_tests}" \
135+
--label-regex unittest \
136+
--output-on-failure \
137+
--parallel $(nproc) \
138+
--timeout 300
139+
fi
140+
141+
popd
142+
143+
144+
JAVA_JNI_CMAKE_ARGS=""
145+
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
146+
JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}"
147+
export JAVA_JNI_CMAKE_ARGS
148+
${arrow_dir}/ci/scripts/java_jni_build.sh \
149+
${arrow_dir} \
150+
${ARROW_HOME} \
151+
${build_dir} \
152+
${dist_dir}
153+
154+
if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
155+
echo "=== ccache statistics after build ==="
156+
ccache -sv 2>/dev/null || ccache -s
157+
fi
158+
159+
160+
echo "=== Checking shared dependencies for libraries ==="
161+
pushd "${dist_dir}"
162+
archery linking check-dependencies \
163+
--allow ld-linux-aarch64 \
164+
--allow ld-linux-x86-64 \
165+
--allow libc \
166+
--allow libdl \
167+
--allow libgcc_s \
168+
--allow libm \
169+
--allow libpthread \
170+
--allow librt \
171+
--allow libstdc++ \
172+
--allow libz \
173+
--allow linux-vdso \
174+
arrow_cdata_jni/${normalized_arch}/libarrow_cdata_jni.so \
175+
arrow_dataset_jni/${normalized_arch}/libarrow_dataset_jni.so \
176+
arrow_orc_jni/${normalized_arch}/libarrow_orc_jni.so \
177+
gandiva_jni/${normalized_arch}/libgandiva_jni.so
178+
popd

docker-compose.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ volumes:
3131

3232
services:
3333
ubuntu:
34+
# Build and test arrow-java on Ubuntu.
35+
#
3436
# Usage:
3537
# docker compose build ubuntu
3638
# docker compose run ubuntu
@@ -47,6 +49,10 @@ services:
4749
/arrow-java/ci/scripts/java_test.sh /arrow-java /build"
4850

4951
conda-jni-cdata:
52+
# Builds and tests just the C Data Interface JNI library and JARs.
53+
# (No dependencies on arrow-cpp.)
54+
# This build isn't meant for distribution. It's for testing only.
55+
#
5056
# Usage:
5157
# docker compose build conda-jni-cdata
5258
# docker compose run conda-jni-cdata
@@ -75,3 +81,29 @@ services:
7581
/arrow-java/ci/scripts/java_jni_build.sh /arrow-java /build/jni /build /jni &&
7682
/arrow-java/ci/scripts/java_build.sh /arrow-java /build /jni &&
7783
/arrow-java/ci/scripts/java_test.sh /arrow-java /build /jni"
84+
85+
vcpkg-jni:
86+
# Builds all the JNI libraries, but not the JARs.
87+
# (Requires arrow-cpp.)
88+
# The artifacts from this build are meant to be used for packaging.
89+
#
90+
# Usage:
91+
# docker compose build vcpkg-jni
92+
# docker compose run vcpkg-jni
93+
image: ${REPO}:${ARCH}-vcpkg-jni
94+
build:
95+
context: .
96+
dockerfile: ci/docker/vcpkg-jni.dockerfile
97+
cache_from:
98+
- ${REPO}:${ARCH}-vcpkg-jni
99+
args:
100+
base: ${ARROW_REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
101+
volumes:
102+
- .:/arrow-java:delegated
103+
- ${ARROW_REPO_ROOT}:/arrow:delegated
104+
- ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
105+
environment:
106+
ARROW_JAVA_CDATA: "ON"
107+
command:
108+
["git config --global --add safe.directory /arrow-java && \
109+
/arrow-java/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow-java/dist"]

0 commit comments

Comments
 (0)