diff --git a/.github/workflows/check-java-bridge-licensing.yml b/.github/workflows/check-java-bridge-licensing.yml index 153dfeb..1513f98 100644 --- a/.github/workflows/check-java-bridge-licensing.yml +++ b/.github/workflows/check-java-bridge-licensing.yml @@ -40,14 +40,14 @@ jobs: - name: Build run: | set -o pipefail - cd pypaimon/py4j/paimon-python-java-bridge + cd paimon-python-java-bridge mvn clean deploy ${{ env.MVN_COMMON_OPTIONS }} -DskipTests \ -DaltDeploymentRepository=validation_repository::default::file:${{ env.MVN_VALIDATION_DIR }} \ | tee ${{ env.MVN_BUILD_OUTPUT_FILE }} - name: Check licensing run: | - cd pypaimon/py4j/paimon-python-java-bridge + cd paimon-python-java-bridge mvn ${{ env.MVN_COMMON_OPTIONS }} exec:java@check-licensing -N \ -Dexec.args="${{ env.MVN_BUILD_OUTPUT_FILE }} $(pwd) ${{ env.MVN_VALIDATION_DIR }}" \ -Dlog4j.configurationFile=file://$(pwd)/tools/ci/log4j.properties diff --git a/.github/workflows/create-source-release.yml b/.github/workflows/create-source-release.yml new file mode 100644 index 0000000..eb9029e --- /dev/null +++ b/.github/workflows/create-source-release.yml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Create Source Release + +on: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup GPG + env: + GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }} + run: | + mkdir -p ~/.gnupg + chmod 700 ~/.gnupg + echo $GPG_PRIVATE_KEY | base64 --decode | gpg --batch --import --yes + echo "use-agent" >> ~/.gnupg/gpg.conf + echo "pinentry-program /usr/bin/pinentry" >> ~/.gnupg/gpg-agent.conf + echo "allow-loopback-pinentry" >> ~/.gnupg/gpg-agent.conf + + - name: Create source release + env: + GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} + run: | + mkdir -p output + chmod +x tools/releasing/create_source_release.sh + OUTPUT_DIR=output GPG_PASSPHRASE=$GPG_PASSPHRASE tools/releasing/create_source_release.sh + + - name: Upload source release + uses: actions/upload-artifact@v4 + with: + name: source-release + path: output/* diff --git a/.github/workflows/paimon-python-checks.yml b/.github/workflows/paimon-python-checks.yml index 195783f..6ec61f0 100644 --- a/.github/workflows/paimon-python-checks.yml +++ b/.github/workflows/paimon-python-checks.yml @@ -43,14 +43,8 @@ jobs: with: java-version: ${{ env.JDK_VERSION }} distribution: 'adopt' - - name: Set up hadoop dependency - run: | - mkdir -p ${{ github.workspace }}/temp - curl -L -o ${{ github.workspace }}/temp/bundled-hadoop.jar \ - https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar + - name: Run lint-python.sh - env: - _PYPAIMON_HADOOP_CLASSPATH: ${{ github.workspace }}/temp/bundled-hadoop.jar run: | chmod +x dev/lint-python.sh ./dev/lint-python.sh diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..09e3734 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,23 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +global-exclude *.py[cod] __pycache__ .DS_Store +recursive-include deps/jars *.jar +include README.md +include LICENSE +include NOTICE diff --git a/README.md b/README.md index cd2d360..63c2d41 100644 --- a/README.md +++ b/README.md @@ -31,16 +31,6 @@ We provide script to check codes. ./dev/lint-python.sh -h # run this to see more usages ``` -## Build - -We provide script to build wheel. - -```shell -./dev/build-wheels.sh -``` - -The target wheel is under `dist/` - # Usage See Apache Paimon Python API [Doc](https://paimon.apache.org/docs/master/program-api/python-api/). diff --git a/dev/build-wheels.sh b/dev/build-wheels.sh deleted file mode 100755 index 7b9f53d..0000000 --- a/dev/build-wheels.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -x - -## 1. install python env -dev/lint-python.sh -s py_env - -PY_ENV_DIR=`pwd`/dev/.conda/envs -# Don't need to build with different Python version when there are no C-related codes -py_env=("3.11") -## 2. install dependency -for ((i=0;i<${#py_env[@]};i++)) do - echo "Installing dependencies for environment: ${py_env[i]}" - ${PY_ENV_DIR}/${py_env[i]}/bin/pip install -r dev/dev-requirements.txt -done - -## 3. build wheels -for ((i=0;i<${#py_env[@]};i++)) do - echo "Building wheel for environment: ${py_env[i]}" - if [[ "$(uname)" != "Darwin" ]]; then - # force the linker to use the older glibc version in Linux - export CFLAGS="-I. -include dev/glibc_version_fix.h" - fi - ${PY_ENV_DIR}/${py_env[i]}/bin/python setup.py clean bdist_wheel -done - -## 4. 
convert linux_x86_64 wheel to manylinux1 wheel in Linux -if [[ "$(uname)" != "Darwin" ]]; then - echo "Converting linux_x86_64 wheel to manylinux1" - source `pwd`/dev/.conda/bin/activate - # 4.1 install patchelf - conda install -c conda-forge patchelf=0.11 -y - # 4.2 install auditwheel - pip install auditwheel==3.2.0 - # 4.3 convert Linux wheel - for wheel_file in dist/*.whl; do - auditwheel repair ${wheel_file} -w dist - rm -f ${wheel_file} - done - source deactivate -fi -## see the result -ls -al dist/ diff --git a/dev/lint-python.sh b/dev/lint-python.sh index 15a009e..e9d3e5e 100755 --- a/dev/lint-python.sh +++ b/dev/lint-python.sh @@ -577,8 +577,9 @@ function tox_check() { # Ensure the permission of the scripts set correctly chmod +x $PAIMON_PYTHON_DIR/dev/* - # tox runs codes in virtual env, set var to avoid error - export _PYPAIMON_TOX_TEST="true" + # dummy jar needed by setup.py + mkdir -p $PAIMON_PYTHON_DIR/deps/jars + touch $PAIMON_PYTHON_DIR/deps/jars/dummy.jar if [[ -n "$GITHUB_ACTION" ]]; then # Run tests in all versions triggered by a Git push (tests aren't so many currently) @@ -596,6 +597,9 @@ function tox_check() { $TOX_PATH -vv -c $PAIMON_PYTHON_DIR/tox.ini -e ${ENV_LIST[$index]} --recreate 2>&1 | tee -a $LOG_FILE fi + # delete dummy jar + rm -rf $PAIMON_PYTHON_DIR/deps + TOX_RESULT=$((grep -c "congratulations :)" "$LOG_FILE") 2>&1) if [ $TOX_RESULT -eq '0' ]; then print_function "STAGE" "tox checks... 
[FAILED]" diff --git a/dev/test_deps/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar b/dev/test_deps/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar new file mode 100644 index 0000000..a7b50b0 Binary files /dev/null and b/dev/test_deps/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar differ diff --git a/dev/test_deps/paimon-python-java-bridge-0.9-SNAPSHOT.jar b/dev/test_deps/paimon-python-java-bridge-0.9-SNAPSHOT.jar new file mode 100644 index 0000000..dfbe9d7 Binary files /dev/null and b/dev/test_deps/paimon-python-java-bridge-0.9-SNAPSHOT.jar differ diff --git a/pypaimon/py4j/paimon-python-java-bridge/copyright.txt b/paimon-python-java-bridge/copyright.txt similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/copyright.txt rename to paimon-python-java-bridge/copyright.txt diff --git a/pypaimon/py4j/paimon-python-java-bridge/pom.xml b/paimon-python-java-bridge/pom.xml similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/pom.xml rename to paimon-python-java-bridge/pom.xml diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/BytesWriter.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/BytesWriter.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/BytesWriter.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/BytesWriter.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/FileLock.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/FileLock.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/FileLock.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/FileLock.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/InvocationUtil.java 
b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/InvocationUtil.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/InvocationUtil.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/InvocationUtil.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/NetUtils.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/NetUtils.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/NetUtils.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/NetUtils.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/ParallelBytesReader.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/ParallelBytesReader.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/ParallelBytesReader.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/ParallelBytesReader.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PredicationUtil.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PredicationUtil.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PredicationUtil.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PredicationUtil.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonEnvUtils.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonEnvUtils.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonEnvUtils.java rename to 
paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonEnvUtils.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonGatewayServer.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonGatewayServer.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonGatewayServer.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/PythonGatewayServer.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/RecordBytesIterator.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/RecordBytesIterator.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/RecordBytesIterator.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/RecordBytesIterator.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/SchemaUtil.java b/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/SchemaUtil.java similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/java/org/apache/paimon/python/SchemaUtil.java rename to paimon-python-java-bridge/src/main/java/org/apache/paimon/python/SchemaUtil.java diff --git a/pypaimon/py4j/paimon-python-java-bridge/src/main/resources/META-INF/NOTICE b/paimon-python-java-bridge/src/main/resources/META-INF/NOTICE similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/src/main/resources/META-INF/NOTICE rename to paimon-python-java-bridge/src/main/resources/META-INF/NOTICE diff --git a/pypaimon/py4j/paimon-python-java-bridge/tools/ci/log4j.properties b/paimon-python-java-bridge/tools/ci/log4j.properties similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/tools/ci/log4j.properties rename to 
paimon-python-java-bridge/tools/ci/log4j.properties diff --git a/pypaimon/py4j/paimon-python-java-bridge/tools/maven/checkstyle.xml b/paimon-python-java-bridge/tools/maven/checkstyle.xml similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/tools/maven/checkstyle.xml rename to paimon-python-java-bridge/tools/maven/checkstyle.xml diff --git a/pypaimon/py4j/paimon-python-java-bridge/tools/maven/suppressions.xml b/paimon-python-java-bridge/tools/maven/suppressions.xml similarity index 100% rename from pypaimon/py4j/paimon-python-java-bridge/tools/maven/suppressions.xml rename to paimon-python-java-bridge/tools/maven/suppressions.xml diff --git a/pypaimon/py4j/gateway_server.py b/pypaimon/py4j/gateway_server.py index 9a259e0..f3a0fda 100644 --- a/pypaimon/py4j/gateway_server.py +++ b/pypaimon/py4j/gateway_server.py @@ -16,7 +16,7 @@ # limitations under the License. ################################################################################ -import importlib +import importlib.resources import os import platform import signal @@ -74,17 +74,21 @@ def preexec_func(): stdin=PIPE, stderr=PIPE, preexec_fn=preexec_fn, env=env) -_JAVA_IMPL_MODULE = 'pypaimon.py4j' -_JAVA_DEPS = 'java_dependencies' -_JAVA_BRIDGE = 'paimon-python-java-bridge' +_JAVA_DEPS_PACKAGE = 'pypaimon.jars' def _get_classpath(env): classpath = [] - module = importlib.import_module(_JAVA_IMPL_MODULE) - builtin_java_bridge = os.path.join(*module.__path__, _JAVA_DEPS, _JAVA_BRIDGE + '.jar') - classpath.append(builtin_java_bridge) + # note that jars are not packaged in test + test_mode = os.environ.get(constants.PYPAIMON4J_TEST_MODE) + if not test_mode or test_mode.lower() != "true": + jars = importlib.resources.files(_JAVA_DEPS_PACKAGE) + one_jar = next(iter(jars.iterdir()), None) + if not one_jar: + raise ValueError("Haven't found necessary python-java-bridge jar, this is unexpected.") + builtin_java_classpath = os.path.join(os.path.dirname(str(one_jar)), '*') + 
classpath.append(builtin_java_classpath) # user defined if constants.PYPAIMON_JAVA_CLASSPATH in env: diff --git a/pypaimon/py4j/tests/__init__.py b/pypaimon/py4j/tests/__init__.py index 65b48d4..931ef35 100644 --- a/pypaimon/py4j/tests/__init__.py +++ b/pypaimon/py4j/tests/__init__.py @@ -15,3 +15,35 @@ # See the License for the specific language governing permissions and # limitations under the License. ################################################################################ + +import os +import shutil +import tempfile +import unittest + +from pypaimon.py4j import constants, Catalog + + +class PypaimonTestBase(unittest.TestCase): + """ + Base class for unit tests. + """ + + @classmethod + def setUpClass(cls): + os.environ[constants.PYPAIMON4J_TEST_MODE] = 'true' + + this_dir = os.path.abspath(os.path.dirname(__file__)) + project_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_dir))) + deps = os.path.join(project_dir, "dev/test_deps/*") + os.environ[constants.PYPAIMON_HADOOP_CLASSPATH] = deps + + cls.tempdir = tempfile.mkdtemp() + cls.warehouse = os.path.join(cls.tempdir, 'warehouse') + cls.catalog = Catalog.create({'warehouse': cls.warehouse}) + cls.catalog.create_database('default', False) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.tempdir, ignore_errors=True) + del os.environ[constants.PYPAIMON4J_TEST_MODE] diff --git a/pypaimon/py4j/tests/test_data_types.py b/pypaimon/py4j/tests/test_data_types.py index 5fb809e..b0d0e41 100644 --- a/pypaimon/py4j/tests/test_data_types.py +++ b/pypaimon/py4j/tests/test_data_types.py @@ -16,43 +16,24 @@ # limitations under the License. 
################################################################################ -import os import random -import shutil import string -import tempfile import pyarrow as pa -import unittest from pypaimon import Schema -from pypaimon.py4j import Catalog -from pypaimon.py4j.tests import utils +from pypaimon.py4j.tests import PypaimonTestBase from pypaimon.py4j.util import java_utils -from setup_utils import java_setuputils -class DataTypesTest(unittest.TestCase): +class DataTypesTest(PypaimonTestBase): @classmethod def setUpClass(cls): - java_setuputils.setup_java_bridge() - cls.hadoop_path = tempfile.mkdtemp() - utils.setup_hadoop_bundle_jar(cls.hadoop_path) - cls.warehouse = tempfile.mkdtemp() + super().setUpClass() cls.simple_pa_schema = pa.schema([ ('f0', pa.int32()), ('f1', pa.string()) ]) - cls.catalog = Catalog.create({'warehouse': cls.warehouse}) - cls.catalog.create_database('default', False) - - @classmethod - def tearDownClass(cls): - java_setuputils.clean() - if os.path.exists(cls.hadoop_path): - shutil.rmtree(cls.hadoop_path) - if os.path.exists(cls.warehouse): - shutil.rmtree(cls.warehouse) def test_int(self): pa_schema = pa.schema([ diff --git a/pypaimon/py4j/tests/test_preicates.py b/pypaimon/py4j/tests/test_preicates.py index 5b63759..f538c93 100644 --- a/pypaimon/py4j/tests/test_preicates.py +++ b/pypaimon/py4j/tests/test_preicates.py @@ -16,18 +16,12 @@ # limitations under the License. 
################################################################################ -import os -import shutil -import tempfile -import unittest import random import pandas as pd import pyarrow as pa from pypaimon import Schema -from pypaimon.py4j import Catalog -from pypaimon.py4j.tests import utils -from setup_utils import java_setuputils +from pypaimon.py4j.tests import PypaimonTestBase def _check_filtered_result(read_builder, expected_df): @@ -38,41 +32,34 @@ def _check_filtered_result(read_builder, expected_df): actual_df.reset_index(drop=True), expected_df.reset_index(drop=True)) -# TODO: parquet has bug now +# TODO: Parquet has bug now. Fixed in 1.0. def _random_format(): return random.choice(['avro', 'orc']) -class PredicateTest(unittest.TestCase): +class PredicateTest(PypaimonTestBase): @classmethod def setUpClass(cls): - java_setuputils.setup_java_bridge() - cls.hadoop_path = tempfile.mkdtemp() - utils.setup_hadoop_bundle_jar(cls.hadoop_path) - cls.warehouse = tempfile.mkdtemp() - - catalog = Catalog.create({'warehouse': cls.warehouse}) - catalog.create_database('default', False) - + super().setUpClass() pa_schema = pa.schema([ ('f0', pa.int64()), ('f1', pa.string()), ]) - catalog.create_table('default.test_append', - Schema(pa_schema, options={'file.format': _random_format()}), - False) - catalog.create_table('default.test_pk', - Schema(pa_schema, primary_keys=['f0'], - options={'bucket': '1', 'file.format': _random_format()}), - False) + cls.catalog.create_table('default.test_append', + Schema(pa_schema, options={'file.format': _random_format()}), + False) + cls.catalog.create_table('default.test_pk', + Schema(pa_schema, primary_keys=['f0'], + options={'bucket': '1', 'file.format': _random_format()}), + False) df = pd.DataFrame({ 'f0': [1, 2, 3, 4, 5], 'f1': ['abc', 'abbc', 'bc', 'd', None], }) - append_table = catalog.get_table('default.test_append') + append_table = cls.catalog.get_table('default.test_append') write_builder = 
append_table.new_batch_write_builder() write = write_builder.new_write() commit = write_builder.new_commit() @@ -81,7 +68,7 @@ def setUpClass(cls): write.close() commit.close() - pk_table = catalog.get_table('default.test_pk') + pk_table = cls.catalog.get_table('default.test_pk') write_builder = pk_table.new_batch_write_builder() write = write_builder.new_write() commit = write_builder.new_commit() @@ -90,17 +77,8 @@ def setUpClass(cls): write.close() commit.close() - cls.catalog = catalog cls.df = df - @classmethod - def tearDownClass(cls): - java_setuputils.clean() - if os.path.exists(cls.hadoop_path): - shutil.rmtree(cls.hadoop_path) - if os.path.exists(cls.warehouse): - shutil.rmtree(cls.warehouse) - def testWrongFieldName(self): table = self.catalog.get_table('default.test_append') predicate_builder = table.new_read_builder().new_predicate_builder() diff --git a/pypaimon/py4j/tests/test_write_and_read.py b/pypaimon/py4j/tests/test_write_and_read.py index 27528d1..14e4138 100644 --- a/pypaimon/py4j/tests/test_write_and_read.py +++ b/pypaimon/py4j/tests/test_write_and_read.py @@ -16,10 +16,6 @@ # limitations under the License. 
################################################################################ -import os -import shutil -import tempfile -import unittest import pandas as pd import pyarrow as pa from py4j.protocol import Py4JJavaError @@ -27,33 +23,19 @@ from pypaimon import Schema from pypaimon.py4j import Catalog from pypaimon.py4j.java_gateway import get_gateway -from pypaimon.py4j.tests import utils +from pypaimon.py4j.tests import PypaimonTestBase from pypaimon.py4j.util import java_utils -from setup_utils import java_setuputils -class TableWriteReadTest(unittest.TestCase): +class TableWriteReadTest(PypaimonTestBase): @classmethod def setUpClass(cls): - java_setuputils.setup_java_bridge() - cls.hadoop_path = tempfile.mkdtemp() - utils.setup_hadoop_bundle_jar(cls.hadoop_path) - cls.warehouse = tempfile.mkdtemp() + super().setUpClass() cls.simple_pa_schema = pa.schema([ ('f0', pa.int32()), ('f1', pa.string()) ]) - cls.catalog = Catalog.create({'warehouse': cls.warehouse}) - cls.catalog.create_database('default', False) - - @classmethod - def tearDownClass(cls): - java_setuputils.clean() - if os.path.exists(cls.hadoop_path): - shutil.rmtree(cls.hadoop_path) - if os.path.exists(cls.warehouse): - shutil.rmtree(cls.warehouse) def testReadEmptyAppendTable(self): schema = Schema(self.simple_pa_schema) diff --git a/pypaimon/py4j/util/constants.py b/pypaimon/py4j/util/constants.py index 1039de2..f223309 100644 --- a/pypaimon/py4j/util/constants.py +++ b/pypaimon/py4j/util/constants.py @@ -26,3 +26,6 @@ # ------------------------ for catalog options ------------------------ MAX_WORKERS = "max-workers" + +# ------------------ for tests (Please don't use it) ------------------ +PYPAIMON4J_TEST_MODE = '_PYPAIMON4J_TEST_MODE' diff --git a/setup_utils/version.py b/pypaimon/version.py similarity index 100% rename from setup_utils/version.py rename to pypaimon/version.py diff --git a/setup_utils/__init__.py b/setup.cfg similarity index 93% rename from setup_utils/__init__.py rename to 
setup.cfg index 27dc0ac..ecc3dda 100644 --- a/setup_utils/__init__.py +++ b/setup.cfg @@ -16,4 +16,5 @@ # limitations under the License. ################################################################################ -"""This module only contains utils for setup and won't be packaged.""" +[bdist_wheel] +universal = 1 diff --git a/setup.py b/setup.py index 628a6b8..4fc12a6 100644 --- a/setup.py +++ b/setup.py @@ -16,84 +16,65 @@ # limitations under the License. ################################################################################ -import fnmatch import os -import shutil -import setup_utils.java_setuputils as java_setuputils -import setup_utils.version +import sys -from setuptools import Command, setup - - -class CleanCommand(Command): - description = 'Clean up temporary files and directories of last build.' - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - directories_to_delete = ['build', 'dist', '*.egg-info'] - - for directory in directories_to_delete: - if '*' in directory: - for matched_dir in filter(lambda x: fnmatch.fnmatch(x, directory), os.listdir('.')): - if os.path.isdir(matched_dir): - shutil.rmtree(matched_dir) - else: - if os.path.exists(directory): - shutil.rmtree(directory) +from setuptools import setup +this_directory = os.path.abspath(os.path.dirname(__file__)) +version_file = os.path.join(this_directory, 'pypaimon/version.py') try: - PACKAGES = [ - 'pypaimon', - 'pypaimon.api', - 'pypaimon.py4j', - 'pypaimon.py4j.util' - ] + exec(open(version_file).read()) +except IOError: + print("Failed to load PyPaimon version file for packaging. " + + "'%s' not found!" 
% version_file, + file=sys.stderr) + sys.exit(-1) +VERSION = __version__ # noqa - PACKAGE_DATA = { - 'pypaimon.py4j': java_setuputils.get_package_data() - } +PACKAGES = [ + 'pypaimon', + 'pypaimon.api', + 'pypaimon.py4j', + 'pypaimon.py4j.util', + 'pypaimon.jars' +] - install_requires = [ - 'py4j==0.10.9.7', - 'python-dateutil>=2.8.0,<3', - 'pytz>=2018.3', - 'numpy>=1.22.4', - 'pandas>=1.3.0', - 'pyarrow>=5.0.0' - ] +install_requires = [ + 'py4j==0.10.9.7', + 'pandas>=1.3.0', + 'pyarrow>=5.0.0' +] - long_description = 'See Apache Paimon Python API \ - [Doc](https://paimon.apache.org/docs/master/program-api/python-api/) for usage.' +long_description = 'See Apache Paimon Python API \ +[Doc](https://paimon.apache.org/docs/master/program-api/python-api/) for usage.' - setup( - name='pypaimon', - version=setup_utils.version.__version__, - packages=PACKAGES, - include_package_data=True, - package_data=PACKAGE_DATA, - cmdclass={'clean': CleanCommand}, - install_requires=install_requires, - description='Apache Paimon Python API', - long_description=long_description, - long_description_content_type='text/markdown', - author='Apache Software Foundation', - author_email='dev@paimon.apache.org', - url='https://paimon.apache.org', - classifiers=[ - 'Development Status :: 4 - Beta', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11'], - python_requires='>=3.8' - ) -finally: - java_setuputils.clean() +setup( + name='pypaimon', + version=VERSION, + packages=PACKAGES, + include_package_data=True, + # releasing tool will generate deps + package_dir={ + "pypaimon.jars": "deps/jars" + }, + package_data={ + "pypaimon.jars": ["*.jar"] + }, + install_requires=install_requires, + description='Apache Paimon Python API', + long_description=long_description, + long_description_content_type='text/markdown', + 
author='Apache Software Foundation', + author_email='dev@paimon.apache.org', + url='https://paimon.apache.org', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11'], + python_requires='>=3.8' +) diff --git a/setup_utils/java_setuputils.py b/setup_utils/java_setuputils.py deleted file mode 100755 index 01b02e8..0000000 --- a/setup_utils/java_setuputils.py +++ /dev/null @@ -1,85 +0,0 @@ -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-################################################################################ - -import os -import shutil -import subprocess - -from xml.etree import ElementTree - -_JAVA_IMPL_MODULE = 'pypaimon/py4j' -_JAVA_DEPS = 'java_dependencies' -_JAVA_BRIDGE = 'paimon-python-java-bridge' - -_PYPAIMON_TOX_TEST = '_PYPAIMON_TOX_TEST' - - -def get_package_data(): - is_tox_test = os.environ.get(_PYPAIMON_TOX_TEST) - if is_tox_test and is_tox_test.lower() == "true": - return [''] - - setup_java_bridge() - return [os.path.join(_JAVA_DEPS, '*')] - - -def clean(): - java_deps_dir = os.path.join(_find_java_impl_dir(), _JAVA_DEPS) - if os.path.exists(java_deps_dir): - shutil.rmtree(java_deps_dir) - - -def setup_java_bridge(): - java_impl_dir = _find_java_impl_dir() - - java_deps_dir = os.path.join(java_impl_dir, _JAVA_DEPS) - if not os.path.exists(java_deps_dir): - os.mkdir(java_deps_dir) - - java_bridge_dst = os.path.join(java_deps_dir, _JAVA_BRIDGE + '.jar') - if os.path.exists(java_bridge_dst): - return - - java_bridge_module = os.path.join(java_impl_dir, _JAVA_BRIDGE) - subprocess.run( - ["mvn", "clean", "package"], - cwd=java_bridge_module, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE - ) - - shutil.copy( - os.path.join(java_bridge_module, 'target/{}-{}.jar' - .format(_JAVA_BRIDGE, _extract_bridge_version())), - java_bridge_dst - ) - - -def _extract_bridge_version(): - pom_path = os.path.join(_find_java_impl_dir(), _JAVA_BRIDGE, 'pom.xml') - return ElementTree.parse(pom_path).getroot().find( - 'POM:version', - namespaces={ - 'POM': 'http://maven.apache.org/POM/4.0.0' - }).text - - -def _find_java_impl_dir(): - this_dir = os.path.abspath(os.path.dirname(__file__)) - paimon_python_dir = os.path.dirname(this_dir) - return os.path.join(paimon_python_dir, _JAVA_IMPL_MODULE) diff --git a/tools/releasing/create_binary_release.sh b/tools/releasing/create_binary_release.sh deleted file mode 100755 index 8188fb4..0000000 --- a/tools/releasing/create_binary_release.sh +++ 
/dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -## -## Required variables -## -RELEASE_VERSION=${RELEASE_VERSION} - -if [ -z "${RELEASE_VERSION}" ]; then - echo "RELEASE_VERSION was not set" - exit 1 -fi - -# fail immediately -set -o errexit -set -o nounset - -CURR_DIR=`pwd` -BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" -PROJECT_ROOT="${BASE_DIR}/../../" - -# Sanity check to ensure that resolved paths are valid; a LICENSE file should always exist in project root -if [ ! -f ${PROJECT_ROOT}/LICENSE ]; then - echo "Project root path ${PROJECT_ROOT} is not valid; script may be in the wrong directory." - exit 1 -fi - -if [ "$(uname)" == "Darwin" ]; then - SHASUM="shasum -a 512" -else - SHASUM="sha512sum" -fi - -########################### - -RELEASE_DIR=${PROJECT_ROOT}/release/binary -rm -rf ${RELEASE_DIR} -mkdir -p ${RELEASE_DIR} - -# use lint-python.sh script to create a python environment. 
-dev/lint-python.sh -s basic -source dev/.conda/bin/activate - -# build -dev/build-wheels.sh - -WHEEL_FILE_NAME="pypaimon-${RELEASE_VERSION}-py3-none-any.whl" -cp "dist/${WHEEL_FILE_NAME}" "${RELEASE_DIR}/${WHEEL_FILE_NAME}" - -cd ${RELEASE_DIR} - -# Sign sha the wheel package -gpg --armor --detach-sig ${WHEEL_FILE_NAME} -$SHASUM ${WHEEL_FILE_NAME} > "${WHEEL_FILE_NAME}.sha512" - -cd ${CURR_DIR} diff --git a/tools/releasing/create_source_release.sh b/tools/releasing/create_source_release.sh index 7d6a8a9..ad12a42 100755 --- a/tools/releasing/create_source_release.sh +++ b/tools/releasing/create_source_release.sh @@ -18,12 +18,18 @@ # ## -## Required variables +## set build vars ## -RELEASE_VERSION=${RELEASE_VERSION} +OUTPUT_DIR=${OUTPUT_DIR} +GPG_PASSPHRASE=${GPG_PASSPHRASE} -if [ -z "${RELEASE_VERSION}" ]; then - echo "RELEASE_VERSION is unset" +if [ -z "${OUTPUT_DIR}" ]; then + echo "OUTPUT_DIR was not set" + exit 1 +fi + +if [ -z "${GPG_PASSPHRASE}" ]; then + echo "GPG_PASSPHRASE was not set" exit 1 fi @@ -33,7 +39,7 @@ set -o nounset CURR_DIR=`pwd` BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" -PROJECT_ROOT="$( cd "$( dirname "${BASE_DIR}/../../../" )" >/dev/null && pwd )" +PROJECT_ROOT="${BASE_DIR}/../../" # Sanity check to ensure that resolved paths are valid; a LICENSE file should always exist in project root if [ ! 
-f ${PROJECT_ROOT}/LICENSE ]; then @@ -43,48 +49,53 @@ fi if [ "$(uname)" == "Darwin" ]; then SHASUM="shasum -a 512" - TAR="tar --no-xattrs" else SHASUM="sha512sum" - TAR="tar" fi ########################### -RELEASE_DIR=${PROJECT_ROOT}/release/source -CLONE_DIR=${RELEASE_DIR}/paimon-tmp-clone +# prepare bridge jar + +DEPS_DIR=${PROJECT_ROOT}/deps/jars +rm -rf ${DEPS_DIR} +mkdir -p ${DEPS_DIR} + +cd ${PROJECT_ROOT}/paimon-python-java-bridge -rm -rf ${RELEASE_DIR} -mkdir -p ${RELEASE_DIR} +# check there is no snapshot dependencies +if grep -q ".*SNAPSHOT" "pom.xml"; then + echo "paimon-python-java-bridge is snapshot or contains snapshot dependencies" + exit 1 +fi + +# get bridge jar version (text of the first <version> element in pom.xml) +JAR_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' pom.xml | head -n 1) -# delete the temporary release directory on error -trap 'rm -rf ${RELEASE_DIR}' ERR +mvn clean install -DskipTests +cp "target/paimon-python-java-bridge-${JAR_VERSION}.jar" ${DEPS_DIR} + +cd ${CURR_DIR} -echo "Creating source package" +# build source release -# create a temporary git clone to ensure that we have a pristine source release -git clone ${PROJECT_ROOT} ${CLONE_DIR} +# get release version +RELEASE_VERSION=$(sed -n 's/^__version__ = "\(.*\)"/\1/p' ${PROJECT_ROOT}/pypaimon/version.py) -cd ${CLONE_DIR} -JAVA_ROOT="pypaimon/py4j/paimon-python-java-bridge" -rsync -a \ - --exclude ".DS_Store" --exclude ".asf.yaml" --exclude ".git" \ - --exclude ".github" --exclude ".gitignore" --exclude ".idea" \ - --exclude ".mypy_cache" --exclude ".tox" --exclude "__pycache__" \ - --exclude "build" --exclude "dist" --exclude "*.egg-info" \ - --exclude "dev/.conda" --exclude "dev/.stage.txt" \ - --exclude "dev/download" --exclude "dev/log" --exclude "**/__pycache__" \ - --exclude "${JAVA_ROOT}/dependency-reduced-pom.xml" \ - --exclude "${JAVA_ROOT}/target" \ - . paimon-python-${RELEASE_VERSION} +# use lint-python.sh script to create a python environment. 
+dev/lint-python.sh -s basic +source dev/.conda/bin/activate -TAR czf ${RELEASE_DIR}/apache-paimon-python-${RELEASE_VERSION}-src.tgz paimon-python-${RELEASE_VERSION} -gpg --armor --detach-sig ${RELEASE_DIR}/apache-paimon-python-${RELEASE_VERSION}-src.tgz -cd ${RELEASE_DIR} -${SHASUM} apache-paimon-python-${RELEASE_VERSION}-src.tgz > apache-paimon-python-${RELEASE_VERSION}-src.tgz.sha512 +python setup.py sdist +conda deactivate +PACKAGE_FILE="pypaimon-${RELEASE_VERSION}.tar.gz" +cp "dist/${PACKAGE_FILE}" "${OUTPUT_DIR}/${PACKAGE_FILE}" -rm -rf ${CLONE_DIR} +cd ${OUTPUT_DIR} -echo "Done. Source release package and signatures created under ${RELEASE_DIR}/." +# Sign and checksum the source package (passphrase is quoted so it is never word-split) +gpg --batch --yes --pinentry-mode loopback --passphrase="${GPG_PASSPHRASE}" --armor --detach-sign ${PACKAGE_FILE} +$SHASUM ${PACKAGE_FILE} > "${PACKAGE_FILE}.sha512" +rm -rf ${DEPS_DIR} cd ${CURR_DIR} diff --git a/tools/releasing/update_branch_version.sh b/tools/releasing/update_branch_version.sh index 5c8fc49..9c20171 100755 --- a/tools/releasing/update_branch_version.sh +++ b/tools/releasing/update_branch_version.sh @@ -34,7 +34,7 @@ set -o nounset CURR_DIR=`pwd` BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" PROJECT_ROOT="${BASE_DIR}/../../" -SETUP_UTILS_DIR="${PROJECT_ROOT}/setup_utils" +PYPAIMON_DIR="${PROJECT_ROOT}/pypaimon" # Sanity check to ensure that resolved paths are valid; a LICENSE file should always exist in project root if [ ! -f ${PROJECT_ROOT}/LICENSE ]; then @@ -44,7 +44,7 @@ fi ########################### -cd ${SETUP_UTILS_DIR}/ +cd ${PYPAIMON_DIR}/ # change version perl -pi -e "s#^__version__ = \".*\"#__version__ = \"${NEW_VERSION}\"#" version.py