Skip to content

Commit 6581f65

Browse files
authored
Remove flink-shaded-hadoop-2-uber in java-bridge (#14)
1 parent 7e49b66 commit 6581f65

File tree

6 files changed

+73
-16
lines changed

6 files changed

+73
-16
lines changed

paimon_python_java/gateway_server.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,30 @@ def preexec_func():
7979

8080

8181
def _get_classpath(env):
    """Assemble the JVM classpath for the gateway server.

    Order of entries: the built-in java bridge jar shipped with the
    package, then any user-defined entries, then the hadoop classpath
    (when one needs to be added explicitly).
    """
    entries = []

    # Built-in java bridge jar located inside the java implementation module.
    impl_module = importlib.import_module(_JAVA_IMPL_MODULE)
    bridge_jar = os.path.join(*impl_module.__path__, _JAVA_DEPS, _JAVA_BRIDGE + '.jar')
    entries.append(bridge_jar)

    # User-defined classpath, when the variable is present in env.
    try:
        entries.append(env[constants.PYPAIMON_JAVA_CLASSPATH])
    except KeyError:
        pass

    # Hadoop jars; None means the JVM will pick them up on its own
    # (see _get_hadoop_classpath), so nothing is appended.
    hadoop_entry = _get_hadoop_classpath(env)
    if hadoop_entry is not None:
        entries.append(hadoop_entry)

    return os.pathsep.join(entries)
98+
99+
100+
def _get_hadoop_classpath(env):
    """Resolve the hadoop classpath entry for the gateway JVM.

    Returns the value of the pypaimon-specific variable when set.
    Returns None when the standard 'HADOOP_CLASSPATH' variable is set
    instead (no extra entry is needed — presumably the JVM startup
    consumes it directly; confirmed only by the caller skipping None).

    Raises:
        EnvironmentError: when neither variable is set in *env*.
    """
    if constants.PYPAIMON_HADOOP_CLASSPATH in env:
        return env[constants.PYPAIMON_HADOOP_CLASSPATH]

    if 'HADOOP_CLASSPATH' in env:
        return None

    # Fix: the original built this message with a backslash line
    # continuation inside the f-string, which embedded the source
    # indentation (a long run of spaces) into the user-facing text.
    # Adjacent string literals keep the message on one clean line.
    raise EnvironmentError(
        f"You haven't set '{constants.PYPAIMON_HADOOP_CLASSPATH}', "
        f"and 'HADOOP_CLASSPATH' is also not set. Ensure one of them is set.")

paimon_python_java/paimon-python-java-bridge/pom.xml

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030

3131
<properties>
3232
<paimon.version>0.9.0</paimon.version>
33-
<flink.shaded.hadoop.version>2.8.3-10.0</flink.shaded.hadoop.version>
3433
<py4j.version>0.10.9.7</py4j.version>
3534
<slf4j.version>1.7.32</slf4j.version>
3635
<log4j.version>2.17.1</log4j.version>
@@ -68,12 +67,6 @@
6867
<version>${log4j.version}</version>
6968
</dependency>
7069

71-
<dependency>
72-
<groupId>org.apache.flink</groupId>
73-
<artifactId>flink-shaded-hadoop-2-uber</artifactId>
74-
<version>${flink.shaded.hadoop.version}</version>
75-
</dependency>
76-
7770
<dependency>
7871
<groupId>org.apache.arrow</groupId>
7972
<artifactId>arrow-vector</artifactId>
@@ -178,7 +171,6 @@
178171
<include>com.google.flatbuffers:flatbuffers-java</include>
179172
<include>org.slf4j:slf4j-api</include>
180173
<include>org.apache.logging.log4j:log4j-1.2-api</include>
181-
<include>org.apache.flink:flink-shaded-hadoop-2-uber</include>
182174
<include>net.sf.py4j:py4j</include>
183175
</includes>
184176
</artifactSet>

paimon_python_java/tests/test_data_types.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
import pyarrow as pa
2525
import unittest
2626

27-
2827
from paimon_python_api import Schema
2928
from paimon_python_java import Catalog
29+
from paimon_python_java.tests import utils
3030
from paimon_python_java.util import java_utils
3131
from setup_utils import java_setuputils
3232

@@ -36,6 +36,8 @@ class DataTypesTest(unittest.TestCase):
3636
@classmethod
3737
def setUpClass(cls):
3838
java_setuputils.setup_java_bridge()
39+
cls.hadoop_path = tempfile.mkdtemp()
40+
utils.setup_hadoop_bundle_jar(cls.hadoop_path)
3941
cls.warehouse = tempfile.mkdtemp()
4042
cls.simple_pa_schema = pa.schema([
4143
('f0', pa.int32()),
@@ -47,6 +49,8 @@ def setUpClass(cls):
4749
@classmethod
4850
def tearDownClass(cls):
4951
java_setuputils.clean()
52+
if os.path.exists(cls.hadoop_path):
53+
shutil.rmtree(cls.hadoop_path)
5054
if os.path.exists(cls.warehouse):
5155
shutil.rmtree(cls.warehouse)
5256

paimon_python_java/tests/test_write_and_read.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,23 @@
2222
import unittest
2323
import pandas as pd
2424
import pyarrow as pa
25-
import setup_utils.java_setuputils as setuputils
2625

2726
from paimon_python_api import Schema
2827
from paimon_python_java import Catalog
2928
from paimon_python_java.java_gateway import get_gateway
29+
from paimon_python_java.tests import utils
3030
from paimon_python_java.util import java_utils
3131
from py4j.protocol import Py4JJavaError
32+
from setup_utils import java_setuputils
3233

3334

3435
class TableWriteReadTest(unittest.TestCase):
3536

3637
@classmethod
3738
def setUpClass(cls):
38-
setuputils.setup_java_bridge()
39+
java_setuputils.setup_java_bridge()
40+
cls.hadoop_path = tempfile.mkdtemp()
41+
utils.setup_hadoop_bundle_jar(cls.hadoop_path)
3942
cls.warehouse = tempfile.mkdtemp()
4043
cls.simple_pa_schema = pa.schema([
4144
('f0', pa.int32()),
@@ -46,7 +49,9 @@ def setUpClass(cls):
4649

4750
@classmethod
4851
def tearDownClass(cls):
49-
setuputils.clean()
52+
java_setuputils.clean()
53+
if os.path.exists(cls.hadoop_path):
54+
shutil.rmtree(cls.hadoop_path)
5055
if os.path.exists(cls.warehouse):
5156
shutil.rmtree(cls.warehouse)
5257

paimon_python_java/tests/utils.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
################################################################################
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
################################################################################
18+
19+
import os
20+
import urllib.request
21+
22+
from paimon_python_java.util import constants
23+
24+
25+
def setup_hadoop_bundle_jar(hadoop_dir):
    """Test helper: download the flink-shaded-hadoop uber jar into
    *hadoop_dir* and point the pypaimon hadoop-classpath environment
    variable at the downloaded file.

    Side effects: creates *hadoop_dir* if missing, writes
    'bundled-hadoop.jar' inside it, and mutates ``os.environ``.
    """
    url = ('https://repo.maven.apache.org/maven2/org/apache/flink/'
           'flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar')

    # Fix: the original never closed the HTTP response (resource leak);
    # a context manager guarantees the socket is released.
    with urllib.request.urlopen(url) as response:
        payload = response.read()

    # Fix: makedirs(exist_ok=True) avoids the exists()/mkdir() race and
    # also creates any missing parent directories.
    os.makedirs(hadoop_dir, exist_ok=True)

    jar_path = os.path.join(hadoop_dir, "bundled-hadoop.jar")
    with open(jar_path, 'wb') as jar_file:
        jar_file.write(payload)

    os.environ[constants.PYPAIMON_HADOOP_CLASSPATH] = jar_path

paimon_python_java/util/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
PYPAIMON_CONN_INFO_PATH = '_PYPAIMON_CONN_INFO_PATH'
2121
PYPAIMON_JVM_ARGS = '_PYPAIMON_JVM_ARGS'
2222
PYPAIMON_JAVA_CLASSPATH = '_PYPAIMON_JAVA_CLASSPATH'
23+
PYPAIMON_HADOOP_CLASSPATH = '_PYPAIMON_HADOOP_CLASSPATH'
2324
PYPAIMON_MAIN_CLASS = 'org.apache.paimon.python.PythonGatewayServer'
2425
PYPAIMON_MAIN_ARGS = '_PYPAIMON_MAIN_ARGS'
2526

0 commit comments

Comments
 (0)