Skip to content

Commit 1942522

Browse files
committed
Corrected reading process for Python 3
Tests pass in both Python 2 and 3
1 parent 962313b commit 1942522

File tree

2 files changed

+86
-11
lines changed

2 files changed

+86
-11
lines changed

README.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,4 @@ This fork intends to work both on Python 2.7 and Python 3.
1919
TODO
2020
****
2121

22-
* Correct I/O handling in Python 3 (bytes versus strings)
2322
* Correct tests (Swing, ...)

javaobj.py

Lines changed: 86 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,11 @@
4949

5050
if sys.version_info[0] < 3:
5151
# Python 2
52-
from StringIO import StringIO
52+
from StringIO import StringIO as BytesIO
5353

5454
else:
5555
# Python 3+
56-
from io import BytesIO as StringIO
56+
from io import BytesIO
5757

5858
# ------------------------------------------------------------------------------
5959

@@ -82,6 +82,76 @@ def log_error(message, ident=0):
8282

8383
# ------------------------------------------------------------------------------
8484

85+
if sys.version_info[0] >= 3:
    # Python 3 interpreter : bytes & str
    def to_bytes(data, encoding="UTF-8"):
        """
        Converts the given string to an array of bytes.
        Returns the first parameter if it is already an array of bytes.

        :param data: A unicode string, or a bytes/bytearray object
        :param encoding: The encoding used to encode a unicode string
        :return: The corresponding array of bytes
        """
        if isinstance(data, bytes):
            # Nothing to do. isinstance() also accepts bytes subclasses,
            # which have no encode() method in Python 3.
            return data

        if isinstance(data, bytearray):
            # Already binary data: only make it immutable
            return bytes(data)

        return data.encode(encoding)


    def to_str(data, encoding="UTF-8"):
        """
        Converts the given parameter to a string.
        Returns the first parameter if it is already an instance of ``str``.

        :param data: A string, or an array of bytes to decode
        :param encoding: The encoding of data
        :return: The corresponding string
        """
        if isinstance(data, str):
            # Nothing to do. str(data, encoding) would raise a TypeError
            # on a str (or str subclass) argument.
            return data

        return str(data, encoding)


    def read_to_str(data):
        """
        Concatenates all bytes into a string, one character per byte.

        :param data: An array of bytes, a string, or an iterable of integers
        :return: The string whose code points are the given byte values
        """
        if isinstance(data, str):
            # Already a string: iterating it would hand characters to chr()
            return data

        if isinstance(data, (bytes, bytearray)):
            # Latin-1 maps each byte value (0-255) to the code point of the
            # same value, i.e. exactly chr(byte) for every byte.
            return data.decode("latin-1")

        # Any other iterable of integer code points
        return ''.join(chr(char) for char in data)

else:
    # Python 2 interpreter : str & unicode
    def to_str(data, encoding="UTF-8"):
        """
        Converts the given parameter to a string.
        Returns the first parameter if it is already an instance of ``str``.

        :param data: A string (``str`` or ``unicode``)
        :param encoding: The encoding used to encode a unicode string
        :return: The corresponding string
        """
        if isinstance(data, str):
            # Nothing to do
            return data

        return data.encode(encoding)

    # Same operation: in Python 2, str is the bytes type
    to_bytes = to_str

    def read_to_str(data):
        """
        Nothing to do in Python 2: data read from a stream is already a str

        :param data: The data read from the stream
        :return: The given data, unchanged
        """
        return data
152+
153+
# ------------------------------------------------------------------------------
154+
85155
def load(file_object):
86156
"""
87157
Deserializes Java primitive data and objects serialized using
@@ -103,8 +173,8 @@ def loads(string):
103173
:param string: A Java data string
104174
:return: The deserialized object
105175
"""
106-
f = StringIO(string)
107-
marshaller = JavaObjectUnmarshaller(f)
176+
file_like = BytesIO(string)
177+
marshaller = JavaObjectUnmarshaller(file_like)
108178
marshaller.add_transformer(DefaultObjectTransformer())
109179
return marshaller.readObject()
110180

@@ -267,7 +337,7 @@ def __init__(self, stream=None):
267337
"""
268338
Sets up members
269339
270-
:param stream: An optional input stream
340+
:param stream: An optional input stream (opened in binary/bytes mode)
271341
"""
272342
self.opmap = {
273343
self.TC_NULL: self.do_null,
@@ -389,7 +459,7 @@ def _readString(self):
389459
"""
390460
(length,) = self._readStruct(">H")
391461
ba = self.object_stream.read(length)
392-
return ba
462+
return to_str(ba)
393463

394464

395465
def do_classdesc(self, parent=None, ident=0):
@@ -487,7 +557,9 @@ def do_blockdata(self, parent=None, ident=0):
487557
log_debug("[blockdata]", ident)
488558
(length,) = self._readStruct(">B")
489559
ba = self.object_stream.read(length)
490-
return ba
560+
561+
# Ensure we have an str
562+
return read_to_str(ba)
491563

492564

493565
def do_class(self, parent=None, ident=0):
@@ -613,8 +685,8 @@ def do_string(self, parent=None, ident=0):
613685
"""
614686
log_debug("[string]", ident)
615687
ba = self._readString()
616-
self._add_reference(str(ba))
617-
return str(ba)
688+
self._add_reference(ba)
689+
return ba
618690

619691
def do_array(self, parent=None, ident=0):
620692
"""
@@ -827,7 +899,7 @@ def dump(self, obj):
827899
Dumps the given object in the Java serialization format
828900
"""
829901
self.object_obj = obj
830-
self.object_stream = StringIO()
902+
self.object_stream = BytesIO()
831903
self._writeStreamHeader()
832904
self.writeObject(obj)
833905
return self.object_stream.getvalue()
@@ -891,8 +963,12 @@ def write_blockdata(self, obj, parent=None):
891963
# TC_BLOCKDATA (unsigned byte)<size> (byte)[size]
892964
self._writeStruct(">B", 1, (self.TC_BLOCKDATA,))
893965
if type(obj) is str:
966+
# Latin-1: keep bytes as is
967+
obj = to_bytes(obj, "latin-1")
894968
self._writeStruct(">B", 1, (len(obj),))
895969
self.object_stream.write(obj)
970+
else:
971+
log_error("Not a str blockdata: %r" % obj)
896972

897973

898974
def write_object(self, obj, parent=None):

0 commit comments

Comments
 (0)