Skip to content

Commit a5e9be2

Browse files
committed
Better Py2/3 cross compatibility in modified_utf8
and added bytes_char in utils
1 parent 52d1339 commit a5e9be2

File tree

2 files changed

+48
-5
lines changed

2 files changed

+48
-5
lines changed

javaobj/modifiedutf8.py

Lines changed: 44 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,11 @@
1515
:status: Alpha
1616
"""
1717

18+
from __future__ import unicode_literals
19+
20+
import sys
21+
22+
1823
# Module version
1924
__version_info__ = (0, 3, 0)
2025
__version__ = ".".join(str(x) for x in __version_info__)
@@ -27,6 +32,36 @@
2732

2833
# ------------------------------------------------------------------------------
2934

35+
if sys.version_info[0] >= 3:
36+
unicode_char = chr
37+
38+
def byte_to_int(data):
39+
# type: (bytes) -> int
40+
"""
41+
Converts the first byte of the given data to an integer
42+
"""
43+
if isinstance(data, int):
44+
return data
45+
elif isinstance(data, bytes):
46+
return data[0]
47+
48+
49+
else:
50+
unicode_char = unichr # pylint:disable=undefined-variable
51+
52+
def byte_to_int(data):
53+
# type: (bytes) -> int
54+
"""
55+
Converts the first byte of the given data to an integer
56+
"""
57+
if isinstance(data, int):
58+
return data
59+
elif isinstance(data, str):
60+
return ord(data[0])
61+
62+
63+
# ------------------------------------------------------------------------------
64+
3065

3166
class DecodeMap(object):
3267
"""
@@ -70,7 +105,11 @@ def apply(self, byte, value, data, i, count):
70105
value |= byte & self.mask2
71106
else:
72107
raise UnicodeDecodeError(
73-
NAME, data, i, i + count, "invalid {}-byte sequence".format(self.count)
108+
NAME,
109+
data,
110+
i,
111+
i + count,
112+
"invalid {}-byte sequence".format(self.count),
74113
)
75114
return value
76115

@@ -171,8 +210,8 @@ def decode_modified_utf8(data, errors="strict"):
171210
:return: unicode text and length
172211
:raises UnicodeDecodeError: sequence is invalid.
173212
"""
174-
value, length = u"", 0
175-
it = iter(decoder(data))
213+
value, length = "", 0
214+
it = iter(decoder(byte_to_int(d) for d in data))
176215
while True:
177216
try:
178217
value += next(it)
@@ -185,10 +224,10 @@ def decode_modified_utf8(data, errors="strict"):
185224
elif errors == "ignore":
186225
pass
187226
elif errors == "replace":
188-
value += u"\uFFFD"
227+
value += "\uFFFD"
189228
length += 1
190229
return value, length
191230

192231

193232
def mutf8_unichr(value):
194-
return chr(value)
233+
return unicode_char(value)

javaobj/utils.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -141,6 +141,9 @@ def hexdump(src, start_offset=0, length=16):
141141
UNICODE_TYPE = str
142142
unicode_char = chr
143143

144+
def bytes_char(c):
145+
return bytes((c,))
146+
144147
# Python 3 interpreter : bytes & str
145148
def to_bytes(data, encoding="UTF-8"):
146149
"""
@@ -186,6 +189,7 @@ def read_to_str(data):
186189
else:
187190
UNICODE_TYPE = unicode # pylint:disable=undefined-variable
188191
unicode_char = unichr # pylint:disable=undefined-variable
192+
bytes_char = chr
189193

190194
# Python 2 interpreter : str & unicode
191195
def to_str(data, encoding="UTF-8"):

0 commit comments

Comments
 (0)