1515:status: Alpha
1616"""
1717
18+ from __future__ import unicode_literals
19+
20+ import sys
21+
22+
1823# Module version
1924__version_info__ = (0 , 3 , 0 )
2025__version__ = "." .join (str (x ) for x in __version_info__ )
2732
2833# ------------------------------------------------------------------------------
2934
if sys.version_info[0] >= 3:
    # Python 3: chr() returns a text (unicode) character
    unicode_char = chr

    def byte_to_int(data):
        # type: (bytes) -> int
        """
        Converts the first byte of the given data to an integer

        :param data: An integer (returned as is), or a bytes/bytearray
                     object of which only the first item is read
        :return: The value of the first byte, as an integer
        :raises TypeError: The given data is neither an integer nor bytes
        :raises IndexError: The given bytes object is empty
        """
        if isinstance(data, int):
            return data

        if isinstance(data, (bytes, bytearray)):
            # Indexing bytes/bytearray yields an integer in Python 3
            return data[0]

        # Fail here rather than letting None reach the caller
        raise TypeError(
            "Expected an integer or bytes, got {0}".format(
                type(data).__name__
            )
        )


else:
    # Python 2: chr() returns a byte string, unichr() returns unicode text
    unicode_char = unichr  # pylint:disable=undefined-variable

    def byte_to_int(data):
        # type: (bytes) -> int
        """
        Converts the first byte of the given data to an integer

        :param data: An integer (returned as is), or a str/bytearray
                     object of which only the first item is read
        :return: The value of the first byte, as an integer
        :raises TypeError: The given data is neither an integer nor bytes
        :raises IndexError: The given bytes object is empty
        """
        if isinstance(data, int):
            return data

        if isinstance(data, bytearray):
            # Indexing a bytearray already yields an integer
            return data[0]

        if isinstance(data, str):
            # Indexing a Python 2 str yields a 1-character str
            return ord(data[0])

        # Fail here rather than letting None reach the caller
        raise TypeError(
            "Expected an integer or bytes, got {0}".format(
                type(data).__name__
            )
        )
61+
62+
63+ # ------------------------------------------------------------------------------
64+
3065
3166class DecodeMap (object ):
3267 """
@@ -70,7 +105,11 @@ def apply(self, byte, value, data, i, count):
70105 value |= byte & self .mask2
71106 else :
72107 raise UnicodeDecodeError (
73- NAME , data , i , i + count , "invalid {}-byte sequence" .format (self .count )
108+ NAME ,
109+ data ,
110+ i ,
111+ i + count ,
112+ "invalid {}-byte sequence" .format (self .count ),
74113 )
75114 return value
76115
@@ -171,8 +210,8 @@ def decode_modified_utf8(data, errors="strict"):
171210 :return: unicode text and length
172211 :raises UnicodeDecodeError: sequence is invalid.
173212 """
174- value , length = u "" , 0
175- it = iter (decoder (data ))
213+ value , length = "" , 0
214+ it = iter (decoder (byte_to_int ( d ) for d in data ))
176215 while True :
177216 try :
178217 value += next (it )
@@ -185,10 +224,10 @@ def decode_modified_utf8(data, errors="strict"):
185224 elif errors == "ignore" :
186225 pass
187226 elif errors == "replace" :
188- value += u "\uFFFD "
227+ value += "\uFFFD "
189228 length += 1
190229 return value , length
191230
192231
def mutf8_unichr(value):
    """
    Converts a Unicode code point to the matching text character

    Relies on the module-level ``unicode_char`` alias, which is bound to
    ``chr`` on Python 3 and to ``unichr`` on Python 2.

    :param value: A Unicode code point, as an integer
    :return: The text character matching the given code point
    """
    return unicode_char(value)
0 commit comments