Skip to content

Commit 55e54ee

Browse files
committed
Split modules into v1 and v2 packages
* Moved the old parser to the "v1" package
* Renamed "deserialize" to "v2"
* Use enum in constants to sanitize everything (seems to introduce some bugs)
* Split the old parser into different modules to reduce its size a bit (to be continued)
* Moved some v1 methods to the shared utils
* Updated the tests to use the v1 package (still failing)

See #32 and #34
1 parent d98961e commit 55e54ee

File tree

14 files changed

+1159
-997
lines changed

14 files changed

+1159
-997
lines changed

javaobj/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
"""
3535

3636
# Imports giving access to what the javaobj module provides
37-
from javaobj.core import *
37+
from javaobj.v1.beans import *
38+
from javaobj.v1.core import *
39+
from javaobj.v1.transformers import *
3840

3941
# ------------------------------------------------------------------------------
4042

javaobj/constants.py

Lines changed: 136 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -24,52 +24,142 @@
2424
limitations under the License.
2525
"""
2626

27-
STREAM_MAGIC = 0xACED
28-
STREAM_VERSION = 0x05
29-
30-
BASE_REFERENCE_IDX = 0x7E0000
31-
32-
TC_NULL = 0x70
33-
TC_REFERENCE = 0x71
34-
TC_CLASSDESC = 0x72
35-
TC_OBJECT = 0x73
36-
TC_STRING = 0x74
37-
TC_ARRAY = 0x75
38-
TC_CLASS = 0x76
39-
TC_BLOCKDATA = 0x77
40-
TC_ENDBLOCKDATA = 0x78
41-
TC_RESET = 0x79
42-
TC_BLOCKDATALONG = 0x7A
43-
TC_EXCEPTION = 0x7B
44-
TC_LONGSTRING = 0x7C
45-
TC_PROXYCLASSDESC = 0x7D
46-
TC_ENUM = 0x7E
47-
48-
SC_WRITE_METHOD = 0x01 # if SC_SERIALIZABLE
49-
SC_BLOCK_DATA = 0x08 # if SC_EXTERNALIZABLE
50-
SC_SERIALIZABLE = 0x02
51-
SC_EXTERNALIZABLE = 0x04
52-
SC_ENUM = 0x10
53-
54-
# type definition chars (typecode)
55-
TYPE_BYTE = ord("B") # 0x42
56-
TYPE_CHAR = ord("C") # 0x43
57-
TYPE_DOUBLE = ord("D") # 0x44
58-
TYPE_FLOAT = ord("F") # 0x46
59-
TYPE_INTEGER = ord("I") # 0x49
60-
TYPE_LONG = ord("J") # 0x4A
61-
TYPE_SHORT = ord("S") # 0x53
62-
TYPE_BOOLEAN = ord("Z") # 0x5A
63-
TYPE_OBJECT = ord("L") # 0x4C
64-
TYPE_ARRAY = ord("[") # 0x5B
27+
import enum
6528

29+
30+
__all__ = (
31+
"PRIMITIVE_TYPES",
32+
"StreamConstants",
33+
"TerminalCode",
34+
"ClassDescFlags",
35+
"TypeCode",
36+
"StreamCodeDebug",
37+
)
38+
39+
40+
class StreamConstants(enum.IntEnum):
    """
    Fixed values of the stream protocol
    """

    # Magic number expected at the start of any serialized file
    STREAM_MAGIC = 0xACED

    # Protocol version: the only one javaobj supports
    STREAM_VERSION = 5

    # Value from which handles are numbered
    BASE_REFERENCE_IDX = 0x7E0000
53+
54+
55+
@enum.unique
class TerminalCode(enum.IntEnum):
    """
    Stream type codes (TC_* markers)

    The enumeration is declared unique: two names sharing a value would
    silently turn the second one into an alias of the first, which breaks
    name lookups by value.
    """

    TC_NULL = 0x70
    TC_REFERENCE = 0x71
    TC_CLASSDESC = 0x72
    TC_OBJECT = 0x73
    TC_STRING = 0x74
    TC_ARRAY = 0x75
    TC_CLASS = 0x76
    TC_BLOCKDATA = 0x77
    TC_ENDBLOCKDATA = 0x78
    TC_RESET = 0x79
    TC_BLOCKDATALONG = 0x7A
    TC_EXCEPTION = 0x7B
    TC_LONGSTRING = 0x7C
    TC_PROXYCLASSDESC = 0x7D
    TC_ENUM = 0x7E
    # TC_MAX (0x7E) is deliberately left out: it is not used here and, having
    # the same value as TC_ENUM, it would only be an alias of that member
77+
78+
79+
class ClassDescFlags(enum.IntFlag):
    """
    Bit flags of a class description
    """

    SC_WRITE_METHOD = 1 << 0  # 0x01, if SC_SERIALIZABLE
    SC_BLOCK_DATA = 1 << 3  # 0x08, if SC_EXTERNALIZABLE
    SC_SERIALIZABLE = 1 << 1  # 0x02
    SC_EXTERNALIZABLE = 1 << 2  # 0x04
    SC_ENUM = 1 << 4  # 0x10
89+
90+
91+
class TypeCode(enum.IntEnum):
    """
    Characters used as type definitions (type codes), stored as their
    integer code point
    """

    # Primitive types
    TYPE_BYTE = 0x42  # "B"
    TYPE_CHAR = 0x43  # "C"
    TYPE_DOUBLE = 0x44  # "D"
    TYPE_FLOAT = 0x46  # "F"
    TYPE_INTEGER = 0x49  # "I"
    TYPE_LONG = 0x4A  # "J"
    TYPE_SHORT = 0x53  # "S"
    TYPE_BOOLEAN = 0x5A  # "Z"
    # Object types
    TYPE_OBJECT = 0x4C  # "L"
    TYPE_ARRAY = 0x5B  # "["
108+
109+
110+
# List of the types defined as primitive
66111
PRIMITIVE_TYPES = (
67-
TYPE_BYTE,
68-
TYPE_CHAR,
69-
TYPE_DOUBLE,
70-
TYPE_FLOAT,
71-
TYPE_INTEGER,
72-
TYPE_LONG,
73-
TYPE_SHORT,
74-
TYPE_BOOLEAN,
112+
TypeCode.TYPE_BYTE,
113+
TypeCode.TYPE_CHAR,
114+
TypeCode.TYPE_DOUBLE,
115+
TypeCode.TYPE_FLOAT,
116+
TypeCode.TYPE_INTEGER,
117+
TypeCode.TYPE_LONG,
118+
TypeCode.TYPE_SHORT,
119+
TypeCode.TYPE_BOOLEAN,
75120
)
121+
122+
123+
class StreamCodeDebug:
    """
    Helpers converting stream codes to readable names
    """

    @staticmethod
    def op_id(op_id):
        # type: (int) -> str
        """
        Gives the name of an OP Code

        :param op_id: OP Code
        :return: Name of the matching TerminalCode member, or an
                 "<unknown TC:...>" string if no member has this value
        """
        try:
            code = TerminalCode(op_id)
        except ValueError:
            # The value is not a member of TerminalCode
            return "<unknown TC:{0}>".format(op_id)
        return code.name

    @staticmethod
    def type_code(type_id):
        # type: (int) -> str
        """
        Gives the name of a Type Code

        :param type_id: Type code
        :return: Name of the matching TypeCode member, or an
                 "<unknown TypeCode:...>" string if no member has this value
        """
        try:
            code = TypeCode(type_id)
        except ValueError:
            # The value is not a member of TypeCode
            return "<unknown TypeCode:{0}>".format(type_id)
        return code.name

    @staticmethod
    def flags(flags):
        # type: (int) -> str
        """
        Lists the names of the class description flags set in the given
        integer

        :param flags: A class description flag entry
        :return: The names of the flags, sorted and joined with ", "
        """
        found = [flag.name for flag in ClassDescFlags if flag & flags]
        found.sort()
        return ", ".join(found)

javaobj/utils.py

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,15 @@
2727
limitations under the License.
2828
"""
2929

30+
from __future__ import absolute_import
31+
3032
# Standard library
3133
import logging
34+
import struct
3235
import sys
3336

3437
# Modified UTF-8 parser
35-
from javaobj.modifiedutf8 import decode_modified_utf8
38+
from .modifiedutf8 import decode_modified_utf8
3639

3740
# ------------------------------------------------------------------------------
3841

@@ -71,6 +74,69 @@ def log_error(message, ident=0):
7174

7275
# ------------------------------------------------------------------------------
7376

77+
78+
def read_struct(data, fmt_str):
    # type: (bytes, str) -> tuple
    """
    Reads the given structure from the beginning of the input bytes.
    Returns both the read elements and the remaining data

    :param data: Data as bytes
    :param fmt_str: Struct unpack format string
    :return: A tuple (results as tuple, remaining data)
    :raise struct.error: If data is shorter than the structure size
    """
    size = struct.calcsize(fmt_str)
    # Only the first "size" bytes are unpacked, the rest is handed back
    return struct.unpack(fmt_str, data[:size]), data[size:]
90+
91+
92+
def read_string(data, length_fmt="H"):
    # type: (bytes, str) -> tuple
    """
    Reads a serialized string: a big-endian length prefix followed by that
    many bytes of string content

    :param data: Bytes where to read the string from
    :param length_fmt: Structure format of the string length (H or Q)
    :return: A tuple (deserialized string, remaining data)
    """
    # Length prefix first...
    (length,), data = read_struct(data, ">{0}".format(length_fmt))
    # ... then the string content, leaving the rest untouched
    ba, data = data[:length], data[length:]
    return to_unicode(ba), data
104+
105+
106+
# ------------------------------------------------------------------------------
107+
108+
109+
def hexdump(src, start_offset=0, length=16):
    # type: (str, int, int) -> str
    """
    Prepares a hexadecimal dump string

    :param src: A string containing binary data
    :param start_offset: The start offset of the source
    :param length: Length of a dump line
    :return: A dump string
    """
    # Translation table indexed by character code: a character is kept when
    # its repr() is just itself between two quotes (3 characters long), any
    # other one is shown as a dot
    char_filter = "".join(
        chr(x) if len(repr(chr(x))) == 3 else "." for x in range(256)
    )
    # Line pattern: offset, hexadecimal column padded to a full line, text
    pattern = "{{0:04X}} {{1:<{0}}} {{2}}\n".format(length * 3)

    # Convert raw data to str (Python 3 compatibility)
    src = to_str(src, "latin-1")

    result = []
    for i in range(0, len(src), length):
        chunk = src[i : i + length]
        hexa = " ".join("{0:02X}".format(ord(x)) for x in chunk)
        printable = chunk.translate(char_filter)
        result.append(pattern.format(i + start_offset, hexa, printable))

    return "".join(result)
135+
136+
137+
# ------------------------------------------------------------------------------
138+
139+
74140
if sys.version_info[0] >= 3:
75141
UNICODE_TYPE = str
76142
unicode_char = chr
@@ -118,8 +184,8 @@ def read_to_str(data):
118184

119185

120186
else:
121-
UNICODE_TYPE = unicode
122-
unicode_char = unichr
187+
UNICODE_TYPE = unicode # pylint:disable=undefined-variable
188+
unicode_char = unichr # pylint:disable=undefined-variable
123189

124190
# Python 2 interpreter : str & unicode
125191
def to_str(data, encoding="UTF-8"):
@@ -149,7 +215,7 @@ def to_unicode(data, encoding="UTF-8"):
149215
:param encoding: The encoding of data
150216
:return: The corresponding string
151217
"""
152-
if type(data) is unicode:
218+
if type(data) is UNICODE_TYPE:
153219
# Nothing to do
154220
return data
155221
try:

javaobj/v1/__init__.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/usr/bin/env python
2+
"""
3+
First version of the un-marshalling process of javaobj.
4+
5+
:authors: Thomas Calmant
6+
:license: Apache License 2.0
7+
:version: 0.4.0
8+
:status: Alpha
9+
10+
..
11+
12+
Copyright 2019 Thomas Calmant
13+
14+
Licensed under the Apache License, Version 2.0 (the "License");
15+
you may not use this file except in compliance with the License.
16+
You may obtain a copy of the License at
17+
18+
http://www.apache.org/licenses/LICENSE-2.0
19+
20+
Unless required by applicable law or agreed to in writing, software
21+
distributed under the License is distributed on an "AS IS" BASIS,
22+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23+
See the License for the specific language governing permissions and
24+
limitations under the License.
25+
"""
26+
27+
from . import beans, core, transformers
28+
from .core import (
29+
load,
30+
loads,
31+
dumps,
32+
JavaObjectMarshaller,
33+
JavaObjectUnmarshaller,
34+
)
35+
from .transformers import DefaultObjectTransformer

0 commit comments

Comments
 (0)