Skip to content

Commit e016b3d

Browse files
committed
Merge remote-tracking branch 'jnothman/read_size_cpp'
Conflicts: msgpack/_msgpack.pyx setup.py
2 parents 1526316 + 9d9c3ee commit e016b3d

File tree

6 files changed

+206
-29
lines changed

6 files changed

+206
-29
lines changed

msgpack/_msgpack.pyx

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,17 @@ cdef class Packer(object):
182182
self.pk.length = 0
183183
return buf
184184

185+
cpdef pack_array_header(self, size_t size):
    """Pack only the header of an array with `size` items and return it as bytes.

    The caller is expected to pack the `size` items separately afterwards.

    Raises ValueError if `size` cannot be represented in a 32-bit array
    header, and MemoryError if the packer could not write the header.
    """
    cdef int ret
    # The largest array header in the msgpack format stores a 32-bit count.
    if size > 0xffffffff:
        raise ValueError("array is too large")
    ret = msgpack_pack_array(&self.pk, size)
    # NOTE(review): -1 is assumed to mean the output buffer could not grow;
    # confirm against the packer's write routine.
    if ret == -1:
        raise MemoryError
    elif ret:  # any other non-zero code is unexpected
        raise TypeError
    buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
    # Reset the write position so the next pack call starts from an empty buffer.
    self.pk.length = 0
    return buf
190+
191+
cpdef pack_map_header(self, size_t size):
    """Pack only the header of a map with `size` key-value pairs and return it as bytes.

    The caller is expected to pack the `size` keys and values separately
    afterwards.

    Raises ValueError if `size` cannot be represented in a 32-bit map
    header, and MemoryError if the packer could not write the header.
    """
    cdef int ret
    # The largest map header in the msgpack format stores a 32-bit count.
    if size > 0xffffffff:
        raise ValueError("map is too large")
    ret = msgpack_pack_map(&self.pk, size)
    # NOTE(review): -1 is assumed to mean the output buffer could not grow;
    # confirm against the packer's write routine.
    if ret == -1:
        raise MemoryError
    elif ret:  # any other non-zero code is unexpected
        raise TypeError
    buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
    # Reset the write position so the next pack call starts from an empty buffer.
    self.pk.length = 0
    return buf
185196

186197
def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'):
187198
"""
@@ -213,8 +224,12 @@ cdef extern from "unpack.h":
213224
unsigned int ct
214225
PyObject* key
215226

216-
int template_execute(template_context* ctx, const_char_ptr data,
217-
size_t len, size_t* off, bint construct) except -1
227+
ctypedef int (*execute_fn)(template_context* ctx, const_char_ptr data,
228+
size_t len, size_t* off) except -1
229+
execute_fn template_construct
230+
execute_fn template_skip
231+
execute_fn read_array_header
232+
execute_fn read_map_header
218233
void template_init(template_context* ctx)
219234
object template_data(template_context* ctx)
220235

@@ -277,7 +292,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
277292
PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len)
278293

279294
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, encoding, unicode_errors)
280-
ret = template_execute(&ctx, buf, buf_len, &off, 1)
295+
ret = template_construct(&ctx, buf, buf_len, &off)
281296
if ret == 1:
282297
obj = template_data(&ctx)
283298
if off < buf_len:
@@ -452,16 +467,13 @@ cdef class Unpacker(object):
452467
else:
453468
self.file_like = None
454469

455-
cdef object _unpack(self, bint construct):
470+
cdef object _unpack(self, execute_fn execute):
456471
cdef int ret
457472
cdef object obj
458473
while 1:
459-
ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head, construct)
474+
ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
460475
if ret == 1:
461-
if construct:
462-
obj = template_data(&self.ctx)
463-
else:
464-
obj = None
476+
obj = template_data(&self.ctx)
465477
template_init(&self.ctx)
466478
return obj
467479
elif ret == 0:
@@ -474,17 +486,25 @@ cdef class Unpacker(object):
474486

475487
def unpack(self):
476488
"""unpack one object"""
477-
return self._unpack(1)
489+
return self._unpack(template_construct)
478490

479491
def skip(self):
480492
"""read and ignore one object, returning None"""
481-
return self._unpack(0)
493+
return self._unpack(template_skip)
494+
495+
def read_array_header(self):
    """Assuming the next object is an array, return its size n.

    The next n unpack() calls will then iterate over the array's contents.
    """
    return self._unpack(read_array_header)
498+
499+
def read_map_header(self):
    """Assuming the next object is a map, return its size n.

    The next n * 2 unpack() calls will then iterate over the map's
    key-value pairs.
    """
    return self._unpack(read_map_header)
482502

483503
def __iter__(self):
484504
return self
485505

486506
def __next__(self):
487-
return self._unpack(1)
507+
return self._unpack(template_construct)
488508

489509
# for debug.
490510
#def _buf(self):

msgpack/unpack.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ typedef struct unpack_user {
4242

4343
#define msgpack_unpack_user unpack_user
4444

45+
typedef int (*execute_fn)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off);
4546

4647
struct template_context;
4748
typedef struct template_context template_context;

msgpack/unpack_template.h

Lines changed: 76 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context
9595
}
9696

9797

98-
msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off, int construct)
98+
template <bool construct>
99+
msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off)
99100
{
100101
assert(len >= *off);
101102

@@ -380,6 +381,8 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c
380381

381382

382383
_finish:
384+
if (!construct)
385+
msgpack_unpack_callback(_nil)(user, &obj);
383386
stack[0].obj = obj;
384387
++p;
385388
ret = 1;
@@ -405,20 +408,85 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c
405408
#undef construct_cb
406409
}
407410

408-
409-
#undef msgpack_unpack_func
410-
#undef msgpack_unpack_callback
411-
#undef msgpack_unpack_struct
412-
#undef msgpack_unpack_object
413-
#undef msgpack_unpack_user
414-
411+
#undef SWITCH_RANGE_BEGIN
412+
#undef SWITCH_RANGE
413+
#undef SWITCH_RANGE_DEFAULT
414+
#undef SWITCH_RANGE_END
415415
#undef push_simple_value
416416
#undef push_fixed_value
417417
#undef push_variable_value
418418
#undef again_fixed_trail
419419
#undef again_fixed_trail_if_zero
420420
#undef start_container
421421

422+
template <unsigned int fixed_offset, unsigned int var_offset>
423+
msgpack_unpack_func(int, _container_header)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off)
424+
{
425+
assert(len >= *off);
426+
uint32_t size;
427+
const unsigned char *const p = (unsigned char*)data + *off;
428+
429+
#define inc_offset(inc) \
430+
if (len - *off < inc) \
431+
return 0; \
432+
*off += inc;
433+
434+
switch (*p) {
435+
case var_offset:
436+
inc_offset(3);
437+
size = _msgpack_load16(uint16_t, p + 1);
438+
break;
439+
case var_offset + 1:
440+
inc_offset(5);
441+
size = _msgpack_load32(uint32_t, p + 1);
442+
break;
443+
#ifdef USE_CASE_RANGE
444+
case fixed_offset + 0x0 ... fixed_offset + 0xf:
445+
#else
446+
case fixed_offset + 0x0:
447+
case fixed_offset + 0x1:
448+
case fixed_offset + 0x2:
449+
case fixed_offset + 0x3:
450+
case fixed_offset + 0x4:
451+
case fixed_offset + 0x5:
452+
case fixed_offset + 0x6:
453+
case fixed_offset + 0x7:
454+
case fixed_offset + 0x8:
455+
case fixed_offset + 0x9:
456+
case fixed_offset + 0xa:
457+
case fixed_offset + 0xb:
458+
case fixed_offset + 0xc:
459+
case fixed_offset + 0xd:
460+
case fixed_offset + 0xe:
461+
case fixed_offset + 0xf:
462+
#endif
463+
++*off;
464+
size = ((unsigned int)*p) & 0x0f;
465+
break;
466+
default:
467+
PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream");
468+
return -1;
469+
}
470+
msgpack_unpack_callback(_uint32)(&ctx->user, size, &ctx->stack[0].obj);
471+
return 1;
472+
}
473+
474+
#undef SWITCH_RANGE_BEGIN
475+
#undef SWITCH_RANGE
476+
#undef SWITCH_RANGE_DEFAULT
477+
#undef SWITCH_RANGE_END
478+
479+
static const execute_fn template_construct = &template_execute<true>;
480+
static const execute_fn template_skip = &template_execute<false>;
481+
static const execute_fn read_array_header = &template_container_header<0x90, 0xdc>;
482+
static const execute_fn read_map_header = &template_container_header<0x80, 0xde>;
483+
484+
#undef msgpack_unpack_func
485+
#undef msgpack_unpack_callback
486+
#undef msgpack_unpack_struct
487+
#undef msgpack_unpack_object
488+
#undef msgpack_unpack_user
489+
422490
#undef NEXT_CS
423491

424492
/* vim: set ts=4 sw=4 noexpandtab */

setup.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
def cythonize(src):
2020
sys.stderr.write("cythonize: %r\n" % (src,))
21-
cython_compiler.compile([src], emit_linenums=True)
21+
cython_compiler.compile([src], cplus=True, emit_linenums=True)
2222

2323
def ensure_source(src):
2424
pyx = os.path.splitext(src)[0] + '.pyx'
@@ -34,13 +34,6 @@ def ensure_source(src):
3434
os.stat(src).st_mtime < os.stat(pyx).st_mtime and
3535
have_cython):
3636
cythonize(pyx)
37-
38-
# Use C++ compiler on win32.
39-
# MSVC9 doesn't provide stdint.h when using C Compiler.
40-
if sys.platform == 'win32':
41-
cpp = src + 'pp'
42-
shutil.copy(src, cpp)
43-
return cpp
4437
else:
4538
return src
4639

@@ -67,7 +60,7 @@ def __init__(self, *args, **kwargs):
6760
else:
6861
Sdist = sdist
6962

70-
sources = ['msgpack/_msgpack.c']
63+
sources = ['msgpack/_msgpack.cpp']
7164
libraries = []
7265
if sys.platform == 'win32':
7366
libraries.append('ws2_32')

test/test_pack.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,35 @@ def testPackFloat():
9090
assert_equal(packb(1.0, use_single_float=True), b'\xca' + struct.pack('>f', 1.0))
9191
assert_equal(packb(1.0, use_single_float=False), b'\xcb' + struct.pack('>d', 1.0))
9292

93+
def testArraySize(sizes=(0, 5, 50, 1000)):
    """Check that a packed array header followed by items unpacks as one array.

    For each size, the header is written with pack_array_header() and the
    items 0..size-1 are packed one by one; unpacking the stream must then
    yield one complete sequence per size.
    """
    # An immutable tuple is used as the default so the value cannot be
    # shared and mutated between calls.
    bio = six.BytesIO()
    packer = Packer()
    for size in sizes:
        bio.write(packer.pack_array_header(size))
        for i in range(size):
            bio.write(packer.pack(i))

    bio.seek(0)
    unpacker = Unpacker(bio)
    for size in sizes:
        assert unpacker.unpack() == tuple(range(size))
105+
106+
def testMapSize(sizes=(0, 5, 50, 1000)):
    """Check that a packed map header followed by key/value items unpacks as one map.

    For each size, the header is written with pack_map_header() and the
    pairs (i, i * 2) are packed one item at a time; unpacking the stream
    must then yield one complete mapping per size.
    """
    # An immutable tuple is used as the default so the value cannot be
    # shared and mutated between calls.
    bio = six.BytesIO()
    packer = Packer()
    for size in sizes:
        bio.write(packer.pack_map_header(size))
        for i in range(size):
            bio.write(packer.pack(i))  # key
            bio.write(packer.pack(i * 2))  # value

    bio.seek(0)
    unpacker = Unpacker(bio)
    for size in sizes:
        # dict() over a generator instead of a dict comprehension: the
        # comprehension syntax does not exist on Python 2.6, which this
        # test module is meant to run on.
        expected = dict((i, i * 2) for i in range(size))
        assert unpacker.unpack() == expected
119+
120+
121+
93122

94123
class odict(dict):
95124
'''Reimplement OrderedDict to run test on Python 2.6'''

test/test_read_size.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""Test Unpacker's read_array_header and read_map_header methods"""
2+
from msgpack import packb, Unpacker
3+
UnexpectedTypeException = ValueError
4+
5+
def test_read_array_header():
    """read_array_header() returns the item count and leaves the items to unpack()."""
    items = ['a', 'b', 'c']
    unpacker = Unpacker()
    unpacker.feed(packb(items))
    assert unpacker.read_array_header() == 3
    for expected in items:
        assert unpacker.unpack() == expected
    # Nothing is left after the last item, so one more unpack() must fail.
    try:
        unpacker.unpack()
    except StopIteration:
        assert 1, 'okay'
    else:
        assert 0, 'should raise exception'
17+
18+
19+
def test_read_map_header():
    """read_map_header() returns the pair count and leaves keys/values to unpack()."""
    unpacker = Unpacker()
    unpacker.feed(packb({'a': 'A'}))
    assert unpacker.read_map_header() == 1
    # The key comes first, then its value.
    for expected in ('a', 'A'):
        assert unpacker.unpack() == expected
    # Nothing is left after the single pair, so one more unpack() must fail.
    try:
        unpacker.unpack()
    except StopIteration:
        assert 1, 'okay'
    else:
        assert 0, 'should raise exception'
30+
31+
def test_incorrect_type_array():
    """read_array_header() rejects a stream whose next object is an integer."""
    unpacker = Unpacker()
    unpacker.feed(packb(1))
    try:
        unpacker.read_array_header()
    except UnexpectedTypeException:
        assert 1, 'okay'
    else:
        assert 0, 'should raise exception'
39+
40+
def test_incorrect_type_map():
    """read_map_header() rejects a stream whose next object is an integer."""
    unpacker = Unpacker()
    unpacker.feed(packb(1))
    try:
        unpacker.read_map_header()
    except UnexpectedTypeException:
        assert 1, 'okay'
    else:
        assert 0, 'should raise exception'
48+
49+
def test_correct_type_nested_array():
    """read_array_header() rejects a map even when that map contains an array.

    Despite the function's name, this exercises the failure path: the
    outermost object is a map, so asking for an array header must raise.
    """
    unpacker = Unpacker()
    unpacker.feed(packb({'a': ['b', 'c', 'd']}))
    try:
        unpacker.read_array_header()
    except UnexpectedTypeException:
        assert 1, 'okay'
    else:
        assert 0, 'should raise exception'
57+
58+
def test_incorrect_type_nested_map():
    """read_map_header() rejects an array even when that array contains a map."""
    unpacker = Unpacker()
    unpacker.feed(packb([{'a': 'b'}]))
    try:
        unpacker.read_map_header()
    except UnexpectedTypeException:
        assert 1, 'okay'
    else:
        assert 0, 'should raise exception'
66+

0 commit comments

Comments
 (0)