Skip to content

Commit 7b11670

Browse files
committed
Add max_buffer_size to Unpacker.
1 parent e133c7f commit 7b11670

File tree

2 files changed

+73
-14
lines changed

2 files changed

+73
-14
lines changed

ChangeLog.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
0.2.1
2+
=======
3+
:release date: NOT RELEASED YET
4+
5+
Changes
6+
-------
7+
* Add ``max_buffer_size`` parameter to Unpacker. It limits internal buffer size
8+
and allows unpacking data from untrusted sources safely.
9+
10+
* Unpacker's buffer reallocation algorithm is less greedy now. It causes a performance
11+
decrease in rare cases but is memory efficient and doesn't allocate more than ``max_buffer_size``.
12+
13+
Bugs fixed
14+
----------
15+
16+
117
0.2.0
218
=======
319
:release date: 2012-06-27

msgpack/_msgpack.pyx

Lines changed: 57 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ cdef extern from "Python.h":
1010

1111
from libc.stdlib cimport *
1212
from libc.string cimport *
13+
from libc.limits cimport *
14+
15+
1316
import gc
1417
_gc_disable = gc.disable
1518
_gc_enable = gc.enable
@@ -35,6 +38,11 @@ cdef extern from "pack.h":
3538

3639
cdef int DEFAULT_RECURSE_LIMIT=511
3740

41+
42+
class BufferFull(Exception):
43+
pass
44+
45+
3846
cdef class Packer(object):
3947
"""MessagePack Packer
4048
@@ -193,7 +201,9 @@ cdef extern from "unpack.h":
193201
object template_data(template_context* ctx)
194202

195203

196-
def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
204+
def unpackb(object packed, object object_hook=None, object list_hook=None,
205+
bint use_list=0, encoding=None, unicode_errors="strict",
206+
):
197207
"""
198208
Unpack packed_bytes to object. Returns an unpacked object."""
199209
cdef template_context ctx
@@ -243,12 +253,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, bint
243253
return None
244254

245255

246-
def unpack(object stream, object object_hook=None, object list_hook=None, bint use_list=0, encoding=None, unicode_errors="strict"):
256+
def unpack(object stream, object object_hook=None, object list_hook=None,
257+
bint use_list=0, encoding=None, unicode_errors="strict",
258+
):
247259
"""
248260
unpack an object from stream.
249261
"""
250262
return unpackb(stream.read(), use_list=use_list,
251-
object_hook=object_hook, list_hook=list_hook, encoding=encoding, unicode_errors=unicode_errors)
263+
object_hook=object_hook, list_hook=list_hook,
264+
encoding=encoding, unicode_errors=unicode_errors,
265+
)
252266

253267
cdef class Unpacker(object):
254268
"""
@@ -259,7 +273,7 @@ cdef class Unpacker(object):
259273
When `Unpacker` is initialized with `file_like`, the unpacker reads serialized data
260274
from it and `.feed()` method is not usable.
261275
262-
`read_size` is used as `file_like.read(read_size)`. (default: 1M)
276+
`read_size` is used as `file_like.read(read_size)`. (default: 1024**2)
263277
264278
If `use_list` is true, msgpack list is deserialized to Python list.
265279
Otherwise, it is deserialized to Python tuple. (default: False)
@@ -272,11 +286,24 @@ cdef class Unpacker(object):
272286
273287
`unicode_errors` is used for decoding bytes.
274288
275-
example::
289+
`max_buffer_size` limits the size of data waiting to be unpacked. 0 means unlimited
290+
(default).
291+
Raises `BufferFull` exception when it is insufficient.
292+
You should set this parameter when unpacking data from an untrusted source.
293+
294+
example of streaming deserialize from file-like object::
295+
296+
unpacker = Unpacker(file_like)
297+
for o in unpacker:
298+
do_something(o)
299+
300+
example of streaming deserialize from socket::
276301
277302
unpacker = Unpacker()
278303
while 1:
279-
buf = astream.read()
304+
buf = sock.recv(1024**2)
305+
if not buf:
306+
break
280307
unpacker.feed(buf)
281308
for o in unpacker:
282309
do_something(o)
@@ -293,6 +320,7 @@ cdef class Unpacker(object):
293320
cdef object _berrors
294321
cdef char *encoding
295322
cdef char *unicode_errors
323+
cdef size_t max_buffer_size
296324

297325
def __cinit__(self):
298326
self.buf = NULL
@@ -303,7 +331,7 @@ cdef class Unpacker(object):
303331

304332
def __init__(self, file_like=None, Py_ssize_t read_size=1024*1024, bint use_list=0,
305333
object object_hook=None, object list_hook=None,
306-
encoding=None, unicode_errors='strict'):
334+
encoding=None, unicode_errors='strict', int max_buffer_size=0):
307335
self.use_list = use_list
308336
self.file_like = file_like
309337
if file_like:
@@ -314,6 +342,10 @@ cdef class Unpacker(object):
314342
self.buf = <char*>malloc(read_size)
315343
if self.buf == NULL:
316344
raise MemoryError("Unable to allocate internal buffer.")
345+
if max_buffer_size:
346+
self.max_buffer_size = max_buffer_size
347+
else:
348+
self.max_buffer_size = INT_MAX
317349
self.buf_size = read_size
318350
self.buf_head = 0
319351
self.buf_tail = 0
@@ -355,28 +387,36 @@ cdef class Unpacker(object):
355387
cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
356388
cdef:
357389
char* buf = self.buf
390+
char* new_buf
358391
size_t head = self.buf_head
359392
size_t tail = self.buf_tail
360393
size_t buf_size = self.buf_size
361394
size_t new_size
362395

363396
if tail + _buf_len > buf_size:
364-
if ((tail - head) + _buf_len)*2 < buf_size:
397+
if ((tail - head) + _buf_len) <= buf_size:
365398
# move to front.
366399
memmove(buf, buf + head, tail - head)
367400
tail -= head
368401
head = 0
369402
else:
370403
# expand buffer.
371-
new_size = tail + _buf_len
372-
if new_size < buf_size*2:
373-
new_size = buf_size*2
374-
buf = <char*>realloc(buf, new_size)
375-
if buf == NULL:
404+
new_size = (tail-head) + _buf_len
405+
if new_size > self.max_buffer_size:
406+
raise BufferFull
407+
new_size = min(new_size*2, self.max_buffer_size)
408+
new_buf = <char*>malloc(new_size)
409+
if new_buf == NULL:
376410
# self.buf still holds old buffer and will be freed during
377411
# obj destruction
378412
raise MemoryError("Unable to enlarge internal buffer.")
413+
memcpy(new_buf, buf + head, tail - head)
414+
free(buf)
415+
416+
buf = new_buf
379417
buf_size = new_size
418+
tail -= head
419+
head = 0
380420

381421
memcpy(buf + tail, <char*>(_buf), _buf_len)
382422
self.buf = buf
@@ -387,7 +427,10 @@ cdef class Unpacker(object):
387427
# prepare self.buf from file_like
388428
cdef fill_buffer(self):
389429
if self.file_like is not None:
390-
next_bytes = self.file_like_read(self.read_size)
430+
next_bytes = self.file_like_read(
431+
max(self.read_size,
432+
self.max_buffer_size - (self.buf_tail - self.buf_head)
433+
))
391434
if next_bytes:
392435
self.append_buffer(PyBytes_AsString(next_bytes),
393436
PyBytes_Size(next_bytes))

0 commit comments

Comments
 (0)