Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions cpp/fory/python/pyfory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,148 @@ int Fory_PyFloatSequenceWriteToBuffer(PyObject *collection, Buffer *buffer,
}
return 0;
}

// Write varint64 with ZigZag encoding inline
// Returns number of bytes written
static inline uint32_t WriteVarint64ZigZag(uint8_t *arr, int64_t value) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm refactoring fory cython Buffer to forward numeric read/write into c++ Buffer, you can use c++ Buffer.write_varint64 directly after I finsihed the refactor later

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WriteVarint64ZigZag should be removed now, python use same C++ Buffer for write/read ints

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let me take a look.

// ZigZag encoding: (value << 1) ^ (value >> 63)
uint64_t v = (static_cast<uint64_t>(value) << 1) ^
(static_cast<uint64_t>(value >> 63));

if (v < 0x80) {
arr[0] = static_cast<uint8_t>(v);
return 1;
}
arr[0] = static_cast<uint8_t>((v & 0x7F) | 0x80);
if (v < 0x4000) {
arr[1] = static_cast<uint8_t>(v >> 7);
return 2;
}
arr[1] = static_cast<uint8_t>((v >> 7) | 0x80);
if (v < 0x200000) {
arr[2] = static_cast<uint8_t>(v >> 14);
return 3;
}
arr[2] = static_cast<uint8_t>((v >> 14) | 0x80);
if (v < 0x10000000) {
arr[3] = static_cast<uint8_t>(v >> 21);
return 4;
}
arr[3] = static_cast<uint8_t>((v >> 21) | 0x80);
if (v < 0x800000000ULL) {
arr[4] = static_cast<uint8_t>(v >> 28);
return 5;
}
arr[4] = static_cast<uint8_t>((v >> 28) | 0x80);
if (v < 0x40000000000ULL) {
arr[5] = static_cast<uint8_t>(v >> 35);
return 6;
}
arr[5] = static_cast<uint8_t>((v >> 35) | 0x80);
if (v < 0x2000000000000ULL) {
arr[6] = static_cast<uint8_t>(v >> 42);
return 7;
}
arr[6] = static_cast<uint8_t>((v >> 42) | 0x80);
if (v < 0x100000000000000ULL) {
arr[7] = static_cast<uint8_t>(v >> 49);
return 8;
}
arr[7] = static_cast<uint8_t>((v >> 49) | 0x80);
arr[8] = static_cast<uint8_t>(v >> 56);
return 9;
}

Py_ssize_t Fory_PyInt64SequenceWriteToBuffer(PyObject *collection,
Buffer *buffer) {
PyObject **items = PySequenceGetItems(collection);
if (items == nullptr) {
return -1;
}
Py_ssize_t size = Py_SIZE(collection);
uint32_t start_index = buffer->writer_index();
uint8_t *data = buffer->data() + start_index;
Py_ssize_t total_bytes = 0;

for (Py_ssize_t i = 0; i < size; i++) {
PyObject *item = items[i];
int64_t value = PyLong_AsLongLong(item);
if (value == -1 && PyErr_Occurred()) {
return -1;
}
uint32_t bytes_written = WriteVarint64ZigZag(data + total_bytes, value);
total_bytes += bytes_written;
}

buffer->IncreaseWriterIndex(total_bytes);
return total_bytes;
}

// Read varint64 with ZigZag decoding inline
// Returns the number of bytes read, or 0 on buffer overflow
static inline uint32_t
ReadVarint64ZigZag(const uint8_t *arr, Py_ssize_t remaining, int64_t *result) {
if (remaining <= 0) {
return 0;
}

uint64_t v = 0;
uint32_t shift = 0;
uint32_t bytes_read = 0;

// Read up to 8 bytes with continuation bit
for (int i = 0; i < 8; i++) {
if (bytes_read >= static_cast<uint32_t>(remaining)) {
return 0; // Buffer overflow
}
uint8_t b = arr[bytes_read++];
v |= static_cast<uint64_t>(b & 0x7F) << shift;
if ((b & 0x80) == 0) {
*result = static_cast<int64_t>((v >> 1) ^ (~(v & 1) + 1));
return bytes_read;
}
shift += 7;
}
// 9th byte: use all 8 bits (no continuation bit masking)
if (bytes_read >= static_cast<uint32_t>(remaining)) {
return 0; // Buffer overflow
}
uint8_t b = arr[bytes_read++];
v |= static_cast<uint64_t>(b) << 56;

*result = static_cast<int64_t>((v >> 1) ^ (~(v & 1) + 1));
return bytes_read;
}

Py_ssize_t Fory_PyInt64SequenceReadFromBuffer(PyObject *list, Buffer *buffer,
Py_ssize_t count) {
if (!PyList_CheckExact(list)) {
return -1;
}

uint32_t start_index = buffer->reader_index();
const uint8_t *data = buffer->data() + start_index;
Py_ssize_t remaining = buffer->size() - start_index;
Py_ssize_t total_bytes = 0;

for (Py_ssize_t i = 0; i < count; i++) {
int64_t value;
uint32_t bytes_read =
ReadVarint64ZigZag(data + total_bytes, remaining - total_bytes, &value);
if (bytes_read == 0) {
PyErr_SetString(PyExc_ValueError, "Buffer overflow reading varint64");
return -1;
}
total_bytes += bytes_read;

PyObject *py_int = PyLong_FromLongLong(value);
if (py_int == nullptr) {
return -1;
}
PyList_SET_ITEM(list, i, py_int);
}

buffer->IncreaseReaderIndex(total_bytes);
return total_bytes;
}
} // namespace fory
6 changes: 6 additions & 0 deletions cpp/fory/python/pyfory.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,10 @@ int Fory_PyBooleanSequenceWriteToBuffer(PyObject *collection, Buffer *buffer,
Py_ssize_t start_index);
int Fory_PyFloatSequenceWriteToBuffer(PyObject *collection, Buffer *buffer,
Py_ssize_t start_index);

Py_ssize_t Fory_PyInt64SequenceWriteToBuffer(PyObject *collection,
Buffer *buffer);

Py_ssize_t Fory_PyInt64SequenceReadFromBuffer(PyObject *list, Buffer *buffer,
Py_ssize_t count);
} // namespace fory
20 changes: 20 additions & 0 deletions python/pyfory/collection.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,30 @@ cdef class CollectionSerializer(Serializer):
self._add_element(collection_, i, buffer.read_string())

cdef inline _write_int(self, Buffer buffer, value):
cdef Py_ssize_t bytes_written
cdef Py_ssize_t max_size
value_type = type(value)
if value_type is list or value_type is tuple:
# Reserve maximum possible size (9 bytes per varint64)
max_size = Py_SIZE(value) * 9
buffer.grow(<int32_t>max_size)
# Try batch write, fall back to slow path on error (e.g., int overflow)
try:
bytes_written = Fory_PyInt64SequenceWriteToBuffer(value, &buffer.c_buffer)
if bytes_written >= 0:
return
except OverflowError:
pass # Fall through to slow path
# Slow path: write elements one by one
for s in value:
buffer.write_varint64(s)

cdef inline _read_int(self, Buffer buffer, int64_t len_, object collection_):
cdef Py_ssize_t bytes_read
if type(collection_) is list:
bytes_read = Fory_PyInt64SequenceReadFromBuffer(collection_, &buffer.c_buffer, len_)
if bytes_read >= 0:
return
for i in range(len_):
self._add_element(collection_, i, buffer.read_varint64())

Expand Down
2 changes: 2 additions & 0 deletions python/pyfory/includes/libserialization.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,5 @@ cdef extern from "fory/type/type.h" namespace "fory" nogil:
cdef extern from "fory/python/pyfory.h" namespace "fory":
int Fory_PyBooleanSequenceWriteToBuffer(object collection, CBuffer *buffer, Py_ssize_t start_index)
int Fory_PyFloatSequenceWriteToBuffer(object collection, CBuffer *buffer, Py_ssize_t start_index)
Py_ssize_t Fory_PyInt64SequenceWriteToBuffer(object collection, CBuffer *buffer) except? -1
Py_ssize_t Fory_PyInt64SequenceReadFromBuffer(object list, CBuffer *buffer, Py_ssize_t count) except? -1
2 changes: 1 addition & 1 deletion python/pyfory/serialization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ from pyfory.meta.metastring import Encoding
from pyfory.types import is_primitive_type
from pyfory.policy import DeserializationPolicy, DEFAULT_POLICY
from pyfory.includes.libserialization cimport \
(TypeId, IsNamespacedType, IsTypeShareMeta, Fory_PyBooleanSequenceWriteToBuffer, Fory_PyFloatSequenceWriteToBuffer)
(TypeId, IsNamespacedType, IsTypeShareMeta, Fory_PyBooleanSequenceWriteToBuffer, Fory_PyFloatSequenceWriteToBuffer, Fory_PyInt64SequenceWriteToBuffer, Fory_PyInt64SequenceReadFromBuffer)

from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, uint64_t
from libc.stdint cimport *
Expand Down
Loading