|
| 1 | +# CPython Internals Summary Sheet |
| 2 | + |
| 3 | +## Object Model |
| 4 | + |
| 5 | +``` |
| 6 | +PyObject (fixed size) PyVarObject (variable size) |
| 7 | +┌─────────────────┐ ┌─────────────────┐ |
| 8 | +│ ob_refcnt │ │ ob_refcnt │ |
| 9 | +│ ob_type ────────┼──→ type │ ob_type │ |
| 10 | +└─────────────────┘ │ ob_size │ ← element count |
| 11 | + ├─────────────────┤ |
| 12 | + │ items[0] │ |
| 13 | + │ items[1] │ ← flexible array |
| 14 | + │ ... │ |
| 15 | + └─────────────────┘ |
| 16 | +``` |
| 17 | + |
| 18 | +## Reference Counting |
| 19 | + |
| 20 | +``` |
| 21 | +Py_INCREF(obj) → obj->ob_refcnt++ |
| 22 | +Py_DECREF(obj) → obj->ob_refcnt--; if (obj->ob_refcnt == 0) tp_dealloc(obj) |
| 23 | +``` |
| 24 | + |
| 25 | +Rules: |
| 26 | +- INCREF when you store a reference |
| 27 | +- DECREF when you release it |
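| 28 | +- INCREF before returning a stored or borrowed PyObject* (the caller receives a new reference it owns); a freshly created object already carries that reference |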
| 29 | +- Forget INCREF → use-after-free |
| 30 | +- Forget DECREF → memory leak |
| 31 | + |
| 32 | +Debug builds track `_Py_RefTotal` for leak detection. |
| 33 | + |
| 34 | +## Type Slots |
| 35 | + |
| 36 | +``` |
| 37 | +obj->ob_type->tp_hash(obj) # hash(obj) |
| 38 | +obj->ob_type->tp_repr(obj) # repr(obj) |
| 39 | +obj->ob_type->tp_richcompare(obj, other, op) # obj == other |
| 40 | +obj->ob_type->tp_getattro(obj, name) # obj.name |
| 41 | +obj->ob_type->tp_as_sequence->sq_length(obj) # len(obj) |
| 42 | +obj->ob_type->tp_as_sequence->sq_item(obj, i) # obj[i] |
| 43 | +``` |
| 44 | + |
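| 45 | +NULL slot (or NULL `tp_as_sequence` table) → the abstract API (e.g. `PyObject_Hash`, `PySequence_GetItem`) checks for it and raises TypeError: type doesn't support operation. Calling a NULL slot directly crashes. |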
| 46 | + |
| 47 | +Prefixes: `tp_` (type), `sq_` (sequence), `nb_` (number), `mp_` (mapping) |
| 48 | + |
| 49 | +## RecordObject Design |
| 50 | + |
| 51 | +``` |
| 52 | +┌─────────────────┐ |
| 53 | +│ ob_refcnt │ |
| 54 | +│ ob_type │ |
| 55 | +│ ob_size = n │ ← field count |
| 56 | +├─────────────────┤ |
| 57 | +│ names ──────────┼──→ ("x", "y", "z") ← shared tuple |
| 58 | +├─────────────────┤ |
| 59 | +│ values[0] │ |
| 60 | +│ values[1] │ ← flexible array |
| 61 | +│ values[2] │ |
| 62 | +└─────────────────┘ |
| 63 | +``` |
| 64 | + |
| 65 | +```c |
| 66 | +typedef struct { |
| 67 | + PyObject_VAR_HEAD |
| 68 | + PyObject *names; // tuple of field names |
| 69 | + PyObject *values[1]; // variable-length: allocated with ob_size slots (the `[1]` over-allocation idiom) |
| 70 | +} RecordObject; |
| 71 | +``` |
| 72 | + |
| 73 | +## Slots to Implement |
| 74 | + |
| 75 | +| Slot | Purpose | Key Pattern | |
| 76 | +|------|---------|-------------| |
| 77 | +| `tp_dealloc` | Destructor | DECREF names + each value, then tp_free | |
| 78 | +| `tp_repr` | `repr(r)` | PyUnicodeWriter, PyObject_Repr per value | |
| 79 | +| `tp_hash` | `hash(r)` | Combine element hashes (xxHash-style); -1 is reserved for errors, so map a computed -1 → -2 | |
| 80 | +| `tp_richcompare` | `r == r2` | Compare names AND values, Py_NotImplemented for < > | |
| 81 | +| `tp_getattro` | `r.x` | Search names, return values[i], fallback GenericGetAttr | |
| 82 | +| `tp_new` | `Record(x=1)` | Parse kwargs, call PyRecord_New | |
| 83 | +| `sq_length` | `len(r)` | Return Py_SIZE(self) | |
| 84 | +| `sq_item` | `r[i]` | Bounds check, INCREF, return | |
| 85 | + |
| 86 | +## Evaluation Loop |
| 87 | + |
| 88 | +``` |
| 89 | +_PyEval_EvalFrameDefault() { |
| 90 | + for (;;) { |
| 91 | + switch (opcode) { |
| 92 | + case BUILD_TUPLE: ... |
| 93 | + case BUILD_RECORD: ... ← we add this |
| 94 | + } |
| 95 | + } |
| 96 | +} |
| 97 | +``` |
| 98 | + |
| 99 | +Stack macros: |
| 100 | +- `POP()` - pop and take ownership |
| 101 | +- `PUSH(obj)` - push onto stack |
| 102 | +- `PEEK(n)` - read without popping |
| 103 | +- `STACK_SHRINK(n)` - drop n items |
| 104 | + |
| 105 | +## BUILD_RECORD Design |
| 106 | + |
| 107 | +``` |
| 108 | +Stack before: [..., names_tuple, val0, val1, val2] |
| 109 | + ↑ └─── oparg=3 ───┘ |
| 110 | + └── PEEK(oparg+1) |
| 111 | +
|
| 112 | +Stack after: [..., record] |
| 113 | +``` |
| 114 | + |
| 115 | +```c |
| 116 | +case TARGET(BUILD_RECORD): { |
| 117 | + PyObject *names = PEEK(oparg + 1); |
| 118 | + PyObject **values = &PEEK(oparg); |
| 119 | + PyObject *rec = PyRecord_New(names, values, oparg); // steals refs |
| 120 | + if (rec == NULL) goto error; |
| 121 | + STACK_SHRINK(oparg + 1); |
| 122 | + PUSH(rec); |
| 123 | + DISPATCH(); |
| 124 | +} |
| 125 | +``` |
| 126 | +
|
| 127 | +## Reference Stealing vs Copying |
| 128 | +
|
| 129 | +``` |
| 130 | +BUILD_TUPLE: POP → SET_ITEM (steals) → no DECREF needed |
| 131 | +BUILD_MAP: PEEK → SetItem (takes its own ref, does not steal) → must DECREF after |
| 132 | +BUILD_RECORD: PEEK → PyRecord_New (steals) → no DECREF needed |
| 133 | +``` |
| 134 | +
|
| 135 | +## Constructors |
| 136 | +
|
| 137 | +``` |
| 138 | +PyRecord_New(names, values, n) ← C API, called by opcode, steals refs |
| 139 | +record_new(type, args, kwargs) ← Python API (tp_new), parses kwargs |
| 140 | +``` |
| 141 | +
|
| 142 | +## Files to Create/Modify |
| 143 | +
|
| 144 | +``` |
| 145 | +Include/recordobject.h ← struct + declarations |
| 146 | +Objects/recordobject.c ← implementation |
| 147 | +Python/ceval.c ← BUILD_RECORD case |
| 148 | +Lib/opcode.py ← register opcode number |
| 149 | +Makefile.pre.in ← add to build |
| 150 | +Python/bltinmodule.c ← register type |
| 151 | +``` |
0 commit comments