@@ -97,7 +97,7 @@ _getrecord_ex(Py_UCS4 code)
9797}
9898
9999typedef struct {
100- PyObject * GraphemeType ;
100+ PyObject * SegmentType ;
101101 PyObject * GraphemeBreakIteratorType ;
102102} unicodedatastate ;
103103
@@ -1873,72 +1873,79 @@ _Py_NextGraphemeBreak(_PyGraphemeBreak *iter)
18731873}
18741874
18751875
1876- /* Grapheme Cluster object */
1876+ /* Text Segment object */
18771877
18781878typedef struct {
18791879 PyObject_HEAD
18801880 PyObject * string ;
18811881 Py_ssize_t start ;
18821882 Py_ssize_t end ;
1883- } GraphemeObject ;
1883+ } SegmentObject ;
18841884
18851885static void
1886- Grapheme_dealloc (PyObject * self )
1886+ Segment_dealloc (PyObject * self )
18871887{
18881888 PyObject_GC_UnTrack (self );
1889- Py_DECREF (((GraphemeObject * )self )-> string );
1889+ Py_DECREF (((SegmentObject * )self )-> string );
18901890 PyObject_GC_Del (self );
18911891}
18921892
18931893static int
1894- Grapheme_traverse (PyObject * self , visitproc visit , void * arg )
1894+ Segment_traverse (PyObject * self , visitproc visit , void * arg )
18951895{
1896- Py_VISIT (((GraphemeObject * )self )-> string );
1896+ Py_VISIT (((SegmentObject * )self )-> string );
18971897 return 0 ;
18981898}
18991899
19001900static int
1901- Grapheme_clear (PyObject * self )
1901+ Segment_clear (PyObject * self )
19021902{
1903- Py_CLEAR (((GraphemeObject * )self )-> string );
1903+ Py_CLEAR (((SegmentObject * )self )-> string );
19041904 return 0 ;
19051905}
19061906
19071907static PyObject *
1908- Grapheme_str (PyObject * self )
1908+ Segment_str (PyObject * self )
19091909{
1910- GraphemeObject * g = (GraphemeObject * )self ;
1911- return PyUnicode_Substring (g -> string , g -> start , g -> end );
1910+ SegmentObject * s = (SegmentObject * )self ;
1911+ return PyUnicode_Substring (s -> string , s -> start , s -> end );
19121912}
19131913
1914- static PyMemberDef Grapheme_members [] = {
1915- {"start" , Py_T_PYSSIZET , offsetof(GraphemeObject , start ), 0 ,
1914+ static PyObject *
1915+ Segment_repr (PyObject * self )
1916+ {
1917+ SegmentObject * s = (SegmentObject * )self ;
1918+ return PyUnicode_FromFormat ("<Segment %zd:%zd>" , s -> start , s -> end );
1919+ }
1920+
1921+ static PyMemberDef Segment_members [] = {
1922+ {"start" , Py_T_PYSSIZET , offsetof(SegmentObject , start ), 0 ,
19161923 PyDoc_STR ("grapheme start" )},
1917- {"end" , Py_T_PYSSIZET , offsetof(GraphemeObject , end ), 0 ,
1924+ {"end" , Py_T_PYSSIZET , offsetof(SegmentObject , end ), 0 ,
19181925 PyDoc_STR ("grapheme end" )},
19191926 {NULL } /* Sentinel */
19201927};
19211928
1922- static PyType_Slot Grapheme_slots [] = {
1923- {Py_tp_dealloc , Grapheme_dealloc },
1924- {Py_tp_iter , PyObject_SelfIter },
1925- {Py_tp_traverse , Grapheme_traverse },
1926- {Py_tp_clear , Grapheme_clear },
1927- {Py_tp_str , Grapheme_str },
1928- {Py_tp_members , Grapheme_members },
1929+ static PyType_Slot Segment_slots [] = {
1930+ {Py_tp_dealloc , Segment_dealloc },
1931+ {Py_tp_traverse , Segment_traverse },
1932+ {Py_tp_clear , Segment_clear },
1933+ {Py_tp_str , Segment_str },
1934+ {Py_tp_repr , Segment_repr },
1935+ {Py_tp_members , Segment_members },
19291936 {0 , 0 },
19301937};
19311938
1932- static PyType_Spec Grapheme_spec = {
1933- .name = "unicodedata.Grapheme " ,
1934- .basicsize = sizeof (GraphemeObject ),
1939+ static PyType_Spec Segment_spec = {
1940+ .name = "unicodedata.Segment " ,
1941+ .basicsize = sizeof (SegmentObject ),
19351942 .flags = (
19361943 Py_TPFLAGS_DEFAULT
19371944 | Py_TPFLAGS_HAVE_GC
19381945 | Py_TPFLAGS_DISALLOW_INSTANTIATION
19391946 | Py_TPFLAGS_IMMUTABLETYPE
19401947 ),
1941- .slots = Grapheme_slots
1948+ .slots = Segment_slots
19421949};
19431950
19441951
@@ -1982,18 +1989,17 @@ GBI_iternext(PyObject *self)
19821989 return NULL ;
19831990 }
19841991 PyObject * module = PyType_GetModule (Py_TYPE (it ));
1985- PyObject * GraphemeType = get_unicodedata_state (module )-> GraphemeType ;
1986- GraphemeObject * g = PyObject_GC_New (GraphemeObject ,
1987- (PyTypeObject * )GraphemeType );
1988- if (!g ) {
1992+ PyObject * SegmentType = get_unicodedata_state (module )-> SegmentType ;
1993+ SegmentObject * s = PyObject_GC_New (SegmentObject ,
1994+ (PyTypeObject * )SegmentType );
1995+ if (!s ) {
19891996 return NULL ;
19901997 }
1991- g -> string = Py_NewRef (it -> iter .str );
1992- g -> start = start ;
1993- g -> end = pos ;
1994- PyObject_GC_Track (g );
1995- return (PyObject * )g ;
1996- // return PyUnicode_Substring(it->iter.str, start, pos);
1998+ s -> string = Py_NewRef (it -> iter .str );
1999+ s -> start = start ;
2000+ s -> end = pos ;
2001+ PyObject_GC_Track (s );
2002+ return (PyObject * )s ;
19972003}
19982004
19992005
@@ -2180,7 +2186,7 @@ static int
21802186unicodedata_traverse (PyObject * module , visitproc visit , void * arg )
21812187{
21822188 unicodedatastate * state = get_unicodedata_state (module );
2183- Py_VISIT (state -> GraphemeType );
2189+ Py_VISIT (state -> SegmentType );
21842190 Py_VISIT (state -> GraphemeBreakIteratorType );
21852191 return 0 ;
21862192}
@@ -2189,7 +2195,7 @@ static int
21892195unicodedata_clear (PyObject * module )
21902196{
21912197 unicodedatastate * state = get_unicodedata_state (module );
2192- Py_CLEAR (state -> GraphemeType );
2198+ Py_CLEAR (state -> SegmentType );
21932199 Py_CLEAR (state -> GraphemeBreakIteratorType );
21942200 return 0 ;
21952201}
@@ -2205,11 +2211,11 @@ unicodedata_exec(PyObject *module)
22052211{
22062212 unicodedatastate * state = get_unicodedata_state (module );
22072213
2208- PyObject * GraphemeType = PyType_FromModuleAndSpec (module , & Grapheme_spec , NULL );
2209- if (GraphemeType == NULL ) {
2214+ PyObject * SegmentType = PyType_FromModuleAndSpec (module , & Segment_spec , NULL );
2215+ if (SegmentType == NULL ) {
22102216 return -1 ;
22112217 }
2212- state -> GraphemeType = GraphemeType ;
2218+ state -> SegmentType = SegmentType ;
22132219
22142220 PyObject * GraphemeBreakIteratorType = PyType_FromModuleAndSpec (module , & GraphemeBreakIterator_spec , NULL );
22152221 if (GraphemeBreakIteratorType == NULL ) {
0 commit comments