Skip to content

Commit 5c44aca

Browse files
lysnikolaou authored and vstinner committed
gh-76535: Make PyUnicode_ToLowerFull and friends public
Make `PyUnicode_ToLowerFull`, `PyUnicode_ToUpperFull` and `PyUnicode_ToTitleFull` public and rename them to `PyUnicode_ToLower` etc.
1 parent 8d83b7d commit 5c44aca

File tree

5 files changed

+79
-21
lines changed

5 files changed

+79
-21
lines changed

Doc/c-api/unicode.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,36 @@ These APIs can be used for fast direct character conversions:
307307
possible. This function does not raise exceptions.
308308
309309
310+
.. c:function:: int PyUnicode_ToLower(Py_UCS4 ch, Py_UCS4 *buffer)
311+
312+
Convert *ch* to lower case and store the result in *buffer*, which must be
313+
able to hold as many characters as the lower-cased form of *ch* needs
314+
(at most three). Return the number of characters stored.
315+
If *buffer* is ``NULL``, nothing is stored and the required buffer size is returned.
316+
317+
.. versionadded:: next
318+
319+
320+
.. c:function:: int PyUnicode_ToUpper(Py_UCS4 ch, Py_UCS4 *buffer)
321+
322+
Convert *ch* to upper case and store the result in *buffer*, which must be
323+
able to hold as many characters as the upper-cased form of *ch* needs
324+
(at most three). Return the number of characters stored.
325+
If *buffer* is ``NULL``, nothing is stored and the required buffer size is returned.
326+
327+
.. versionadded:: next
328+
329+
330+
.. c:function:: int PyUnicode_ToTitle(Py_UCS4 ch, Py_UCS4 *buffer)
331+
332+
Convert *ch* to title case and store the result in *buffer*, which must be
333+
able to hold as many characters as the title-cased form of *ch* needs
334+
(at most three). Return the number of characters stored.
335+
If *buffer* is ``NULL``, nothing is stored and the required buffer size is returned.
336+
337+
.. versionadded:: next
338+
339+
310340
These APIs can be used to work with surrogates:
311341
312342
.. c:function:: int Py_UNICODE_IS_SURROGATE(Py_UCS4 ch)

Include/cpython/unicodeobject.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,21 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
733733
Py_UCS4 ch /* Unicode character */
734734
);
735735

736+
PyAPI_FUNC(int) PyUnicode_ToLower(
737+
Py_UCS4 ch, /* Unicode character */
738+
Py_UCS4 *res /* Output buffer */
739+
);
740+
741+
PyAPI_FUNC(int) PyUnicode_ToUpper(
742+
Py_UCS4 ch, /* Unicode character */
743+
Py_UCS4 *res /* Output buffer */
744+
);
745+
746+
PyAPI_FUNC(int) PyUnicode_ToTitle(
747+
Py_UCS4 ch, /* Unicode character */
748+
Py_UCS4 *res /* Output buffer */
749+
);
750+
736751
// Helper array used by Py_UNICODE_ISSPACE().
737752
PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
738753

Include/internal/pycore_unicodeobject.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@ extern "C" {
1515

1616
extern int _PyUnicode_IsXidStart(Py_UCS4 ch);
1717
extern int _PyUnicode_IsXidContinue(Py_UCS4 ch);
18-
extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res);
19-
extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res);
20-
extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res);
2118
extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res);
2219
extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch);
2320
extern int _PyUnicode_IsCased(Py_UCS4 ch);

Objects/unicodectype.c

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,51 +198,67 @@ Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
198198
return ch + ctype->lower;
199199
}
200200

201-
int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
201+
int PyUnicode_ToLower(Py_UCS4 ch, Py_UCS4 *res)
202202
{
203203
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
204204

205205
if (ctype->flags & EXTENDED_CASE_MASK) {
206206
int index = ctype->lower & 0xFFFF;
207207
int n = ctype->lower >> 24;
208208
int i;
209-
for (i = 0; i < n; i++)
210-
res[i] = _PyUnicode_ExtendedCase[index + i];
209+
for (i = 0; i < n; i++) {
210+
if (res != NULL) {
211+
res[i] = _PyUnicode_ExtendedCase[index + i];
212+
}
213+
}
211214
return n;
212215
}
213-
res[0] = ch + ctype->lower;
216+
217+
if (res != NULL) {
218+
res[0] = ch + ctype->lower;
219+
}
214220
return 1;
215221
}
216222

217-
int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res)
223+
int PyUnicode_ToTitle(Py_UCS4 ch, Py_UCS4 *res)
218224
{
219225
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
220226

221227
if (ctype->flags & EXTENDED_CASE_MASK) {
222228
int index = ctype->title & 0xFFFF;
223229
int n = ctype->title >> 24;
224230
int i;
225-
for (i = 0; i < n; i++)
226-
res[i] = _PyUnicode_ExtendedCase[index + i];
231+
for (i = 0; i < n; i++) {
232+
if (res != NULL) {
233+
res[i] = _PyUnicode_ExtendedCase[index + i];
234+
}
235+
}
227236
return n;
228237
}
229-
res[0] = ch + ctype->title;
238+
if (res != NULL) {
239+
res[0] = ch + ctype->title;
240+
}
230241
return 1;
231242
}
232243

233-
int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
244+
int PyUnicode_ToUpper(Py_UCS4 ch, Py_UCS4 *res)
234245
{
235246
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
236247

237248
if (ctype->flags & EXTENDED_CASE_MASK) {
238249
int index = ctype->upper & 0xFFFF;
239250
int n = ctype->upper >> 24;
240251
int i;
241-
for (i = 0; i < n; i++)
242-
res[i] = _PyUnicode_ExtendedCase[index + i];
252+
for (i = 0; i < n; i++) {
253+
if (res != NULL) {
254+
res[i] = _PyUnicode_ExtendedCase[index + i];
255+
}
256+
}
243257
return n;
244258
}
245-
res[0] = ch + ctype->upper;
259+
if (res != NULL) {
260+
res[0] = ch + ctype->upper;
261+
}
246262
return 1;
247263
}
248264

@@ -258,7 +274,7 @@ int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res)
258274
res[i] = _PyUnicode_ExtendedCase[index + i];
259275
return n;
260276
}
261-
return _PyUnicode_ToLowerFull(ch, res);
277+
return PyUnicode_ToLower(ch, res);
262278
}
263279

264280
int _PyUnicode_IsCased(Py_UCS4 ch)

Objects/unicodeobject.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10003,7 +10003,7 @@ lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
1000310003
mapped[0] = handle_capital_sigma(kind, data, length, i);
1000410004
return 1;
1000510005
}
10006-
return _PyUnicode_ToLowerFull(c, mapped);
10006+
return PyUnicode_ToLower(c, mapped);
1000710007
}
1000810008

1000910009
static Py_ssize_t
@@ -10014,7 +10014,7 @@ do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UC
1001410014
Py_UCS4 c, mapped[3];
1001510015

1001610016
c = PyUnicode_READ(kind, data, 0);
10017-
n_res = _PyUnicode_ToTitleFull(c, mapped);
10017+
n_res = PyUnicode_ToTitle(c, mapped);
1001810018
for (j = 0; j < n_res; j++) {
1001910019
*maxchar = Py_MAX(*maxchar, mapped[j]);
1002010020
res[k++] = mapped[j];
@@ -10041,7 +10041,7 @@ do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4
1004110041
n_res = lower_ucs4(kind, data, length, i, c, mapped);
1004210042
}
1004310043
else if (Py_UNICODE_ISLOWER(c)) {
10044-
n_res = _PyUnicode_ToUpperFull(c, mapped);
10044+
n_res = PyUnicode_ToUpper(c, mapped);
1004510045
}
1004610046
else {
1004710047
n_res = 1;
@@ -10067,7 +10067,7 @@ do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
1006710067
if (lower)
1006810068
n_res = lower_ucs4(kind, data, length, i, c, mapped);
1006910069
else
10070-
n_res = _PyUnicode_ToUpperFull(c, mapped);
10070+
n_res = PyUnicode_ToUpper(c, mapped);
1007110071
for (j = 0; j < n_res; j++) {
1007210072
*maxchar = Py_MAX(*maxchar, mapped[j]);
1007310073
res[k++] = mapped[j];
@@ -10120,7 +10120,7 @@ do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *m
1012010120
if (previous_is_cased)
1012110121
n_res = lower_ucs4(kind, data, length, i, c, mapped);
1012210122
else
10123-
n_res = _PyUnicode_ToTitleFull(c, mapped);
10123+
n_res = PyUnicode_ToTitle(c, mapped);
1012410124

1012510125
for (j = 0; j < n_res; j++) {
1012610126
*maxchar = Py_MAX(*maxchar, mapped[j]);

0 commit comments

Comments
 (0)