Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 58 additions & 26 deletions src/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -2377,6 +2377,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
*/
size_t
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}

if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
Expand All @@ -2397,6 +2401,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
*/
size_t
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}

if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
Expand All @@ -2417,6 +2425,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
*/
bool
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}

if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
Expand All @@ -2435,7 +2447,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {

static pm_unicode_codepoint_t
pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
if (b[0] < 0x80) {

if ((n > 0) && (b[0] < 0x80)) {
*width = 1;
return (pm_unicode_codepoint_t) b[0];
}
Expand Down Expand Up @@ -2474,13 +2487,21 @@ pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {

static size_t
pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}

size_t width;
pm_cesu_8_codepoint(b, n, &width);
return width;
}

static size_t
pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}

if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
Expand All @@ -2497,6 +2518,10 @@ pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {

static size_t
pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}

if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
Expand All @@ -2513,6 +2538,10 @@ pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {

static bool
pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}

if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
Expand Down Expand Up @@ -3928,14 +3957,14 @@ static const uint8_t pm_encoding_windows_874_table[256] = {
};

#define PRISM_ENCODING_TABLE(name) \
static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, ptrdiff_t n) { \
return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT)); \
} \
static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, ptrdiff_t n) { \
return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0; \
} \
static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, ptrdiff_t n) { \
return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT)); \
}

PRISM_ENCODING_TABLE(cp850)
Expand Down Expand Up @@ -4004,17 +4033,17 @@ PRISM_ENCODING_TABLE(windows_874)
* means that if the top bit is not set, the character is 1 byte long.
*/
static size_t
pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return *b < 0x80 ? 1 : 0;
pm_encoding_ascii_char_width(const uint8_t *b, ptrdiff_t n) {
return ((n > 0) && (*b < 0x80)) ? 1 : 0;
}

/**
* Return the size of the next character in the ASCII encoding if it is an
* alphabetical character.
*/
static size_t
pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
pm_encoding_ascii_alpha_char(const uint8_t *b, ptrdiff_t n) {
return (n > 0) ? (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) : 0;
}

/**
Expand All @@ -4024,16 +4053,16 @@ pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
*/
static size_t
pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
return (*b < 0x80) ? pm_encoding_ascii_alpha_char(b, n) : 0;
return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alpha_char(b, n) : 0;
}

/**
* Return the size of the next character in the ASCII encoding if it is an
* alphanumeric character.
*/
static size_t
pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
pm_encoding_ascii_alnum_char(const uint8_t *b, ptrdiff_t n) {
return ((n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}

/**
Expand All @@ -4043,16 +4072,16 @@ pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
*/
static size_t
pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
return (*b < 0x80) ? pm_encoding_ascii_alnum_char(b, n) : 0;
return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alnum_char(b, n) : 0;
}

/**
* Return true if the next character in the ASCII encoding if it is an uppercase
* character.
*/
static bool
pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) {
return (n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
}

/**
Expand All @@ -4071,7 +4100,7 @@ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATT
static size_t
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
if ((n > 0) && (*b < 0x80)) {
return 1;
}

Expand Down Expand Up @@ -4115,6 +4144,9 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
*/
static size_t
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
if (n == 0) {
return 0;
}
// These are the single byte characters.
if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
return 1;
Expand Down Expand Up @@ -4178,7 +4210,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
*/
static bool
pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
return (n > 0) && (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
}

/**
Expand All @@ -4188,7 +4220,7 @@ pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
if ((n > 0) && (*b < 0x80)) {
return 1;
}

Expand All @@ -4207,7 +4239,7 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters
if (*b <= 0x80) {
if ((n > 0) && (*b <= 0x80)) {
return 1;
}

Expand All @@ -4226,7 +4258,7 @@ pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the 1 byte characters.
if (*b < 0x80) {
if ((n > 0) && (*b < 0x80)) {
return 1;
}

Expand Down Expand Up @@ -4269,7 +4301,7 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
if ((n > 0) && (*b < 0x80)) {
return 1;
}

Expand All @@ -4288,7 +4320,7 @@ pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b < 0x80) {
if ((n > 0) && (*b < 0x80)) {
return 1;
}

Expand All @@ -4312,7 +4344,7 @@ pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the 1 byte characters.
if (*b < 0x80) {
if ((n > 0) && (*b < 0x80)) {
return 1;
}

Expand All @@ -4336,7 +4368,7 @@ pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
if (*b <= 0x80) {
if ((n > 0) && (*b <= 0x80)) {
return 1;
}

Expand Down
Loading