diff --git a/enc/ascii.c b/enc/ascii.c index ae7db97f25ed79..4ba93f4febdb21 100644 --- a/enc/ascii.c +++ b/enc/ascii.c @@ -54,7 +54,11 @@ OnigEncodingDefine(ascii, ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif ENCINDEX_ASCII_8BIT, ONIGENC_FLAG_NONE, }; diff --git a/enc/big5.c b/enc/big5.c index ab4fb69819b60e..e141ebdbe36988 100644 --- a/enc/big5.c +++ b/enc/big5.c @@ -300,7 +300,11 @@ OnigEncodingDefine(big5, BIG5) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; @@ -335,7 +339,11 @@ OnigEncodingDefine(big5_hkscs, BIG5_HKSCS) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; @@ -370,7 +378,11 @@ OnigEncodingDefine(big5_uao, BIG5_UAO) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/cp949.c b/enc/cp949.c index 1600d0cd5bee29..77e961a7cdf3d0 100644 --- a/enc/cp949.c +++ b/enc/cp949.c @@ -211,7 +211,11 @@ OnigEncodingDefine(cp949, CP949) = { onigenc_not_support_get_ctype_code_range, cp949_left_adjust_char_head, cp949_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/emacs_mule.c b/enc/emacs_mule.c index f92eb183cf788d..abd986a1878e0b 100644 --- a/enc/emacs_mule.c +++ b/enc/emacs_mule.c @@ -334,7 +334,11 @@ OnigEncodingDefine(emacs_mule, Emacs_Mule) = { onigenc_not_support_get_ctype_code_range, left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/euc_jp.c b/enc/euc_jp.c index d283bf4ebb1208..678d0116682bee 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -576,7 +576,11 @@ OnigEncodingDefine(euc_jp, EUC_JP) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/euc_kr.c b/enc/euc_kr.c index 21d6ab4e1c10b9..4079a0ece05b20 100644 --- a/enc/euc_kr.c +++ b/enc/euc_kr.c @@ -188,7 +188,11 @@ OnigEncodingDefine(euc_kr, EUC_KR) = { onigenc_not_support_get_ctype_code_range, euckr_left_adjust_char_head, euckr_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; @@ -213,7 +217,11 @@ OnigEncodingDefine(euc_cn, EUC_CN) = { onigenc_not_support_get_ctype_code_range, euckr_left_adjust_char_head, euckr_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/euc_tw.c b/enc/euc_tw.c index 1c5659cb1d0895..722e29a9dac70e 100644 --- a/enc/euc_tw.c +++ b/enc/euc_tw.c @@ -221,7 +221,11 @@ OnigEncodingDefine(euc_tw, EUC_TW) = { onigenc_not_support_get_ctype_code_range, euctw_left_adjust_char_head, euctw_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/gb18030.c b/enc/gb18030.c index 63d2e633ecb16e..316737db11463d 100644 --- a/enc/gb18030.c +++ b/enc/gb18030.c @@ -597,7 +597,11 @@ OnigEncodingDefine(gb18030, GB18030) = { onigenc_not_support_get_ctype_code_range, gb18030_left_adjust_char_head, gb18030_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/gbk.c b/enc/gbk.c index 31032553bf58ed..3df4e4b6d6a6a9 100644 --- a/enc/gbk.c +++ b/enc/gbk.c @@ -211,7 +211,11 @@ OnigEncodingDefine(gbk, GBK) = { onigenc_not_support_get_ctype_code_range, gbk_left_adjust_char_head, gbk_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c index 7af0888c3edced..78ea1fba600582 100644 --- a/enc/iso_8859_1.c +++ b/enc/iso_8859_1.c @@ -255,6 +255,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE return FALSE; } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -297,6 +298,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { onigenc_single_byte_mbc_enc_len, @@ -315,7 +317,11 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c index cae4be2db0367b..bf1c884cb23511 100644 --- a/enc/iso_8859_10.c +++ b/enc/iso_8859_10.c @@ -224,6 +224,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -269,6 +270,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { onigenc_single_byte_mbc_enc_len, @@ -287,7 +289,11 @@ OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c index b9c6119fd9a02b..403ae6499e0cf8 100644 --- a/enc/iso_8859_11.c +++ b/enc/iso_8859_11.c @@ -93,7 +93,11 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c index fe1ddd7065e6ae..8c6e758b8066f3 100644 --- a/enc/iso_8859_13.c +++ b/enc/iso_8859_13.c @@ -217,6 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -264,6 +265,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_13, ISO_8859_13) = { onigenc_single_byte_mbc_enc_len, @@ -282,7 +284,11 @@ OnigEncodingDefine(iso_8859_13, ISO_8859_13) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c index 647514a01626c0..21dffea76f087b 100644 --- a/enc/iso_8859_14.c +++ b/enc/iso_8859_14.c @@ -226,6 +226,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -280,6 +281,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_14, ISO_8859_14) = { onigenc_single_byte_mbc_enc_len, @@ -298,7 +300,11 @@ OnigEncodingDefine(iso_8859_14, ISO_8859_14) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c index 377a3afc7b15c5..dd6c29a6432455 100644 --- a/enc/iso_8859_15.c +++ b/enc/iso_8859_15.c @@ -220,6 +220,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -271,6 +272,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_15, ISO_8859_15) = { onigenc_single_byte_mbc_enc_len, @@ -289,7 +291,11 @@ OnigEncodingDefine(iso_8859_15, ISO_8859_15) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c index 135630eb73df46..aa7ce99fbac467 100644 --- a/enc/iso_8859_16.c +++ b/enc/iso_8859_16.c @@ -222,6 +222,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -275,6 +276,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_16, ISO_8859_16) = { onigenc_single_byte_mbc_enc_len, @@ -293,7 +295,11 @@ OnigEncodingDefine(iso_8859_16, ISO_8859_16) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c index 3a05c6320dbbeb..859073fd149cb4 100644 --- a/enc/iso_8859_2.c +++ b/enc/iso_8859_2.c @@ -220,6 +220,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE return FALSE; } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -266,6 +267,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { onigenc_single_byte_mbc_enc_len, @@ -284,7 +286,11 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c index 2a343eac638482..d8199d5125b19c 100644 --- a/enc/iso_8859_3.c +++ b/enc/iso_8859_3.c @@ -220,6 +220,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xB9) #define I_WITH_DOT_ABOVE (0xA9) static int @@ -276,6 +277,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_3, ISO_8859_3) = { onigenc_single_byte_mbc_enc_len, @@ -294,7 +296,11 @@ OnigEncodingDefine(iso_8859_3, ISO_8859_3) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c index e2134e8c0b27a5..5f01f0157556dd 100644 --- a/enc/iso_8859_4.c +++ b/enc/iso_8859_4.c @@ -223,6 +223,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -272,6 +273,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_4, ISO_8859_4) = { onigenc_single_byte_mbc_enc_len, @@ -290,7 +292,11 @@ OnigEncodingDefine(iso_8859_4, ISO_8859_4) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c index 6fafc358233eb9..8223fc0ec706c8 100644 --- a/enc/iso_8859_5.c +++ b/enc/iso_8859_5.c @@ -209,6 +209,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -240,6 +241,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { onigenc_single_byte_mbc_enc_len, @@ -258,7 +260,11 @@ OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_6.c b/enc/iso_8859_6.c index cdb74054d1e312..78543ea307d221 100644 --- a/enc/iso_8859_6.c +++ b/enc/iso_8859_6.c @@ -93,7 +93,11 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c index ac973f74ba51ef..e84f5c3460ad4e 100644 --- a/enc/iso_8859_7.c +++ b/enc/iso_8859_7.c @@ -205,6 +205,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -259,6 +260,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { onigenc_single_byte_mbc_enc_len, @@ -277,7 +279,11 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c index e256855f2130a7..b757a283de15a0 100644 --- a/enc/iso_8859_8.c +++ b/enc/iso_8859_8.c @@ -93,7 +93,11 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index 004eec310fcd52..f15953963bf4de 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -213,6 +213,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xFD) #define I_WITH_DOT_ABOVE (0xDD) static int @@ -265,6 +266,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { onigenc_single_byte_mbc_enc_len, @@ -283,7 +285,11 @@ OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/koi8_r.c b/enc/koi8_r.c index a52097577416a1..39f24824651275 100644 --- a/enc/koi8_r.c +++ b/enc/koi8_r.c @@ -214,7 +214,11 @@ OnigEncodingDefine(koi8_r, KOI8_R) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/koi8_u.c b/enc/koi8_u.c index 50bb78bd04841d..8cd890dd16a041 100644 --- a/enc/koi8_u.c +++ b/enc/koi8_u.c @@ -218,7 +218,11 @@ OnigEncodingDefine(koi8_u, KOI8_U) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/shift_jis.c b/enc/shift_jis.c index f1355d2d95fcb4..48f648868af13d 100644 --- a/enc/shift_jis.c +++ b/enc/shift_jis.c @@ -47,7 +47,11 @@ OnigEncodingDefine(shift_jis, Shift_JIS) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/unicode.c b/enc/unicode.c index 07497cdbe46731..5bc806863e8f55 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -655,6 +655,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, return n; } +#ifdef USE_CASE_MAP_API /* length in bytes for three characters in UTF-32; e.g. needed for ffi (U+FB03) */ #define CASE_MAPPING_SLACK 12 #define MODIFIED (flags |= ONIGENC_CASE_MODIFIED) @@ -798,6 +799,7 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, *flagP = flags; return (int )(to - to_start); } +#endif const char onigenc_unicode_version_string[] = #ifdef ONIG_UNICODE_VERSION_STRING diff --git a/enc/us_ascii.c b/enc/us_ascii.c index 08f9072c435591..253ee695724159 100644 --- a/enc/us_ascii.c +++ b/enc/us_ascii.c @@ -32,7 +32,11 @@ OnigEncodingDefine(us_ascii, US_ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif ENCINDEX_US_ASCII, ONIGENC_FLAG_NONE, }; diff --git a/enc/utf_16be.c b/enc/utf_16be.c index f9dd7119d65a0e..0086040b5d5bd9 100644 --- a/enc/utf_16be.c +++ b/enc/utf_16be.c @@ -249,7 +249,11 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = { onigenc_utf16_32_get_ctype_code_range, utf16be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_16le.c b/enc/utf_16le.c index 2c8438d0be2554..ca0fce53872045 100644 --- a/enc/utf_16le.c +++ b/enc/utf_16le.c @@ -242,7 +242,11 @@ OnigEncodingDefine(utf_16le, UTF_16LE) = { onigenc_utf16_32_get_ctype_code_range, utf16le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_32be.c b/enc/utf_32be.c index 17841e52a4e82d..e05cfaf1b2fdf8 100644 --- a/enc/utf_32be.c +++ b/enc/utf_32be.c @@ -199,7 +199,11 @@ OnigEncodingDefine(utf_32be, UTF_32BE) = { onigenc_utf16_32_get_ctype_code_range, utf32be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_32le.c b/enc/utf_32le.c index 18b798f102c5d1..651efdcec57790 100644 --- a/enc/utf_32le.c +++ b/enc/utf_32le.c @@ -199,7 +199,11 @@ OnigEncodingDefine(utf_32le, UTF_32LE) = { onigenc_utf16_32_get_ctype_code_range, utf32le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_8.c b/enc/utf_8.c index cdf2510d84c829..ae7c98469d50bd 100644 --- a/enc/utf_8.c +++ b/enc/utf_8.c @@ -431,7 +431,11 @@ OnigEncodingDefine(utf_8, UTF_8) = { get_ctype_code_range, left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif ENCINDEX_UTF_8, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/windows_1250.c b/enc/windows_1250.c index daf23e9d1e6e6a..d38d50a01d370d 100644 --- a/enc/windows_1250.c +++ b/enc/windows_1250.c @@ -190,6 +190,7 @@ cp1250_get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -239,6 +240,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1250, Windows_1250) = { onigenc_single_byte_mbc_enc_len, @@ -257,7 +259,11 @@ OnigEncodingDefine(windows_1250, Windows_1250) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1251.c b/enc/windows_1251.c index 6c892c1b8ce39a..81641d0337f2b2 100644 --- a/enc/windows_1251.c +++ b/enc/windows_1251.c @@ -180,6 +180,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -221,6 +222,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1251, Windows_1251) = { onigenc_single_byte_mbc_enc_len, @@ -239,7 +241,11 @@ OnigEncodingDefine(windows_1251, Windows_1251) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1252.c b/enc/windows_1252.c index b685878d3fc5d7..6aece95c0ab2ae 100644 --- a/enc/windows_1252.c +++ b/enc/windows_1252.c @@ -181,6 +181,7 @@ cp1252_get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -228,6 +229,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1252, Windows_1252) = { onigenc_single_byte_mbc_enc_len, @@ -246,7 +248,11 @@ OnigEncodingDefine(windows_1252, Windows_1252) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1253.c b/enc/windows_1253.c index b2a43581c39240..c95ea3f41ccd9f 100644 --- a/enc/windows_1253.c +++ b/enc/windows_1253.c @@ -213,6 +213,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -272,6 +273,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1253, Windows_1253) = { onigenc_single_byte_mbc_enc_len, @@ -290,7 +292,11 @@ OnigEncodingDefine(windows_1253, Windows_1253) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1254.c b/enc/windows_1254.c index 5e6d92d3d2680e..c8d5991686a0b3 100644 --- a/enc/windows_1254.c +++ b/enc/windows_1254.c @@ -221,6 +221,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xFD) #define I_WITH_DOT_ABOVE (0xDD) static int @@ -277,6 +278,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1254, Windows_1254) = { onigenc_single_byte_mbc_enc_len, @@ -295,7 +297,11 @@ OnigEncodingDefine(windows_1254, Windows_1254) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1257.c b/enc/windows_1257.c index ada03b72bf01cf..def13c8c49fed0 100644 --- a/enc/windows_1257.c +++ b/enc/windows_1257.c @@ -225,6 +225,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xB9) #define I_WITH_DOT_ABOVE (0xA9) static int @@ -279,6 +280,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1257, Windows_1257) = { onigenc_single_byte_mbc_enc_len, @@ -297,7 +299,11 @@ OnigEncodingDefine(windows_1257, Windows_1257) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_31j.c b/enc/windows_31j.c index 1eb859596a3bc3..cd8bd83fddf693 100644 --- a/enc/windows_31j.c +++ b/enc/windows_31j.c @@ -48,7 +48,11 @@ OnigEncodingDefine(windows_31j, Windows_31J) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index db290cd47a644d..9dcddee829a86f 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -4,8 +4,8 @@ onigmo.h - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2009 K.Kosako - * Copyright (c) 2011-2017 K.Takata + * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,8 +38,8 @@ extern "C" { #endif #define ONIGMO_VERSION_MAJOR 6 -#define ONIGMO_VERSION_MINOR 1 -#define ONIGMO_VERSION_TEENY 3 +#define ONIGMO_VERSION_MINOR 2 +#define ONIGMO_VERSION_TEENY 0 #ifndef ONIG_EXTERN # ifdef RUBY_EXTERN @@ -789,8 +789,8 @@ typedef struct re_pattern_buffer { unsigned char *exact; unsigned char *exact_end; unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ + int *reserved1; + int *reserved2; OnigDistance dmin; /* min-distance of exact or map */ OnigDistance dmax; /* max-distance of exact or map */ diff --git a/regcomp.c b/regcomp.c index 0ecf162556b777..18b2c97eb6381c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2,8 +2,8 @@ regcomp.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2013 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -4216,7 +4216,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Sunday's quick search */ static int set_bm_skip(UChar* s, UChar* end, regex_t* reg, - UChar skip[], int** int_skip, int ignore_case) + UChar skip[], int ignore_case) { OnigDistance i, len; int clen, flen, n, j, k; @@ -4225,94 +4225,60 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, OnigEncoding enc = reg->enc; len = end - s; - if (len < ONIG_CHAR_TABLE_SIZE) { - if (ignore_case) { - for (i = 0; i < len; i += clen) { - p = s + i; - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { - /* Different length isn't supported. Stop optimization at here. */ - end = p; - goto endcheck; - } - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); - if (flen != clen) { - /* Different length isn't supported. Stop optimization at here. */ - end = p; - goto endcheck; - } - } - } -endcheck: - ; - } - - len = end - s; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - skip[i] = (UChar )(len + 1); - n = 0; - for (i = 0; i < len; i += clen) { - p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < clen; j++) { - skip[s[i + j]] = (UChar )(len - i - j); - for (k = 0; k < n; k++) { - ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); - skip[buf[j]] = (UChar )(len - i - j); - } - } - } - } - else { -# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE + if (len >= ONIG_CHAR_TABLE_SIZE) { /* This should not happen. */ return ONIGERR_TYPE_BUG; -# else - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1); + } - n = 0; + if (ignore_case) { for (i = 0; i < len; i += clen) { p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); clen = enclen(enc, p, end); if (p + clen > end) clen = (int )(end - p); for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); + if (flen != clen) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } } - for (j = 0; j < clen; j++) { - (*int_skip)[s[i + j]] = (int )(len - i - j); - for (k = 0; k < n; k++) { - (*int_skip)[buf[k][j]] = (int )(len - i - j); - } + } +endcheck: + len = end - s; + } + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + skip[i] = (UChar )(len + 1); + n = 0; + for (i = 0; i < len; i += clen) { + p = s + i; + if (ignore_case) + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); + clen = enclen(enc, p, end); + if (p + clen > end) + clen = (int )(end - p); + + for (j = 0; j < clen; j++) { + skip[s[i + j]] = (UChar )(len - i - j); + for (k = 0; k < n; k++) { + ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); + skip[buf[j]] = (UChar )(len - i - j); } } -# endif } - return (int)len; + + return (int )len; } typedef struct { @@ -5299,7 +5265,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { e->len = set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 1); + reg->map, 1); reg->exact_end = reg->exact + e->len; if (e->len >= 3) { reg->optimize = (allow_reverse != 0 @@ -5318,7 +5284,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) else { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 0); + reg->map, 0); reg->optimize = (allow_reverse != 0 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } @@ -5601,8 +5567,6 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg)) { xfree(reg->p); xfree(reg->exact); - xfree(reg->int_map); - xfree(reg->int_map_backward); xfree(reg->repeat_range); onig_free(reg->chain); @@ -5649,14 +5613,6 @@ onig_reg_copy(regex_t** nreg, regex_t* oreg) (reg)->exact_end = (reg)->exact + exact_size; } - if (IS_NOT_NULL(reg->int_map)) { - if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE)) - goto err_int_map; - } - if (IS_NOT_NULL(reg->int_map_backward)) { - if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE)) - goto err_int_map_backward; - } if (IS_NOT_NULL(reg->p)) { if (COPY_FAILED(p, reg->alloc)) goto err_p; @@ -5683,10 +5639,6 @@ onig_reg_copy(regex_t** nreg, regex_t* oreg) err_repeat_range: xfree(reg->p); err_p: - xfree(reg->int_map_backward); - err_int_map_backward: - xfree(reg->int_map); - err_int_map: xfree(reg->exact); err: xfree(reg); @@ -5703,8 +5655,6 @@ onig_memsize(const regex_t *reg) if (IS_NULL(reg)) return 0; if (IS_NOT_NULL(reg->p)) size += reg->alloc; if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; - if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; - if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); @@ -5969,6 +5919,12 @@ onig_reg_init(regex_t* reg, OnigOptionType option, if (IS_NULL(reg)) return ONIGERR_INVALID_ARGUMENT; + (reg)->exact = (UChar* )NULL; + (reg)->chain = (regex_t* )NULL; + (reg)->p = (UChar* )NULL; + (reg)->name_table = (void* )NULL; + (reg)->repeat_range = (OnigRepeatRange* )NULL; + if (ONIGENC_IS_UNDEF(enc)) return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET; @@ -5988,15 +5944,9 @@ onig_reg_init(regex_t* reg, OnigOptionType option, (reg)->options = option; (reg)->syntax = syntax; (reg)->optimize = 0; - (reg)->exact = (UChar* )NULL; - (reg)->int_map = (int* )NULL; - (reg)->int_map_backward = (int* )NULL; - (reg)->chain = (regex_t* )NULL; - (reg)->p = (UChar* )NULL; (reg)->alloc = 0; (reg)->used = 0; - (reg)->name_table = (void* )NULL; (reg)->case_fold_flag = case_fold_flag; diff --git a/regenc.c b/regenc.c index 823aacc28e615b..c595f44b29e36d 100644 --- a/regenc.c +++ b/regenc.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -640,18 +640,19 @@ onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, } extern int -onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) { + if (code > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; return 1; } extern int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) { -#ifdef RUBY - if (code > 0xff) - rb_raise(rb_eRangeError, "%u out of char range", code); -#endif + if (code > 0xff) { + return ONIGERR_INVALID_CODE_POINT_VALUE; + } *buf = (UChar )(code & 0xff); return 1; } @@ -966,6 +967,7 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, } #endif +#ifdef USE_CASE_MAP_API extern int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) @@ -1027,3 +1029,4 @@ onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar *flagP = flags; return (int )(to - to_start); } +#endif diff --git a/regenc.h b/regenc.h index 4fbe403b6301d8..fe0440dd740e9b 100644 --- a/regenc.h +++ b/regenc.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -134,11 +134,13 @@ typedef struct { #define roomof(x, y) (((x) + (y) - 1) / (y)) #define type_roomof(x, y) roomof(sizeof(x), sizeof(y)) +/* config */ #define USE_CRNL_AS_LINE_TERMINATOR #define USE_UNICODE_PROPERTIES #define USE_UNICODE_AGE_PROPERTIES /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ +#define USE_CASE_MAP_API #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII diff --git a/regerror.c b/regerror.c index 8667084d41c931..e772feee81914a 100644 --- a/regerror.c +++ b/regerror.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -63,14 +63,18 @@ onig_error_code_to_format(OnigPosition code) p = "parse depth limit over"; break; case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET: p = "default multibyte-encoding is not set"; break; +#if 0 case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: p = "can't convert to wide-char on specified multibyte-encoding"; break; +#endif case ONIGERR_INVALID_ARGUMENT: p = "invalid argument"; break; case ONIGERR_END_PATTERN_AT_LEFT_BRACE: p = "end pattern at left brace"; break; +#if 0 case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: p = "end pattern at left bracket"; break; +#endif case ONIGERR_EMPTY_CHAR_CLASS: p = "empty char-class"; break; case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: @@ -87,16 +91,20 @@ onig_error_code_to_format(OnigPosition code) p = "invalid control-code syntax"; break; case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: p = "char-class value at end of range"; break; +#if 0 case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: p = "char-class value at start of range"; break; +#endif case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: p = "unmatched range specifier in char-class"; break; case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: p = "target of repeat operator is not specified"; break; case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: p = "target of repeat operator is invalid"; break; +#if 0 case ONIGERR_NESTED_REPEAT_OPERATOR: p = "nested repeat operator"; break; +#endif case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: p = "unmatched close parenthesis"; break; case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: @@ -121,14 +129,18 @@ onig_error_code_to_format(OnigPosition code) p = "upper is smaller than lower in repeat range"; break; case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: p = "empty range in char class"; break; +#if 0 case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: p = "mismatch multibyte code length in char-class range"; break; +#endif case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: p = "too many multibyte code ranges are specified"; break; case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: p = "too short multibyte code string"; break; +#if 0 case ONIGERR_TOO_BIG_BACKREF_NUMBER: p = "too big backref number"; break; +#endif case ONIGERR_INVALID_BACKREF: #ifdef USE_NAMED_GROUP p = "invalid backref number/name"; break; @@ -161,8 +173,10 @@ onig_error_code_to_format(OnigPosition code) p = "multiplex definition name <%n> call"; break; case ONIGERR_NEVER_ENDING_RECURSION: p = "never ending recursion"; break; +#ifdef USE_CAPTURE_HISTORY case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: p = "group number is too big for capture history"; break; +#endif case ONIGERR_INVALID_CHAR_PROPERTY_NAME: p = "invalid character property name {%n}"; break; case ONIGERR_TOO_MANY_CAPTURE_GROUPS: diff --git a/regexec.c b/regexec.c index eec3e236631805..3210c7cc1b5603 100644 --- a/regexec.c +++ b/regexec.c @@ -2,8 +2,8 @@ regexec.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -4401,39 +4401,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } -# endif + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -4460,32 +4440,17 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->map[s[1]]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; } -# endif + if (s + 1 >= end) break; + s += reg->map[s[1]]; } + return (UChar* )NULL; } @@ -4514,35 +4479,17 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif + while (s < end) { + se = s + tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, se + 1)) + return (UChar* )s; + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -4571,82 +4518,17 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->map[s[1]]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; - } -# endif - } - return (UChar* )NULL; -} - -#ifdef USE_INT_MAP_BACKWARD -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = (int )(end - s); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return (UChar* )s; - - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); + while (s < end) { + p = s - tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + p, s + 1)) + return (UChar* )p; + if (s + 1 >= end) break; + s += reg->map[s[1]]; } return (UChar* )NULL; } -#endif static UChar* map_search(OnigEncoding enc, UChar map[], @@ -4894,21 +4776,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT_BM: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: -#ifdef USE_INT_MAP_BACKWARD - if (IS_NULL(reg->int_map_backward)) { - int r; - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); -#else goto exact_method; -#endif break; case ONIG_OPTIMIZE_MAP: diff --git a/regint.h b/regint.h index 9d69e2d25e51a8..3f4aa919e5046f 100644 --- a/regint.h +++ b/regint.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2013 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -91,7 +91,7 @@ #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ #define DEFAULT_PARSE_DEPTH_LIMIT 4096 -#define OPT_EXACT_MAXLEN 24 +#define OPT_EXACT_MAXLEN 24 /* This must be smaller than ONIG_CHAR_TABLE_SIZE. */ /* check config */ #if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP) diff --git a/regparse.c b/regparse.c index 418bd3814076d9..123b3015a5a936 100644 --- a/regparse.c +++ b/regparse.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -4043,7 +4043,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (c == 'R' || c == '0') { PINC; /* skip 'R' / '0' */ - if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME; + if (!PPEEK_IS(')')) { + r = ONIGERR_INVALID_GROUP_NAME; + onig_scan_env_set_error_string(env, r, p - 1, p + 1); + return r; + } PINC; /* skip ')' */ name_end = name = p; gnum = 0; @@ -6309,11 +6313,14 @@ parse_exp(Node** np, OnigToken* tok, int term, int r, len, group = 0; Node* qn; Node** targetp; + unsigned int parse_depth; *np = NULL; if (tok->type == (enum TokenSyms )term) goto end_of_token; + parse_depth = env->parse_depth; + switch (tok->type) { case TK_ALT: case TK_EOT: @@ -6624,6 +6631,10 @@ parse_exp(Node** np, OnigToken* tok, int term, if (is_invalid_quantifier_target(*targetp)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + parse_depth++; + if (parse_depth > ParseDepthLimit) + return ONIGERR_PARSE_DEPTH_LIMIT_OVER; + qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, (r == TK_INTERVAL ? 1 : 0)); CHECK_NULL_RETURN_MEMERR(qn); diff --git a/regparse.h b/regparse.h index dd35d485255bad..65da835a55aca1 100644 --- a/regparse.h +++ b/regparse.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sprintf.c b/sprintf.c index cb266a98416e33..de88a9f4b35a20 100644 --- a/sprintf.c +++ b/sprintf.c @@ -441,7 +441,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) VALUE val = GETARG(); VALUE tmp; unsigned int c; - int n; + int n, encidx; tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { @@ -451,11 +451,13 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) goto format_s1; } n = NUM2INT(val); - if (n >= 0) n = rb_enc_codelen((c = n), enc); + if (n >= 0) { + n = rb_enc_codelen((c = n), enc); + encidx = rb_ascii8bit_appendable_encoding_index(enc, c); + } if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } - int encidx = rb_ascii8bit_appendable_encoding_index(enc, c); if (encidx >= 0 && encidx != rb_enc_to_index(enc)) { /* special case */ rb_enc_associate_index(result, encidx); diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index 6945c6cdce52a6..b37c75e704bb4a 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -66,6 +66,19 @@ def lib((upstream, branch), gemspec_in_subdir: false) "lib/unicode_normalize", # not to match with "lib/un" ] REPOSITORIES = { + Onigmo: repo("k-takata/Onigmo", [ + ["regcomp.c", "regcomp.c"], + ["regenc.c", "regenc.c"], + ["regenc.h", "regenc.h"], + ["regerror.c", "regerror.c"], + ["regexec.c", "regexec.c"], + ["regint.h", "regint.h"], + ["regparse.c", "regparse.c"], + ["regparse.h", "regparse.h"], + ["regsyntax.c", "regsyntax.c"], + ["onigmo.h", "include/ruby/onigmo.h"], + ["enc", "enc"], + ]), "io-console": repo("ruby/io-console", [ ["ext/io/console", "ext/io/console"], ["test/io/console", "test/io/console"], diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 798a460c1980ac..794293d1d321c7 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -413,8 +413,6 @@ fn main() { .allowlist_function("rb_FL_TEST_RAW") .allowlist_function("rb_RB_TYPE_P") .allowlist_function("rb_BASIC_OP_UNREDEFINED_P") - .allowlist_function("rb_RSTRUCT_LEN") - .allowlist_function("rb_RSTRUCT_SET") .allowlist_function("rb_vm_ci_argc") .allowlist_function("rb_vm_ci_mid") .allowlist_function("rb_vm_ci_flag") diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 57a3bee7e01d8c..51faaab9c24658 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -198,7 +198,6 @@ pub use rb_FL_TEST as FL_TEST; pub use rb_FL_TEST_RAW as FL_TEST_RAW; pub use rb_RB_TYPE_P as RB_TYPE_P; pub use rb_BASIC_OP_UNREDEFINED_P as BASIC_OP_UNREDEFINED_P; -pub use rb_RSTRUCT_LEN as RSTRUCT_LEN; pub use rb_vm_ci_argc as vm_ci_argc; pub use rb_vm_ci_mid as vm_ci_mid; pub use rb_vm_ci_flag as vm_ci_flag; diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index efb1559fb75512..5d4fed0c3ac18d 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -2153,7 +2153,6 @@ unsafe extern "C" { pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; - pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long; pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool; pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE;