From 916c0a810552b4c2c9a7fce2a50dc8c6efd5bb2f Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Sun, 11 Jan 2026 10:29:32 +0100 Subject: [PATCH 01/18] ZJIT: remove unused rb_RSTRUCT_SET() --- zjit/bindgen/src/main.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 798a460c1980ac..4af4886384a1e9 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -414,7 +414,6 @@ fn main() { .allowlist_function("rb_RB_TYPE_P") .allowlist_function("rb_BASIC_OP_UNREDEFINED_P") .allowlist_function("rb_RSTRUCT_LEN") - .allowlist_function("rb_RSTRUCT_SET") .allowlist_function("rb_vm_ci_argc") .allowlist_function("rb_vm_ci_mid") .allowlist_function("rb_vm_ci_flag") From bf36ad9c12481461031a2dbde39e27d96e5a86e4 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Sun, 11 Jan 2026 10:37:30 +0100 Subject: [PATCH 02/18] ZJIT: remove unused rb_RSTRUCT_LEN() --- zjit/bindgen/src/main.rs | 1 - zjit/src/cruby.rs | 1 - zjit/src/cruby_bindings.inc.rs | 1 - 3 files changed, 3 deletions(-) diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 4af4886384a1e9..794293d1d321c7 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -413,7 +413,6 @@ fn main() { .allowlist_function("rb_FL_TEST_RAW") .allowlist_function("rb_RB_TYPE_P") .allowlist_function("rb_BASIC_OP_UNREDEFINED_P") - .allowlist_function("rb_RSTRUCT_LEN") .allowlist_function("rb_vm_ci_argc") .allowlist_function("rb_vm_ci_mid") .allowlist_function("rb_vm_ci_flag") diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 57a3bee7e01d8c..51faaab9c24658 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -198,7 +198,6 @@ pub use rb_FL_TEST as FL_TEST; pub use rb_FL_TEST_RAW as FL_TEST_RAW; pub use rb_RB_TYPE_P as RB_TYPE_P; pub use rb_BASIC_OP_UNREDEFINED_P as BASIC_OP_UNREDEFINED_P; -pub use rb_RSTRUCT_LEN as RSTRUCT_LEN; pub use rb_vm_ci_argc as vm_ci_argc; pub use rb_vm_ci_mid as vm_ci_mid; pub use rb_vm_ci_flag as vm_ci_flag; diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index efb1559fb75512..5d4fed0c3ac18d 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -2153,7 +2153,6 @@ unsafe extern "C" { pub fn rb_FL_TEST(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE; pub fn rb_RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool; - pub fn rb_RSTRUCT_LEN(st: VALUE) -> ::std::os::raw::c_long; pub fn rb_get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo; pub fn rb_BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: u32) -> bool; pub fn rb_RCLASS_ORIGIN(c: VALUE) -> VALUE; From 6484a71a4fee654e73f377a01db331500222dd13 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 12 Jan 2026 10:23:10 +0900 Subject: [PATCH 03/18] Add Onigmo to sync_default_gems.rb --- tool/sync_default_gems.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb index 6945c6cdce52a6..b37c75e704bb4a 100755 --- a/tool/sync_default_gems.rb +++ b/tool/sync_default_gems.rb @@ -66,6 +66,19 @@ def lib((upstream, branch), gemspec_in_subdir: false) "lib/unicode_normalize", # not to match with "lib/un" ] REPOSITORIES = { + Onigmo: repo("k-takata/Onigmo", [ + ["regcomp.c", "regcomp.c"], + ["regenc.c", "regenc.c"], + ["regenc.h", "regenc.h"], + ["regerror.c", "regerror.c"], + ["regexec.c", "regexec.c"], + ["regint.h", "regint.h"], + ["regparse.c", "regparse.c"], + ["regparse.h", "regparse.h"], + ["regsyntax.c", "regsyntax.c"], + ["onigmo.h", "include/ruby/onigmo.h"], + ["enc", "enc"], + ]), "io-console": repo("ruby/io-console", [ ["ext/io/console", "ext/io/console"], ["test/io/console", "test/io/console"], From fb7f344b09a8351544f9b4bbb593917b552b07ca Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Tue, 29 Jan 2019 18:59:50 +0900 Subject: [PATCH 04/18] [k-takata/Onigmo] Remove code for reg->int_map https://github.com/k-takata/Onigmo/commit/6c58de82d2 --- include/ruby/onigmo.h | 2 +- regcomp.c | 46 ++----------- regexec.c | 146 ++++++++++++------------------------------ 3 files changed, 45 insertions(+), 149 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index db290cd47a644d..f949219308b2a3 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -789,7 +789,7 @@ typedef struct re_pattern_buffer { unsigned char *exact; unsigned char *exact_end; unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ + int *reserved1; int *int_map_backward; /* BM skip for backward search */ OnigDistance dmin; /* min-distance of exact or map */ OnigDistance dmax; /* max-distance of exact or map */ diff --git a/regcomp.c b/regcomp.c index 0ecf162556b777..3445fcaccb0291 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4216,7 +4216,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Sunday's quick search */ static int set_bm_skip(UChar* s, UChar* end, regex_t* reg, - UChar skip[], int** int_skip, int ignore_case) + UChar skip[], int ignore_case) { OnigDistance i, len; int clen, flen, n, j, k; @@ -4280,36 +4280,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, /* This should not happen. */ return ONIGERR_TYPE_BUG; # else - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1); - - n = 0; - for (i = 0; i < len; i += clen) { - p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) - return 1; /* different length isn't supported. */ - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); - if (flen != clen) - return 1; /* different length isn't supported. */ - } - for (j = 0; j < clen; j++) { - (*int_skip)[s[i + j]] = (int )(len - i - j); - for (k = 0; k < n; k++) { - (*int_skip)[buf[k][j]] = (int )(len - i - j); - } - } - } +# error OPT_EXACT_MAXLEN exceeds ONIG_CHAR_TABLE_SIZE. # endif } return (int)len; @@ -5299,7 +5270,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->ignore_case > 0) { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { e->len = set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 1); + reg->map, 1); reg->exact_end = reg->exact + e->len; if (e->len >= 3) { reg->optimize = (allow_reverse != 0 @@ -5318,7 +5289,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) else { if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { set_bm_skip(reg->exact, reg->exact_end, reg, - reg->map, &(reg->int_map), 0); + reg->map, 0); reg->optimize = (allow_reverse != 0 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } @@ -5601,7 +5572,6 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg)) { xfree(reg->p); xfree(reg->exact); - xfree(reg->int_map); xfree(reg->int_map_backward); xfree(reg->repeat_range); onig_free(reg->chain); @@ -5649,10 +5619,6 @@ onig_reg_copy(regex_t** nreg, regex_t* oreg) (reg)->exact_end = (reg)->exact + exact_size; } - if (IS_NOT_NULL(reg->int_map)) { - if (COPY_FAILED(int_map, sizeof(int) * ONIG_CHAR_TABLE_SIZE)) - goto err_int_map; - } if (IS_NOT_NULL(reg->int_map_backward)) { if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE)) goto err_int_map_backward; @@ -5685,8 +5651,6 @@ onig_reg_copy(regex_t** nreg, regex_t* oreg) err_p: xfree(reg->int_map_backward); err_int_map_backward: - xfree(reg->int_map); - err_int_map: xfree(reg->exact); err: xfree(reg); @@ -5703,7 +5667,6 @@ onig_memsize(const regex_t *reg) if (IS_NULL(reg)) return 0; if (IS_NOT_NULL(reg->p)) size += reg->alloc; if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; - if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); @@ -5989,7 +5952,6 @@ onig_reg_init(regex_t* reg, OnigOptionType option, (reg)->syntax = syntax; (reg)->optimize = 0; (reg)->exact = (UChar* )NULL; - (reg)->int_map = (int* )NULL; (reg)->int_map_backward = (int* )NULL; (reg)->chain = (regex_t* )NULL; diff --git a/regexec.c b/regexec.c index eec3e236631805..31be00864ef55d 100644 --- a/regexec.c +++ b/regexec.c @@ -4401,39 +4401,19 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } -# endif + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -4460,32 +4440,17 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->map[s[1]]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; } -# endif + if (s + 1 >= end) break; + s += reg->map[s[1]]; } + return (UChar* )NULL; } @@ -4514,35 +4479,17 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - se = s + tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, se + 1)) - return (UChar* )s; - if (s + 1 >= end) break; - skip = reg->int_map[se[1]]; - t = s; - do { - s += enclen(enc, s, end); - } while ((s - t) < skip && s < end); - } -# endif + while (s < end) { + se = s + tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, se + 1)) + return (UChar* )s; + if (s + 1 >= end) break; + skip = reg->map[se[1]]; + t = s; + do { + s += enclen(enc, s, end); + } while ((s - t) < skip && s < end); } return (UChar* )NULL; @@ -4571,28 +4518,15 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, end = text_end; s = text + tlen1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->map[s[1]]; - } - } - else { /* see int_map[] */ -# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE - while (s < end) { - p = s - tlen1; - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - p, s + 1)) - return (UChar* )p; - if (s + 1 >= end) break; - s += reg->int_map[s[1]]; - } -# endif + while (s < end) { + p = s - tlen1; + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + p, s + 1)) + return (UChar* )p; + if (s + 1 >= end) break; + s += reg->map[s[1]]; } + return (UChar* )NULL; } From bbf9bf3fc5c322ab8e622714b0178aca58e82191 Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Tue, 29 Jan 2019 19:00:02 +0900 Subject: [PATCH 05/18] [k-takata/Onigmo] Remove code for backward BM search The code has not been used for long. (Oniguruma also removed this code.) https://github.com/k-takata/Onigmo/commit/8796781fdd --- include/ruby/onigmo.h | 2 +- regcomp.c | 9 ------ regexec.c | 66 ------------------------------------------- 3 files changed, 1 insertion(+), 76 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index f949219308b2a3..b9a21206dea0bb 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -790,7 +790,7 @@ typedef struct re_pattern_buffer { unsigned char *exact_end; unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ int *reserved1; - int *int_map_backward; /* BM skip for backward search */ + int *reserved2; OnigDistance dmin; /* min-distance of exact or map */ OnigDistance dmax; /* max-distance of exact or map */ diff --git a/regcomp.c b/regcomp.c index 3445fcaccb0291..6ad72bcbd0bdc3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5572,7 +5572,6 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg)) { xfree(reg->p); xfree(reg->exact); - xfree(reg->int_map_backward); xfree(reg->repeat_range); onig_free(reg->chain); @@ -5619,10 +5618,6 @@ onig_reg_copy(regex_t** nreg, regex_t* oreg) (reg)->exact_end = (reg)->exact + exact_size; } - if (IS_NOT_NULL(reg->int_map_backward)) { - if (COPY_FAILED(int_map_backward, sizeof(int) * ONIG_CHAR_TABLE_SIZE)) - goto err_int_map_backward; - } if (IS_NOT_NULL(reg->p)) { if (COPY_FAILED(p, reg->alloc)) goto err_p; @@ -5649,8 +5644,6 @@ onig_reg_copy(regex_t** nreg, regex_t* oreg) err_repeat_range: xfree(reg->p); err_p: - xfree(reg->int_map_backward); - err_int_map_backward: xfree(reg->exact); err: xfree(reg); @@ -5667,7 +5660,6 @@ onig_memsize(const regex_t *reg) if (IS_NULL(reg)) return 0; if (IS_NOT_NULL(reg->p)) size += reg->alloc; if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; - if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); @@ -5952,7 +5944,6 @@ onig_reg_init(regex_t* reg, OnigOptionType option, (reg)->syntax = syntax; (reg)->optimize = 0; (reg)->exact = (UChar* )NULL; - (reg)->int_map_backward = (int* )NULL; (reg)->chain = (regex_t* )NULL; (reg)->p = (UChar* )NULL; diff --git a/regexec.c b/regexec.c index 31be00864ef55d..2e79623c719171 100644 --- a/regexec.c +++ b/regexec.c @@ -4530,58 +4530,6 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, return (UChar* )NULL; } -#ifdef USE_INT_MAP_BACKWARD -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = (int )(end - s); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return (UChar* )s; - - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); - } - - return (UChar* )NULL; -} -#endif - static UChar* map_search(OnigEncoding enc, UChar map[], const UChar* text, const UChar* text_range, const UChar* text_end) @@ -4828,21 +4776,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, case ONIG_OPTIMIZE_EXACT_BM: case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: -#ifdef USE_INT_MAP_BACKWARD - if (IS_NULL(reg->int_map_backward)) { - int r; - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); -#else goto exact_method; -#endif break; case ONIG_OPTIMIZE_MAP: From f9131412f874aa348df383266cee7dc2cc82a9ca Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Tue, 29 Jan 2019 19:00:12 +0900 Subject: [PATCH 06/18] [k-takata/Onigmo] Revise set_bm_skip() https://github.com/k-takata/Onigmo/commit/6875da50f7 --- regcomp.c | 89 ++++++++++++++++++++++++++----------------------------- regint.h | 2 +- 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/regcomp.c b/regcomp.c index 6ad72bcbd0bdc3..41154b54855dd9 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4225,64 +4225,59 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, OnigEncoding enc = reg->enc; len = end - s; - if (len < ONIG_CHAR_TABLE_SIZE) { - if (ignore_case) { - for (i = 0; i < len; i += clen) { - p = s + i; - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); - clen = enclen(enc, p, end); - if (p + clen > end) - clen = (int )(end - p); - - for (j = 0; j < n; j++) { - if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { - /* Different length isn't supported. Stop optimization at here. */ - end = p; - goto endcheck; - } - flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); - if (flen != clen) { - /* Different length isn't supported. Stop optimization at here. */ - end = p; - goto endcheck; - } - } - } -endcheck: - ; - } + if (len >= ONIG_CHAR_TABLE_SIZE) { + /* This should not happen. */ + return ONIGERR_TYPE_BUG; + } - len = end - s; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - skip[i] = (UChar )(len + 1); - n = 0; + if (ignore_case) { for (i = 0; i < len; i += clen) { p = s + i; - if (ignore_case) - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); clen = enclen(enc, p, end); if (p + clen > end) clen = (int )(end - p); - for (j = 0; j < clen; j++) { - skip[s[i + j]] = (UChar )(len - i - j); - for (k = 0; k < n; k++) { - ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); - skip[buf[j]] = (UChar )(len - i - j); - } + for (j = 0; j < n; j++) { + if ((items[j].code_len != 1) || (items[j].byte_len != clen)) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } + flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf); + if (flen != clen) { + /* Different length isn't supported. Stop optimization at here. */ + end = p; + goto endcheck; + } } } +endcheck: + len = end - s; } - else { -# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE - /* This should not happen. */ - return ONIGERR_TYPE_BUG; -# else -# error OPT_EXACT_MAXLEN exceeds ONIG_CHAR_TABLE_SIZE. -# endif + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + skip[i] = (UChar )(len + 1); + n = 0; + for (i = 0; i < len; i += clen) { + p = s + i; + if (ignore_case) + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, + p, end, items); + clen = enclen(enc, p, end); + if (p + clen > end) + clen = (int )(end - p); + + for (j = 0; j < clen; j++) { + skip[s[i + j]] = (UChar )(len - i - j); + for (k = 0; k < n; k++) { + ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf); + skip[buf[j]] = (UChar )(len - i - j); + } + } } + return (int)len; } diff --git a/regint.h b/regint.h index 9d69e2d25e51a8..0593fa2cc1f057 100644 --- a/regint.h +++ b/regint.h @@ -91,7 +91,7 @@ #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ #define DEFAULT_PARSE_DEPTH_LIMIT 4096 -#define OPT_EXACT_MAXLEN 24 +#define OPT_EXACT_MAXLEN 24 /* This must be smaller than ONIG_CHAR_TABLE_SIZE. */ /* check config */ #if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP) From 85a7171b413305c4bfaca72ad958b8404353f723 Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Wed, 30 Jan 2019 18:51:56 +0900 Subject: [PATCH 07/18] [k-takata/Onigmo] Add USE_CASE_MAP_API config The case_map API is mainly (only?) used in Ruby. Make it possible to disable the API. https://github.com/k-takata/Onigmo/commit/80e289d6bb --- enc/ascii.c | 4 ++++ enc/big5.c | 12 ++++++++++++ enc/cp949.c | 4 ++++ enc/emacs_mule.c | 4 ++++ enc/euc_jp.c | 4 ++++ enc/euc_kr.c | 8 ++++++++ enc/euc_tw.c | 4 ++++ enc/gb18030.c | 4 ++++ enc/gbk.c | 4 ++++ enc/iso_8859_1.c | 6 ++++++ enc/iso_8859_10.c | 6 ++++++ enc/iso_8859_11.c | 4 ++++ enc/iso_8859_13.c | 6 ++++++ enc/iso_8859_14.c | 6 ++++++ enc/iso_8859_15.c | 6 ++++++ enc/iso_8859_16.c | 6 ++++++ enc/iso_8859_2.c | 6 ++++++ enc/iso_8859_3.c | 6 ++++++ enc/iso_8859_4.c | 6 ++++++ enc/iso_8859_5.c | 6 ++++++ enc/iso_8859_6.c | 4 ++++ enc/iso_8859_7.c | 6 ++++++ enc/iso_8859_8.c | 4 ++++ enc/iso_8859_9.c | 6 ++++++ enc/koi8_r.c | 4 ++++ enc/koi8_u.c | 4 ++++ enc/shift_jis.c | 4 ++++ enc/unicode.c | 2 ++ enc/us_ascii.c | 4 ++++ enc/utf_16be.c | 4 ++++ enc/utf_16le.c | 4 ++++ enc/utf_32be.c | 4 ++++ enc/utf_32le.c | 4 ++++ enc/utf_8.c | 4 ++++ enc/windows_1250.c | 6 ++++++ enc/windows_1251.c | 6 ++++++ enc/windows_1252.c | 6 ++++++ enc/windows_1253.c | 6 ++++++ enc/windows_1254.c | 6 ++++++ enc/windows_1257.c | 6 ++++++ enc/windows_31j.c | 4 ++++ regenc.c | 2 ++ regenc.h | 2 ++ 43 files changed, 214 insertions(+) diff --git a/enc/ascii.c b/enc/ascii.c index ae7db97f25ed79..4ba93f4febdb21 100644 --- a/enc/ascii.c +++ b/enc/ascii.c @@ -54,7 +54,11 @@ OnigEncodingDefine(ascii, ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif ENCINDEX_ASCII_8BIT, ONIGENC_FLAG_NONE, }; diff --git a/enc/big5.c b/enc/big5.c index ab4fb69819b60e..e141ebdbe36988 100644 --- a/enc/big5.c +++ b/enc/big5.c @@ -300,7 +300,11 @@ OnigEncodingDefine(big5, BIG5) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; @@ -335,7 +339,11 @@ OnigEncodingDefine(big5_hkscs, BIG5_HKSCS) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; @@ -370,7 +378,11 @@ OnigEncodingDefine(big5_uao, BIG5_UAO) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/cp949.c b/enc/cp949.c index 1600d0cd5bee29..77e961a7cdf3d0 100644 --- a/enc/cp949.c +++ b/enc/cp949.c @@ -211,7 +211,11 @@ OnigEncodingDefine(cp949, CP949) = { onigenc_not_support_get_ctype_code_range, cp949_left_adjust_char_head, cp949_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/emacs_mule.c b/enc/emacs_mule.c index f92eb183cf788d..abd986a1878e0b 100644 --- a/enc/emacs_mule.c +++ b/enc/emacs_mule.c @@ -334,7 +334,11 @@ OnigEncodingDefine(emacs_mule, Emacs_Mule) = { onigenc_not_support_get_ctype_code_range, left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/euc_jp.c b/enc/euc_jp.c index d283bf4ebb1208..678d0116682bee 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -576,7 +576,11 @@ OnigEncodingDefine(euc_jp, EUC_JP) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/euc_kr.c b/enc/euc_kr.c index 21d6ab4e1c10b9..4079a0ece05b20 100644 --- a/enc/euc_kr.c +++ b/enc/euc_kr.c @@ -188,7 +188,11 @@ OnigEncodingDefine(euc_kr, EUC_KR) = { onigenc_not_support_get_ctype_code_range, euckr_left_adjust_char_head, euckr_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; @@ -213,7 +217,11 @@ OnigEncodingDefine(euc_cn, EUC_CN) = { onigenc_not_support_get_ctype_code_range, euckr_left_adjust_char_head, euckr_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/euc_tw.c b/enc/euc_tw.c index 1c5659cb1d0895..722e29a9dac70e 100644 --- a/enc/euc_tw.c +++ b/enc/euc_tw.c @@ -221,7 +221,11 @@ OnigEncodingDefine(euc_tw, EUC_TW) = { onigenc_not_support_get_ctype_code_range, euctw_left_adjust_char_head, euctw_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/gb18030.c b/enc/gb18030.c index 63d2e633ecb16e..316737db11463d 100644 --- a/enc/gb18030.c +++ b/enc/gb18030.c @@ -597,7 +597,11 @@ OnigEncodingDefine(gb18030, GB18030) = { onigenc_not_support_get_ctype_code_range, gb18030_left_adjust_char_head, gb18030_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/gbk.c b/enc/gbk.c index 31032553bf58ed..3df4e4b6d6a6a9 100644 --- a/enc/gbk.c +++ b/enc/gbk.c @@ -211,7 +211,11 @@ OnigEncodingDefine(gbk, GBK) = { onigenc_not_support_get_ctype_code_range, gbk_left_adjust_char_head, gbk_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c index 7af0888c3edced..78ea1fba600582 100644 --- a/enc/iso_8859_1.c +++ b/enc/iso_8859_1.c @@ -255,6 +255,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE return FALSE; } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -297,6 +298,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { onigenc_single_byte_mbc_enc_len, @@ -315,7 +317,11 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c index cae4be2db0367b..bf1c884cb23511 100644 --- a/enc/iso_8859_10.c +++ b/enc/iso_8859_10.c @@ -224,6 +224,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -269,6 +270,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { onigenc_single_byte_mbc_enc_len, @@ -287,7 +289,11 @@ OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c index b9c6119fd9a02b..403ae6499e0cf8 100644 --- a/enc/iso_8859_11.c +++ b/enc/iso_8859_11.c @@ -93,7 +93,11 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c index fe1ddd7065e6ae..8c6e758b8066f3 100644 --- a/enc/iso_8859_13.c +++ b/enc/iso_8859_13.c @@ -217,6 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -264,6 +265,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_13, ISO_8859_13) = { onigenc_single_byte_mbc_enc_len, @@ -282,7 +284,11 @@ OnigEncodingDefine(iso_8859_13, ISO_8859_13) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c index 647514a01626c0..21dffea76f087b 100644 --- a/enc/iso_8859_14.c +++ b/enc/iso_8859_14.c @@ -226,6 +226,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -280,6 +281,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_14, ISO_8859_14) = { onigenc_single_byte_mbc_enc_len, @@ -298,7 +300,11 @@ OnigEncodingDefine(iso_8859_14, ISO_8859_14) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c index 377a3afc7b15c5..dd6c29a6432455 100644 --- a/enc/iso_8859_15.c +++ b/enc/iso_8859_15.c @@ -220,6 +220,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -271,6 +272,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_15, ISO_8859_15) = { onigenc_single_byte_mbc_enc_len, @@ -289,7 +291,11 @@ OnigEncodingDefine(iso_8859_15, ISO_8859_15) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c index 135630eb73df46..aa7ce99fbac467 100644 --- a/enc/iso_8859_16.c +++ b/enc/iso_8859_16.c @@ -222,6 +222,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -275,6 +276,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_16, ISO_8859_16) = { onigenc_single_byte_mbc_enc_len, @@ -293,7 +295,11 @@ OnigEncodingDefine(iso_8859_16, ISO_8859_16) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c index 3a05c6320dbbeb..859073fd149cb4 100644 --- a/enc/iso_8859_2.c +++ b/enc/iso_8859_2.c @@ -220,6 +220,7 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE return FALSE; } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -266,6 +267,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { onigenc_single_byte_mbc_enc_len, @@ -284,7 +286,11 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c index 2a343eac638482..d8199d5125b19c 100644 --- a/enc/iso_8859_3.c +++ b/enc/iso_8859_3.c @@ -220,6 +220,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xB9) #define I_WITH_DOT_ABOVE (0xA9) static int @@ -276,6 +277,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_3, ISO_8859_3) = { onigenc_single_byte_mbc_enc_len, @@ -294,7 +296,11 @@ OnigEncodingDefine(iso_8859_3, ISO_8859_3) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c index e2134e8c0b27a5..5f01f0157556dd 100644 --- a/enc/iso_8859_4.c +++ b/enc/iso_8859_4.c @@ -223,6 +223,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -272,6 +273,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_4, ISO_8859_4) = { onigenc_single_byte_mbc_enc_len, @@ -290,7 +292,11 @@ OnigEncodingDefine(iso_8859_4, ISO_8859_4) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c index 6fafc358233eb9..8223fc0ec706c8 100644 --- a/enc/iso_8859_5.c +++ b/enc/iso_8859_5.c @@ -209,6 +209,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -240,6 +241,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { onigenc_single_byte_mbc_enc_len, @@ -258,7 +260,11 @@ OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_6.c b/enc/iso_8859_6.c index cdb74054d1e312..78543ea307d221 100644 --- a/enc/iso_8859_6.c +++ b/enc/iso_8859_6.c @@ -93,7 +93,11 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c index ac973f74ba51ef..e84f5c3460ad4e 100644 --- a/enc/iso_8859_7.c +++ b/enc/iso_8859_7.c @@ -205,6 +205,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -259,6 +260,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { onigenc_single_byte_mbc_enc_len, @@ -277,7 +279,11 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c index e256855f2130a7..b757a283de15a0 100644 --- a/enc/iso_8859_8.c +++ b/enc/iso_8859_8.c @@ -93,7 +93,11 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index 004eec310fcd52..f15953963bf4de 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -213,6 +213,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xFD) #define I_WITH_DOT_ABOVE (0xDD) static int @@ -265,6 +266,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { onigenc_single_byte_mbc_enc_len, @@ -283,7 +285,11 @@ OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/koi8_r.c b/enc/koi8_r.c index a52097577416a1..39f24824651275 100644 --- a/enc/koi8_r.c +++ b/enc/koi8_r.c @@ -214,7 +214,11 @@ OnigEncodingDefine(koi8_r, KOI8_R) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/koi8_u.c b/enc/koi8_u.c index 50bb78bd04841d..8cd890dd16a041 100644 --- a/enc/koi8_u.c +++ b/enc/koi8_u.c @@ -218,7 +218,11 @@ OnigEncodingDefine(koi8_u, KOI8_U) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/shift_jis.c b/enc/shift_jis.c index f1355d2d95fcb4..48f648868af13d 100644 --- a/enc/shift_jis.c +++ b/enc/shift_jis.c @@ -47,7 +47,11 @@ OnigEncodingDefine(shift_jis, Shift_JIS) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/unicode.c b/enc/unicode.c index 07497cdbe46731..5bc806863e8f55 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -655,6 +655,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, return n; } +#ifdef USE_CASE_MAP_API /* length in bytes for three characters in UTF-32; e.g. needed for ffi (U+FB03) */ #define CASE_MAPPING_SLACK 12 #define MODIFIED (flags |= ONIGENC_CASE_MODIFIED) @@ -798,6 +799,7 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, *flagP = flags; return (int )(to - to_start); } +#endif const char onigenc_unicode_version_string[] = #ifdef ONIG_UNICODE_VERSION_STRING diff --git a/enc/us_ascii.c b/enc/us_ascii.c index 08f9072c435591..253ee695724159 100644 --- a/enc/us_ascii.c +++ b/enc/us_ascii.c @@ -32,7 +32,11 @@ OnigEncodingDefine(us_ascii, US_ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_single_byte_ascii_only_case_map, +#else + NULL, +#endif ENCINDEX_US_ASCII, ONIGENC_FLAG_NONE, }; diff --git a/enc/utf_16be.c b/enc/utf_16be.c index f9dd7119d65a0e..0086040b5d5bd9 100644 --- a/enc/utf_16be.c +++ b/enc/utf_16be.c @@ -249,7 +249,11 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = { onigenc_utf16_32_get_ctype_code_range, utf16be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_16le.c b/enc/utf_16le.c index 2c8438d0be2554..ca0fce53872045 100644 --- a/enc/utf_16le.c +++ b/enc/utf_16le.c @@ -242,7 +242,11 @@ OnigEncodingDefine(utf_16le, UTF_16LE) = { onigenc_utf16_32_get_ctype_code_range, utf16le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_32be.c b/enc/utf_32be.c index 17841e52a4e82d..e05cfaf1b2fdf8 100644 --- a/enc/utf_32be.c +++ b/enc/utf_32be.c @@ -199,7 +199,11 @@ OnigEncodingDefine(utf_32be, UTF_32BE) = { onigenc_utf16_32_get_ctype_code_range, utf32be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_32le.c b/enc/utf_32le.c index 18b798f102c5d1..651efdcec57790 100644 --- a/enc/utf_32le.c +++ b/enc/utf_32le.c @@ -199,7 +199,11 @@ OnigEncodingDefine(utf_32le, UTF_32LE) = { onigenc_utf16_32_get_ctype_code_range, utf32le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/utf_8.c b/enc/utf_8.c index cdf2510d84c829..ae7c98469d50bd 100644 --- a/enc/utf_8.c +++ b/enc/utf_8.c @@ -431,7 +431,11 @@ OnigEncodingDefine(utf_8, UTF_8) = { get_ctype_code_range, left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_unicode_case_map, +#else + NULL, +#endif ENCINDEX_UTF_8, ONIGENC_FLAG_UNICODE, }; diff --git a/enc/windows_1250.c b/enc/windows_1250.c index daf23e9d1e6e6a..d38d50a01d370d 100644 --- a/enc/windows_1250.c +++ b/enc/windows_1250.c @@ -190,6 +190,7 @@ cp1250_get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -239,6 +240,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1250, Windows_1250) = { onigenc_single_byte_mbc_enc_len, @@ -257,7 +259,11 @@ OnigEncodingDefine(windows_1250, Windows_1250) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1251.c b/enc/windows_1251.c index 6c892c1b8ce39a..81641d0337f2b2 100644 --- a/enc/windows_1251.c +++ b/enc/windows_1251.c @@ -180,6 +180,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -221,6 +222,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1251, Windows_1251) = { onigenc_single_byte_mbc_enc_len, @@ -239,7 +241,11 @@ OnigEncodingDefine(windows_1251, Windows_1251) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1252.c b/enc/windows_1252.c index b685878d3fc5d7..6aece95c0ab2ae 100644 --- a/enc/windows_1252.c +++ b/enc/windows_1252.c @@ -181,6 +181,7 @@ cp1252_get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -228,6 +229,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1252, Windows_1252) = { onigenc_single_byte_mbc_enc_len, @@ -246,7 +248,11 @@ OnigEncodingDefine(windows_1252, Windows_1252) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1253.c b/enc/windows_1253.c index b2a43581c39240..c95ea3f41ccd9f 100644 --- a/enc/windows_1253.c +++ b/enc/windows_1253.c @@ -213,6 +213,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API static int case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, @@ -272,6 +273,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1253, Windows_1253) = { onigenc_single_byte_mbc_enc_len, @@ -290,7 +292,11 @@ OnigEncodingDefine(windows_1253, Windows_1253) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1254.c b/enc/windows_1254.c index 5e6d92d3d2680e..c8d5991686a0b3 100644 --- a/enc/windows_1254.c +++ b/enc/windows_1254.c @@ -221,6 +221,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xFD) #define I_WITH_DOT_ABOVE (0xDD) static int @@ -277,6 +278,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1254, Windows_1254) = { onigenc_single_byte_mbc_enc_len, @@ -295,7 +297,11 @@ OnigEncodingDefine(windows_1254, Windows_1254) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_1257.c b/enc/windows_1257.c index ada03b72bf01cf..def13c8c49fed0 100644 --- a/enc/windows_1257.c +++ b/enc/windows_1257.c @@ -225,6 +225,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, flag, p, end, items); } +#ifdef USE_CASE_MAP_API #define DOTLESS_i (0xB9) #define I_WITH_DOT_ABOVE (0xA9) static int @@ -279,6 +280,7 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, *flagP = flags; return (int )(to - to_start); } +#endif OnigEncodingDefine(windows_1257, Windows_1257) = { onigenc_single_byte_mbc_enc_len, @@ -297,7 +299,11 @@ OnigEncodingDefine(windows_1257, Windows_1257) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/enc/windows_31j.c b/enc/windows_31j.c index 1eb859596a3bc3..cd8bd83fddf693 100644 --- a/enc/windows_31j.c +++ b/enc/windows_31j.c @@ -48,7 +48,11 @@ OnigEncodingDefine(windows_31j, Windows_31J) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, +#ifdef USE_CASE_MAP_API onigenc_ascii_only_case_map, +#else + NULL, +#endif 0, ONIGENC_FLAG_NONE, }; diff --git a/regenc.c b/regenc.c index 823aacc28e615b..978d0cad1bce2d 100644 --- a/regenc.c +++ b/regenc.c @@ -966,6 +966,7 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, } #endif +#ifdef USE_CASE_MAP_API extern int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) @@ -1027,3 +1028,4 @@ onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar *flagP = flags; return (int )(to - to_start); } +#endif diff --git a/regenc.h b/regenc.h index 4fbe403b6301d8..2d96768763901b 100644 --- a/regenc.h +++ b/regenc.h @@ -134,11 +134,13 @@ typedef struct { #define roomof(x, y) (((x) + (y) - 1) / (y)) #define type_roomof(x, y) roomof(sizeof(x), sizeof(y)) +/* config */ #define USE_CRNL_AS_LINE_TERMINATOR #define USE_UNICODE_PROPERTIES #define USE_UNICODE_AGE_PROPERTIES /* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */ +#define USE_CASE_MAP_API #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII From 916fbf1063268e27e241363d81e68bf132e7602b Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Wed, 30 Jan 2019 10:31:10 +0900 Subject: [PATCH 08/18] [k-takata/Onigmo] Update copyright information * Update our copyright information. * Import the latest information from oniguruma. Related: #95 https://github.com/k-takata/Onigmo/commit/0d8662b500 --- include/ruby/onigmo.h | 4 ++-- regcomp.c | 4 ++-- regenc.c | 2 +- regenc.h | 2 +- regerror.c | 2 +- regexec.c | 4 ++-- regint.h | 2 +- regparse.c | 2 +- regparse.h | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index b9a21206dea0bb..74d012d79f3915 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -4,8 +4,8 @@ onigmo.h - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2009 K.Kosako - * Copyright (c) 2011-2017 K.Takata + * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regcomp.c b/regcomp.c index 41154b54855dd9..71e57c8e3923bc 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2,8 +2,8 @@ regcomp.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2013 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regenc.c b/regenc.c index 978d0cad1bce2d..ff2155aabc1e80 100644 --- a/regenc.c +++ b/regenc.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regenc.h b/regenc.h index 2d96768763901b..fe0440dd740e9b 100644 --- a/regenc.h +++ b/regenc.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regerror.c b/regerror.c index 8667084d41c931..0134fc29f5dffa 100644 --- a/regerror.c +++ b/regerror.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regexec.c b/regexec.c index 2e79623c719171..3210c7cc1b5603 100644 --- a/regexec.c +++ b/regexec.c @@ -2,8 +2,8 @@ regexec.c - Onigmo (Oniguruma-mod) (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regint.h b/regint.h index 0593fa2cc1f057..3f4aa919e5046f 100644 --- a/regint.h +++ b/regint.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2013 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regparse.c b/regparse.c index 418bd3814076d9..c61a8b28a2c118 100644 --- a/regparse.c +++ b/regparse.c @@ -3,7 +3,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2008 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/regparse.h b/regparse.h index dd35d485255bad..65da835a55aca1 100644 --- a/regparse.h +++ b/regparse.h @@ -5,7 +5,7 @@ **********************************************************************/ /*- * Copyright (c) 2002-2007 K.Kosako - * Copyright (c) 2011-2016 K.Takata + * Copyright (c) 2011-2019 K.Takata * All rights reserved. * * Redistribution and use in source and binary forms, with or without From a097878ed4fdecc347909f1cd62089bf6ab554c0 Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Wed, 30 Jan 2019 19:02:41 +0900 Subject: [PATCH 09/18] [k-takata/Onigmo] Comment out unused errors https://github.com/k-takata/Onigmo/commit/5555ee4c81 --- regerror.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/regerror.c b/regerror.c index 0134fc29f5dffa..703d747de9f727 100644 --- a/regerror.c +++ b/regerror.c @@ -63,14 +63,18 @@ onig_error_code_to_format(OnigPosition code) p = "parse depth limit over"; break; case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET: p = "default multibyte-encoding is not set"; break; +#if 0 case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: p = "can't convert to wide-char on specified multibyte-encoding"; break; +#endif case ONIGERR_INVALID_ARGUMENT: p = "invalid argument"; break; case ONIGERR_END_PATTERN_AT_LEFT_BRACE: p = "end pattern at left brace"; break; +#if 0 case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: p = "end pattern at left bracket"; break; +#endif case ONIGERR_EMPTY_CHAR_CLASS: p = "empty char-class"; break; case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: @@ -87,16 +91,20 @@ onig_error_code_to_format(OnigPosition code) p = "invalid control-code syntax"; break; case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: p = "char-class value at end of range"; break; +#if 0 case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: p = "char-class value at start of range"; break; +#endif case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: p = "unmatched range specifier in char-class"; break; case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: p = "target of repeat operator is not specified"; break; case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: p = "target of repeat operator is invalid"; break; +#if 0 case ONIGERR_NESTED_REPEAT_OPERATOR: p = "nested repeat operator"; break; +#endif case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: p = "unmatched close parenthesis"; break; case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: @@ -121,14 +129,18 @@ onig_error_code_to_format(OnigPosition code) p = "upper is smaller than lower in repeat range"; break; case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: p = "empty range in char class"; break; +#if 0 case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: p = "mismatch multibyte code length in char-class range"; break; +#endif case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: p = "too many multibyte code ranges are specified"; break; case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: p = "too short multibyte code string"; break; +#if 0 case ONIGERR_TOO_BIG_BACKREF_NUMBER: p = "too big backref number"; break; +#endif case ONIGERR_INVALID_BACKREF: #ifdef USE_NAMED_GROUP p = "invalid backref number/name"; break; From ad150e90397ef85c1dbcf3ef54c0b1338fde4204 Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Wed, 30 Jan 2019 19:47:54 +0900 Subject: [PATCH 10/18] [k-takata/Onigmo] Update version number (6.2.0) * Update the version number to 6.2.0 * Update LTVERSION to 6:5:0. https://github.com/k-takata/Onigmo/commit/9e0f7ceee0 --- include/ruby/onigmo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h index 74d012d79f3915..9dcddee829a86f 100644 --- a/include/ruby/onigmo.h +++ b/include/ruby/onigmo.h @@ -38,8 +38,8 @@ extern "C" { #endif #define ONIGMO_VERSION_MAJOR 6 -#define ONIGMO_VERSION_MINOR 1 -#define ONIGMO_VERSION_TEENY 3 +#define ONIGMO_VERSION_MINOR 2 +#define ONIGMO_VERSION_TEENY 0 #ifndef ONIG_EXTERN # ifdef RUBY_EXTERN From 496e74d0ccedd513eca9a156b207a36ed88e484f Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Thu, 31 Jan 2019 19:04:02 +0900 Subject: [PATCH 11/18] [k-takata/Onigmo] Fix that onig_new() may crash When onig_reg_init() returns an error, onig_free_body() which is called via onig_new() may crash because some members are not properly initialized. Fix it. https://github.com/k-takata/Onigmo/commit/d2a090a57e --- regcomp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/regcomp.c b/regcomp.c index 71e57c8e3923bc..9dfdb21e851e4d 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5919,6 +5919,12 @@ onig_reg_init(regex_t* reg, OnigOptionType option, if (IS_NULL(reg)) return ONIGERR_INVALID_ARGUMENT; + (reg)->exact = (UChar* )NULL; + (reg)->chain = (regex_t* )NULL; + (reg)->p = (UChar* )NULL; + (reg)->name_table = (void* )NULL; + (reg)->repeat_range = (OnigRepeatRange* )NULL; + if (ONIGENC_IS_UNDEF(enc)) return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET; @@ -5938,13 +5944,9 @@ onig_reg_init(regex_t* reg, OnigOptionType option, (reg)->options = option; (reg)->syntax = syntax; (reg)->optimize = 0; - (reg)->exact = (UChar* )NULL; - (reg)->chain = (regex_t* )NULL; - (reg)->p = (UChar* )NULL; (reg)->alloc = 0; (reg)->used = 0; - (reg)->name_table = (void* )NULL; (reg)->case_fold_flag = case_fold_flag; From f0b31a5898ae8101286dc47085139c56ba0bda54 Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Mon, 29 Jul 2019 20:15:26 +0900 Subject: [PATCH 12/18] [k-takata/Onigmo] Fix SEGV in onig_error_code_to_str() (Fix https://github.com/k-takata/Onigmo/pull/132) When onig_new(ONIG_SYNTAX_PERL) fails with ONIGERR_INVALID_GROUP_NAME, onig_error_code_to_str() crashes. onig_scan_env_set_error_string() should have been used when returning ONIGERR_INVALID_GROUP_NAME. https://github.com/k-takata/Onigmo/commit/00cc7e28a3 --- regparse.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/regparse.c b/regparse.c index c61a8b28a2c118..1772196bcda027 100644 --- a/regparse.c +++ b/regparse.c @@ -4043,7 +4043,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (c == 'R' || c == '0') { PINC; /* skip 'R' / '0' */ - if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME; + if (!PPEEK_IS(')')) { + r = ONIGERR_INVALID_GROUP_NAME; + onig_scan_env_set_error_string(env, r, p - 1, p + 1); + return r; + } PINC; /* skip ')' */ name_end = name = p; gnum = 0; From ac379278e818eb92e87a8f82e6841d7ab59baeb2 Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Mon, 29 Jul 2019 20:16:46 +0900 Subject: [PATCH 13/18] =?UTF-8?q?[k-takata/Onigmo]=20Fix=20stack=20overflo?= =?UTF-8?q?w=20with=20X+++++++++++++++++++=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imported the fix from: https://github.com/kkos/oniguruma/commit/4097828d7cc87589864fecf452f2cd46c5f37180 https://github.com/k-takata/Onigmo/commit/786b4849c1 --- regparse.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/regparse.c b/regparse.c index 1772196bcda027..123b3015a5a936 100644 --- a/regparse.c +++ b/regparse.c @@ -6313,11 +6313,14 @@ parse_exp(Node** np, OnigToken* tok, int term, int r, len, group = 0; Node* qn; Node** targetp; + unsigned int parse_depth; *np = NULL; if (tok->type == (enum TokenSyms )term) goto end_of_token; + parse_depth = env->parse_depth; + switch (tok->type) { case TK_ALT: case TK_EOT: @@ -6628,6 +6631,10 @@ parse_exp(Node** np, OnigToken* tok, int term, if (is_invalid_quantifier_target(*targetp)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + parse_depth++; + if (parse_depth > ParseDepthLimit) + return ONIGERR_PARSE_DEPTH_LIMIT_OVER; + qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, (r == TK_INTERVAL ? 1 : 0)); CHECK_NULL_RETURN_MEMERR(qn); From 16086128ccb5fa9133ef57c0e16bd9eaa82d818c Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Tue, 30 Jul 2019 23:15:05 +0900 Subject: [PATCH 14/18] [k-takata/Onigmo] Suppress warning on 64-bit builds https://github.com/k-takata/Onigmo/commit/ced209d5e9 --- regcomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regcomp.c b/regcomp.c index 9dfdb21e851e4d..18b2c97eb6381c 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4278,7 +4278,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg, } } - return (int)len; + return (int )len; } typedef struct { From 81c13349049a3674819842e87b14cf35b8755392 Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Thu, 1 Aug 2019 21:27:51 +0900 Subject: [PATCH 15/18] [k-takata/Onigmo] Fix out-of-bounds read in parse_char_class() (Close https://github.com/k-takata/Onigmo/pull/139) /[\x{111111}]/ causes out-of-bounds read when encoding is a single byte encoding. \x{111111} is an invalid codepoint for a single byte encoding. Check if it is a valid codepoint. https://github.com/k-takata/Onigmo/commit/d4cf99d30b --- regenc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/regenc.c b/regenc.c index ff2155aabc1e80..c561cee29d210d 100644 --- a/regenc.c +++ b/regenc.c @@ -640,18 +640,23 @@ onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, } extern int -onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) +onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) { + if (code > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; return 1; } extern int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) { + if (code > 0xff) { #ifdef RUBY - if (code > 0xff) rb_raise(rb_eRangeError, "%u out of char range", code); +#else + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif + } *buf = (UChar )(code & 0xff); return 1; } From 76b1d4a4814eefa9bf50a1d3d0bf4e7094d5936b Mon Sep 17 00:00:00 2001 From: "K.Takata" Date: Fri, 2 Aug 2019 01:03:11 +0900 Subject: [PATCH 16/18] [k-takata/Onigmo] Disable error message for capture history when not needed Add `#ifdef USE_CAPTURE_HISTORY`. https://github.com/k-takata/Onigmo/commit/8217be2c3a --- regerror.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/regerror.c b/regerror.c index 703d747de9f727..e772feee81914a 100644 --- a/regerror.c +++ b/regerror.c @@ -173,8 +173,10 @@ onig_error_code_to_format(OnigPosition code) p = "multiplex definition name <%n> call"; break; case ONIGERR_NEVER_ENDING_RECURSION: p = "never ending recursion"; break; +#ifdef USE_CAPTURE_HISTORY case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: p = "group number is too big for capture history"; break; +#endif case ONIGERR_INVALID_CHAR_PROPERTY_NAME: p = "invalid character property name {%n}"; break; case ONIGERR_TOO_MANY_CAPTURE_GROUPS: From aaf47cca03c4c7561fd931e0aa1a76adfdb23eba Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 12 Jan 2026 18:39:15 +0900 Subject: [PATCH 17/18] Now onigenc_single_byte_code_to_mbclen checks out-of-bound --- sprintf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sprintf.c b/sprintf.c index cb266a98416e33..de88a9f4b35a20 100644 --- a/sprintf.c +++ b/sprintf.c @@ -441,7 +441,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) VALUE val = GETARG(); VALUE tmp; unsigned int c; - int n; + int n, encidx; tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { @@ -451,11 +451,13 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) goto format_s1; } n = NUM2INT(val); - if (n >= 0) n = rb_enc_codelen((c = n), enc); + if (n >= 0) { + n = rb_enc_codelen((c = n), enc); + encidx = rb_ascii8bit_appendable_encoding_index(enc, c); + } if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } - int encidx = rb_ascii8bit_appendable_encoding_index(enc, c); if (encidx >= 0 && encidx != rb_enc_to_index(enc)) { /* special case */ rb_enc_associate_index(result, encidx); From f34297604f2b43bcec7f57b1f0ac1e2813ce58e3 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 12 Jan 2026 18:57:20 +0900 Subject: [PATCH 18/18] Remove a direct call of `rb_raise` in Onigmo --- regenc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/regenc.c b/regenc.c index c561cee29d210d..c595f44b29e36d 100644 --- a/regenc.c +++ b/regenc.c @@ -651,11 +651,7 @@ extern int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) { if (code > 0xff) { -#ifdef RUBY - rb_raise(rb_eRangeError, "%u out of char range", code); -#else return ONIGERR_INVALID_CODE_POINT_VALUE; -#endif } *buf = (UChar )(code & 0xff); return 1;