From 8c1900528b527e5948fb167182937196ff611600 Mon Sep 17 00:00:00 2001 From: git Date: Sat, 1 Nov 2025 06:50:27 +0000 Subject: [PATCH 01/13] Update bundled gems list as of 2025-10-31 --- NEWS.md | 2 +- gems/bundled_gems | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index abaf585eabf609..fb700d88e17964 100644 --- a/NEWS.md +++ b/NEWS.md @@ -161,7 +161,7 @@ The following bundled gems are promoted from default gems. * pstore 0.2.0 * benchmark 0.5.0 * logger 1.7.0 -* rdoc 6.15.0 +* rdoc 6.15.1 * win32ole 1.9.2 * irb 1.15.2 * reline 0.6.2 diff --git a/gems/bundled_gems b/gems/bundled_gems index 19ed86f4534994..563f5e762750be 100644 --- a/gems/bundled_gems +++ b/gems/bundled_gems @@ -39,7 +39,7 @@ ostruct 0.6.3 https://github.com/ruby/ostruct pstore 0.2.0 https://github.com/ruby/pstore benchmark 0.5.0 https://github.com/ruby/benchmark logger 1.7.0 https://github.com/ruby/logger -rdoc 6.15.0 https://github.com/ruby/rdoc ac2a6fbf62b584a8325a665a9e7b368388bc7df6 +rdoc 6.15.1 https://github.com/ruby/rdoc win32ole 1.9.2 https://github.com/ruby/win32ole irb 1.15.2 https://github.com/ruby/irb d43c3d764ae439706aa1b26a3ec299cc45eaed5b reline 0.6.2 https://github.com/ruby/reline From 6e2e7a335546275edf15a1617663d5a1c03c6188 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 1 Nov 2025 09:24:18 +0100 Subject: [PATCH 02/13] [ruby/json] parser.c: Extract `json_parse_number` https://github.com/ruby/json/commit/2681b23b87 --- ext/json/parser/parser.c | 184 ++++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 89 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index ca8f501539c322..565f8e020386b1 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1022,6 +1022,95 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig return Qfalse; } +static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start) +{ + bool integer = true; + + // Variables for Ryu optimization - extract digits during parsing + uint64_t mantissa = 0; + int mantissa_digits = 0; + int32_t exponent = 0; + int decimal_point_pos = -1; + + const char first_digit = *state->cursor; + + // Parse integer part and extract mantissa digits + while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { + mantissa = mantissa * 10 + (*state->cursor - '0'); + mantissa_digits++; + state->cursor++; + } + + if (RB_UNLIKELY(first_digit == '0' && mantissa_digits > 1 || negative && mantissa_digits == 0)) { + raise_parse_error_at("invalid number: %s", state, start); + } + + // Parse fractional part + if ((state->cursor < state->end) && (*state->cursor == '.')) { + integer = false; + decimal_point_pos = mantissa_digits; // Remember position of decimal point + state->cursor++; + + if (state->cursor == state->end || !rb_isdigit(*state->cursor)) { + raise_parse_error_at("invalid number: %s", state, start); + } + + while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { + mantissa = mantissa * 10 + (*state->cursor - '0'); + mantissa_digits++; + state->cursor++; + } + } + + // Parse exponent + if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) { + integer = false; + state->cursor++; + + bool negative_exponent = false; + if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) { + negative_exponent = (*state->cursor == '-'); + state->cursor++; + } + + if (state->cursor == state->end || !rb_isdigit(*state->cursor)) { + raise_parse_error_at("invalid number: %s", state, start); + } + + while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { + exponent = exponent * 10 + (*state->cursor - '0'); + state->cursor++; + } + + if (negative_exponent) { + exponent = -exponent; + } + } + + if (integer) { + return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor); + } + + // Adjust exponent based on decimal point position + if (decimal_point_pos >= 0) { + exponent -= (mantissa_digits - decimal_point_pos); + } + + return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor); +} + +static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + return json_parse_number(state, config, false, state->cursor); +} + +static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + const char *start = state->cursor; + state->cursor++; + return json_parse_number(state, config, true, start); +} + static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) { json_eat_whitespace(state); @@ -1072,7 +1161,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); break; - case '-': + case '-': { // Note: memcmp with a small power of two compile to an integer comparison if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { if (config->allow_nan) { @@ -1082,95 +1171,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); } } - // Fallthrough - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { - bool integer = true; - - // Variables for Ryu optimization - extract digits during parsing - uint64_t mantissa = 0; - int mantissa_digits = 0; - int32_t exponent = 0; - bool negative = false; - int decimal_point_pos = -1; - - // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/ - const char *start = state->cursor; - - // Handle optional negative sign - if (*state->cursor == '-') { - negative = true; - state->cursor++; - if (state->cursor >= state->end || !rb_isdigit(*state->cursor)) { - raise_parse_error_at("invalid number: %s", state, start); - } - } - - // Parse integer part and extract mantissa digits - while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { - mantissa = mantissa * 10 + (*state->cursor - '0'); - mantissa_digits++; - state->cursor++; - } - - if (RB_UNLIKELY(start[0] == '0' && mantissa_digits > 1)) { - raise_parse_error_at("invalid number: %s", state, start); - } else if (RB_UNLIKELY(mantissa_digits > 1 && negative && start[1] == '0')) { - raise_parse_error_at("invalid number: %s", state, start); - } - - // Parse fractional part - if ((state->cursor < state->end) && (*state->cursor == '.')) { - integer = false; - decimal_point_pos = mantissa_digits; // Remember position of decimal point - state->cursor++; - - if (state->cursor == state->end || !rb_isdigit(*state->cursor)) { - raise_parse_error_at("invalid number: %s", state, start); - } - - while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { - mantissa = mantissa * 10 + (*state->cursor - '0'); - mantissa_digits++; - state->cursor++; - } - } - - // Parse exponent - if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) { - integer = false; - state->cursor++; - - bool negative_exponent = false; - if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) { - negative_exponent = (*state->cursor == '-'); - state->cursor++; - } - - if (state->cursor == state->end || !rb_isdigit(*state->cursor)) { - raise_parse_error_at("invalid number: %s", state, start); - } - - while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { - exponent = exponent * 10 + (*state->cursor - '0'); - state->cursor++; - } - - if (negative_exponent) { - exponent = -exponent; - } - } - - if (integer) { - return json_push_value(state, config, json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor)); - } - - // Adjust exponent based on decimal point position - if (decimal_point_pos >= 0) { - exponent -= (mantissa_digits - decimal_point_pos); - } - - return json_push_value(state, config, json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor)); + return json_push_value(state, config, json_parse_negative_number(state, config)); + break; } + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + return json_push_value(state, config, json_parse_positive_number(state, config)); + break; case '"': { // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} return json_parse_string(state, config, false); From a6bdf52bc9005f4c8709736588fca53787ad3d78 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 1 Nov 2025 09:42:24 +0100 Subject: [PATCH 03/13] [ruby/json] parser.c: Extract json_parse_digits https://github.com/ruby/json/commit/1bf405ecc6 --- ext/json/parser/parser.c | 49 +++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 565f8e020386b1..d69cc28a92ff46 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1022,24 +1022,28 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig return Qfalse; } +static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) +{ + const char *start = state->cursor; + while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { + *accumulator = *accumulator * 10 + (*state->cursor - '0'); + state->cursor++; + } + return (int)(state->cursor - start); +} + static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start) { bool integer = true; + const char first_digit = *state->cursor; // Variables for Ryu optimization - extract digits during parsing - uint64_t mantissa = 0; - int mantissa_digits = 0; int32_t exponent = 0; int decimal_point_pos = -1; - - const char first_digit = *state->cursor; + uint64_t mantissa = 0; // Parse integer part and extract mantissa digits - while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { - mantissa = mantissa * 10 + (*state->cursor - '0'); - mantissa_digits++; - state->cursor++; - } + int mantissa_digits = json_parse_digits(state, &mantissa); if (RB_UNLIKELY(first_digit == '0' && mantissa_digits > 1 || negative && mantissa_digits == 0)) { raise_parse_error_at("invalid number: %s", state, start); @@ -1051,19 +1055,16 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig decimal_point_pos = mantissa_digits; // Remember position of decimal point state->cursor++; - if (state->cursor == state->end || !rb_isdigit(*state->cursor)) { - raise_parse_error_at("invalid number: %s", state, start); - } + int fractional_digits = json_parse_digits(state, &mantissa); + mantissa_digits += fractional_digits; - while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { - mantissa = mantissa * 10 + (*state->cursor - '0'); - mantissa_digits++; - state->cursor++; + if (RB_UNLIKELY(!fractional_digits)) { + raise_parse_error_at("invalid number: %s", state, start); } } // Parse exponent - if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) { + if ((state->cursor < state->end) && ((rb_tolower(*state->cursor) == 'e'))) { integer = false; state->cursor++; @@ -1073,18 +1074,14 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig state->cursor++; } - if (state->cursor == state->end || !rb_isdigit(*state->cursor)) { - raise_parse_error_at("invalid number: %s", state, start); - } + uint64_t abs_exponent = 0; + int exponent_digits = json_parse_digits(state, &abs_exponent); - while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { - exponent = exponent * 10 + (*state->cursor - '0'); - state->cursor++; + if (RB_UNLIKELY(!exponent_digits)) { + raise_parse_error_at("invalid number: %s", state, start); } - if (negative_exponent) { - exponent = -exponent; - } + exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent); } if (integer) { From b3d5c96613f38f7741e829403872be11491e42b6 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 1 Nov 2025 10:27:15 +0100 Subject: [PATCH 04/13] [ruby/json] parser.c: Introduce `peek()` and `eos()` helpers Encapsulate pointer arithmetic to reduce possibility of mistakes. https://github.com/ruby/json/commit/8b39407225 --- ext/json/parser/parser.c | 154 +++++++++++++++++++++------------------ 1 file changed, 85 insertions(+), 69 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index d69cc28a92ff46..90460f3a2a008b 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -406,6 +406,18 @@ typedef struct JSON_ParserStateStruct { int current_nesting; } JSON_ParserState; +static inline bool eos(JSON_ParserState *state) { + return state->cursor >= state->end; +} + +static inline char peek(JSON_ParserState *state) +{ + if (RB_UNLIKELY(eos(state))) { + return 0; + } + return *state->cursor; +} + static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out) { const char *cursor = state->cursor; @@ -571,7 +583,7 @@ json_eat_comments(JSON_ParserState *state) raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end); } else { state->cursor++; - if (state->cursor < state->end && *state->cursor == '/') { + if (peek(state) == '/') { state->cursor++; break; } @@ -591,11 +603,12 @@ json_eat_comments(JSON_ParserState *state) static inline void json_eat_whitespace(JSON_ParserState *state) { - while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) { - if (RB_LIKELY(*state->cursor != '/')) { - state->cursor++; - } else { + unsigned char cursor; + while (RB_UNLIKELY(whitespace[cursor = (unsigned char)peek(state)])) { + if (RB_UNLIKELY(cursor == '/')) { json_eat_comments(state); + } else { + state->cursor++; } } } @@ -980,7 +993,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state) #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */ #endif /* HAVE_SIMD */ - while (state->cursor < state->end) { + while (!eos(state)) { if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) { return 1; } @@ -1025,8 +1038,10 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) { const char *start = state->cursor; - while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) { - *accumulator = *accumulator * 10 + (*state->cursor - '0'); + char next_char; + + while (rb_isdigit(next_char = peek(state))) { + *accumulator = *accumulator * 10 + (next_char - '0'); state->cursor++; } return (int)(state->cursor - start); @@ -1050,7 +1065,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig } // Parse fractional part - if ((state->cursor < state->end) && (*state->cursor == '.')) { + if (peek(state) == '.') { integer = false; decimal_point_pos = mantissa_digits; // Remember position of decimal point state->cursor++; @@ -1064,13 +1079,14 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig } // Parse exponent - if ((state->cursor < state->end) && ((rb_tolower(*state->cursor) == 'e'))) { + if (rb_tolower(peek(state)) == 'e') { integer = false; state->cursor++; bool negative_exponent = false; - if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) { - negative_exponent = (*state->cursor == '-'); + const char next_char = peek(state); + if (next_char == '-' || next_char == '+') { + negative_exponent = next_char == '-'; state->cursor++; } @@ -1111,11 +1127,8 @@ static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_Par static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) { json_eat_whitespace(state); - if (state->cursor >= state->end) { - raise_parse_error("unexpected end of input", state); - } - switch (*state->cursor) { + switch (peek(state)) { case 'n': if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) { state->cursor += 4; @@ -1184,7 +1197,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) json_eat_whitespace(state); long stack_head = state->stack->head; - if ((state->cursor < state->end) && (*state->cursor == ']')) { + if (peek(state) == ']') { state->cursor++; return json_push_value(state, config, json_decode_array(state, config, 0)); } else { @@ -1199,26 +1212,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) while (true) { json_eat_whitespace(state); - if (state->cursor < state->end) { - if (*state->cursor == ']') { - state->cursor++; - long count = state->stack->head - stack_head; - state->current_nesting--; - state->in_array--; - return json_push_value(state, config, json_decode_array(state, config, count)); - } + const char next_char = peek(state); - if (*state->cursor == ',') { - state->cursor++; - if (config->allow_trailing_comma) { - json_eat_whitespace(state); - if ((state->cursor < state->end) && (*state->cursor == ']')) { - continue; - } + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == ']') { + continue; } - json_parse_any(state, config); - continue; } + json_parse_any(state, config); + continue; + } + + if (next_char == ']') { + state->cursor++; + long count = state->stack->head - stack_head; + state->current_nesting--; + state->in_array--; + return json_push_value(state, config, json_decode_array(state, config, count)); } raise_parse_error("expected ',' or ']' after array value", state); @@ -1232,7 +1245,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) json_eat_whitespace(state); long stack_head = state->stack->head; - if ((state->cursor < state->end) && (*state->cursor == '}')) { + if (peek(state) == '}') { state->cursor++; return json_push_value(state, config, json_decode_object(state, config, 0)); } else { @@ -1241,13 +1254,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); } - if (*state->cursor != '"') { + if (peek(state) != '"') { raise_parse_error("expected object key, got %s", state); } json_parse_string(state, config, true); json_eat_whitespace(state); - if ((state->cursor >= state->end) || (*state->cursor != ':')) { + if (peek(state) != ':') { raise_parse_error("expected ':' after object key", state); } state->cursor++; @@ -1258,46 +1271,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) while (true) { json_eat_whitespace(state); - if (state->cursor < state->end) { - if (*state->cursor == '}') { - state->cursor++; - state->current_nesting--; - size_t count = state->stack->head - stack_head; + const char next_char = peek(state); + if (next_char == '}') { + state->cursor++; + state->current_nesting--; + size_t count = state->stack->head - stack_head; - // Temporary rewind cursor in case an error is raised - const char *final_cursor = state->cursor; - state->cursor = object_start_cursor; - VALUE object = json_decode_object(state, config, count); - state->cursor = final_cursor; + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = object_start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; - return json_push_value(state, config, object); - } + return json_push_value(state, config, object); + } - if (*state->cursor == ',') { - state->cursor++; - json_eat_whitespace(state); + if (next_char == ',') { + state->cursor++; + json_eat_whitespace(state); - if (config->allow_trailing_comma) { - if ((state->cursor < state->end) && (*state->cursor == '}')) { - continue; - } + if (config->allow_trailing_comma) { + if (peek(state) == '}') { + continue; } + } - if (*state->cursor != '"') { - raise_parse_error("expected object key, got: %s", state); - } - json_parse_string(state, config, true); + if (RB_UNLIKELY(peek(state) != '"')) { + raise_parse_error("expected object key, got: %s", state); + } + json_parse_string(state, config, true); - json_eat_whitespace(state); - if ((state->cursor >= state->end) || (*state->cursor != ':')) { - raise_parse_error("expected ':' after object key, got: %s", state); - } - state->cursor++; + json_eat_whitespace(state); + if (RB_UNLIKELY(peek(state) != ':')) { + raise_parse_error("expected ':' after object key, got: %s", state); + } + state->cursor++; - json_parse_any(state, config); + json_parse_any(state, config); - continue; - } + continue; } raise_parse_error("expected ',' or '}' after object value, got: %s", state); @@ -1305,6 +1317,10 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) break; } + case 0: + raise_parse_error("unexpected end of input", state); + break; + default: raise_parse_error("unexpected character: %s", state); break; @@ -1316,7 +1332,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) static void json_ensure_eof(JSON_ParserState *state) { json_eat_whitespace(state); - if (state->cursor != state->end) { + if (!eos(state)) { raise_parse_error("unexpected token at end of stream %s", state); } } From 1942cb219ab50c55b198e4a6173b4d09f3fced84 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 31 Oct 2025 08:31:11 +0100 Subject: [PATCH 05/13] [ruby/json] Add test coverage for T_BIGNUM parsing https://github.com/ruby/json/commit/f0150e2944 --- test/json/json_parser_test.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 9d387cb808925a..30188c4ebdea32 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -131,6 +131,12 @@ def test_parse_numbers capture_output { assert_equal(Float::INFINITY, parse("23456789012E666")) } end + def test_parse_bignum + bignum = Integer('1234567890' * 10) + assert_equal(bignum, JSON.parse(bignum.to_s)) + assert_equal(bignum.to_f, JSON.parse(bignum.to_s + ".0")) + end + def test_parse_bigdecimals assert_equal(BigDecimal, JSON.parse('{"foo": 9.01234567890123456789}', decimal_class: BigDecimal)["foo"].class) assert_equal(BigDecimal("0.901234567890123456789E1"),JSON.parse('{"foo": 9.01234567890123456789}', decimal_class: BigDecimal)["foo"] ) From bca8fce78f588c1119c56945b99dd29d95a67a0c Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 1 Nov 2025 10:54:03 +0100 Subject: [PATCH 06/13] [ruby/json] parser.c: Introduce `rest()` helper https://github.com/ruby/json/commit/11f4e7b7be --- ext/json/parser/parser.c | 74 +++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 90460f3a2a008b..3bd654dc3b8e71 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -406,6 +406,10 @@ typedef struct JSON_ParserStateStruct { int current_nesting; } JSON_ParserState; +static inline ssize_t rest(JSON_ParserState *state) { + return state->end - state->cursor; +} + static inline bool eos(JSON_ParserState *state) { return state->cursor >= state->end; } @@ -564,39 +568,39 @@ static const bool whitespace[256] = { static void json_eat_comments(JSON_ParserState *state) { - if (state->cursor + 1 < state->end) { - switch (state->cursor[1]) { - case '/': { - state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); - if (!state->cursor) { - state->cursor = state->end; - } else { - state->cursor++; - } - break; + const char *start = state->cursor; + state->cursor++; + + switch (peek(state)) { + case '/': { + state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); + if (!state->cursor) { + state->cursor = state->end; + } else { + state->cursor++; } - case '*': { - state->cursor += 2; - while (true) { - state->cursor = memchr(state->cursor, '*', state->end - state->cursor); - if (!state->cursor) { - raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end); - } else { - state->cursor++; - if (peek(state) == '/') { - state->cursor++; - break; - } - } + break; + } + case '*': { + state->cursor++; + + while (true) { + const char *next_match = memchr(state->cursor, '*', state->end - state->cursor); + if (!next_match) { + raise_parse_error_at("unterminated comment, expected closing '*/'", state, start); + } + + state->cursor = next_match + 1; + if (peek(state) == '/') { + state->cursor++; + break; } - break; } - default: - raise_parse_error("unexpected token %s", state); - break; + break; } - } else { - raise_parse_error("unexpected token %s", state); + default: + raise_parse_error_at("unexpected token %s", state, start); + break; } } @@ -1130,7 +1134,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) switch (peek(state)) { case 'n': - if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) { + if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) { state->cursor += 4; return json_push_value(state, config, Qnil); } @@ -1138,7 +1142,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); break; case 't': - if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) { + if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) { state->cursor += 4; return json_push_value(state, config, Qtrue); } @@ -1147,7 +1151,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) break; case 'f': // Note: memcmp with a small power of two compile to an integer comparison - if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) { + if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) { state->cursor += 5; return json_push_value(state, config, Qfalse); } @@ -1156,7 +1160,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) break; case 'N': // Note: memcmp with a small power of two compile to an integer comparison - if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) { + if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) { state->cursor += 3; return json_push_value(state, config, CNaN); } @@ -1164,7 +1168,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); break; case 'I': - if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) { + if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) { state->cursor += 8; return json_push_value(state, config, CInfinity); } @@ -1173,7 +1177,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) break; case '-': { // Note: memcmp with a small power of two compile to an integer comparison - if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { + if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { if (config->allow_nan) { state->cursor += 9; return json_push_value(state, config, CMinusInfinity); From 5ce27bef014d394e1c478de78165edaf9af122aa Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 1 Nov 2025 13:47:59 +0900 Subject: [PATCH 07/13] Flush NEWS.md only when NEW is not given Split flushing NEWS and bumping up versions --- defs/gmake.mk | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index c7eaca91a54992..4069a4b8c9127c 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -538,34 +538,53 @@ spec/%/ spec/%_spec.rb: programs exts PHONY ruby.pc: $(filter-out ruby.pc,$(ruby_pc)) matz: up - $(eval OLD := $(MAJOR).$(MINOR).0) + +matz: OLD := $(MAJOR).$(MINOR).0 ifdef NEW - $(eval MAJOR := $(word 1,$(subst ., ,$(NEW)))) - $(eval MINOR := $(word 2,$(subst ., ,$(NEW)))) +matz: MAJOR := $(word 1,$(subst ., ,$(NEW))) +matz: MINOR := $(word 2,$(subst ., ,$(NEW))) +matz: .WAIT bump_news else - $(eval MINOR := $(shell expr $(MINOR) + 1)) +matz: MINOR := $(shell expr $(MINOR) + 1) +matz: .WAIT reset_news endif - $(eval override NEW := $(MAJOR).$(MINOR).0) - $(eval message := Development of $(NEW) started.) - $(eval files := include/ruby/version.h include/ruby/internal/abi.h) + +matz: .WAIT bump_headers +matz: override NEW := $(MAJOR).$(MINOR).0 +matz: files := include/ruby/version.h include/ruby/internal/abi.h +matz: message := Development of $(NEW) started. + +flush_news: $(GIT_IN_SRC) mv -f NEWS.md doc/NEWS/NEWS-$(OLD).md $(GIT_IN_SRC) commit -m "[DOC] Flush NEWS.md" + +.PHONY: flush_news reset_news bump_news bump_headers + +bump_headers: sed -i~ \ -e "s/^\(#define RUBY_API_VERSION_MAJOR\) .*/\1 $(MAJOR)/" \ -e "s/^\(#define RUBY_API_VERSION_MINOR\) .*/\1 $(MINOR)/" \ -e "s/^\(#define RUBY_ABI_VERSION\) .*/\1 0/" \ $(files:%=$(srcdir)/%) - $(GIT_IN_SRC) add $(files) + +reset_news: flush_news $(BASERUBY) -C $(srcdir) -p -00 \ - -e 'BEGIN {old, new = ARGV.shift(2); STDOUT.reopen("NEWS.md")}' \ + -e 'BEGIN {old, new = ARGV.shift(2); STDOUT.reopen(ARGV.shift)}' \ -e 'case $$.' \ -e 'when 1; $$_.sub!(/Ruby \K[0-9.]+/, new)' \ -e 'when 2; $$_.sub!(/\*\*\K[0-9.]+(?=\*\*)/, old)' \ -e 'end' \ -e 'next if /^[\[ *]/ =~ $$_' \ -e '$$_.sub!(/\n{2,}\z/, "\n\n")' \ - $(OLD) $(NEW) doc/NEWS/NEWS-$(OLD).md - $(GIT_IN_SRC) add NEWS.md + $(OLD) $(NEW) NEWS.md doc/NEWS/NEWS-$(OLD).md + +bump_news: + $(BASERUBY) -C $(srcdir) -p -i \ + -e 'BEGIN {new = ARGV.shift; print gets("").sub(/Ruby \K[0-9.]+/, new)}' \ + $(NEW) NEWS.md + +matz: + $(GIT_IN_SRC) add NEWS.md $(files) $(GIT_IN_SRC) commit -m "$(message)" tags: From db5708045037a159458de741b46e9c47fe430284 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 1 Nov 2025 19:10:19 +0900 Subject: [PATCH 08/13] [DOC] How to use `make matz` --- defs/gmake.mk | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/defs/gmake.mk b/defs/gmake.mk index 4069a4b8c9127c..3dcfe9f639a244 100644 --- a/defs/gmake.mk +++ b/defs/gmake.mk @@ -537,8 +537,13 @@ spec/%/ spec/%_spec.rb: programs exts PHONY ruby.pc: $(filter-out ruby.pc,$(ruby_pc)) -matz: up +# `make matz`: bump up the MINOR; +# Copying NEWS.md to doc/NEWS/, and empty the details in NEWS.md. +# +# `make matz NEW=x.y`: bump up to x.y.0; +# Just update the version in the title of NEWS.md. +matz: up matz: OLD := $(MAJOR).$(MINOR).0 ifdef NEW matz: MAJOR := $(word 1,$(subst ., ,$(NEW))) From babf50e33bb0d9e1f3c37d11c1cfdc50c4f5bc7e Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 1 Nov 2025 11:06:32 +0100 Subject: [PATCH 09/13] [ruby/json] Use SWAR for parsing integers on little endian machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes: https://github.com/ruby/json/pull/878 ``` == Parsing float parsing (2251051 bytes) ruby 3.4.6 (2025-09-16 revision https://github.com/ruby/json/commit/dbd83256b1) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 23.000 i/100ms Calculating ------------------------------------- after 214.382 (± 0.5%) i/s (4.66 ms/i) - 1.081k in 5.042555s Comparison: before: 189.5 i/s after: 214.4 i/s - 1.13x faster ``` https://github.com/ruby/json/commit/6348ff0891 Co-Authored-By: Scott Myron --- ext/json/parser/parser.c | 52 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 3bd654dc3b8e71..e591ca2c5a9f85 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1039,11 +1039,61 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig return Qfalse; } +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +// Additional References: +// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html +static inline uint64_t decode_8digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return val; +} + +static inline uint64_t decode_4digits_unrolled(uint32_t val) { + const uint32_t mask = 0x000000FF; + const uint32_t mul1 = 100; + val -= 0x30303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = ((val & mask) * mul1) + (((val >> 16) & mask)); + return val; +} +#endif + static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) { const char *start = state->cursor; - char next_char; +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + while (rest(state) >= 8) { + uint64_t next_8bytes; + memcpy(&next_8bytes, state->cursor, sizeof(uint64_t)); + + // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333 + // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html + uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4); + + if (match == 0x3333333333333333) { // 8 consecutive digits + *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes); + state->cursor += 8; + continue; + } + + if ((match & 0xFFFFFFFF) == 0x33333333) { // 4 consecutive digits + *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes); + state->cursor += 4; + break; + } + + break; + } +#endif + + char next_char; while (rb_isdigit(next_char = peek(state))) { *accumulator = *accumulator * 10 + (next_char - '0'); state->cursor++; From 33a026fedd43fa5472937e77834397742fed6180 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Sat, 1 Nov 2025 11:38:16 +0100 Subject: [PATCH 10/13] Fix the description and logic for the Ractor.make_shareable(Proc) test --- bootstraptest/test_ractor.rb | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index fef55ffd688003..a437faeac1814f 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -1187,16 +1187,19 @@ def /(other) [a.frozen?, a[0].frozen?] == [true, false] } -# Ractor.make_shareable(a_proc) is not supported now. -assert_equal 'true', %q{ - pr = Proc.new{} +# Ractor.make_shareable(a_proc) requires a shareable receiver +assert_equal '[:ok, :error]', %q{ + pr1 = nil.instance_exec { Proc.new{} } + pr2 = Proc.new{} - begin - Ractor.make_shareable(pr) - rescue Ractor::Error - true - else - false + [pr1, pr2].map do |pr| + begin + Ractor.make_shareable(pr) + rescue Ractor::Error + :error + else + :ok + end end } From 94287b1e18edee04a0fd646fde84fe1178ce46d1 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Sat, 1 Nov 2025 11:41:23 +0100 Subject: [PATCH 11/13] Make the expectation more precise in Ractor.make_shareable(Proc) test --- bootstraptest/test_ractor.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bootstraptest/test_ractor.rb b/bootstraptest/test_ractor.rb index a437faeac1814f..f008bc82a4add1 100644 --- a/bootstraptest/test_ractor.rb +++ b/bootstraptest/test_ractor.rb @@ -1188,15 +1188,15 @@ def /(other) } # Ractor.make_shareable(a_proc) requires a shareable receiver -assert_equal '[:ok, :error]', %q{ +assert_equal '[:ok, "Proc\'s self is not shareable:"]', %q{ pr1 = nil.instance_exec { Proc.new{} } pr2 = Proc.new{} [pr1, pr2].map do |pr| begin Ractor.make_shareable(pr) - rescue Ractor::Error - :error + rescue Ractor::Error => e + e.message[/^.+?:/] else :ok end From ed7229eac8a5678211be8f1468af778d7beebf5c Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 1 Nov 2025 12:15:04 +0100 Subject: [PATCH 12/13] [ruby/json] parser.c: Use SWAR to skip consecutive spaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes: https://github.com/ruby/json/pull/881 If we encounter a newline, it is likely that the document is pretty printed, hence that the newline is followed by multiple spaces. In such case we can use SWAR to count up to eight consecutive spaces at once. ``` == Parsing activitypub.json (58160 bytes) ruby 3.4.6 (2025-09-16 revision https://github.com/ruby/json/commit/dbd83256b1) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 1.118k i/100ms Calculating ------------------------------------- after 11.223k (± 0.7%) i/s (89.10 μs/i) - 57.018k in 5.080522s Comparison: before: 10834.4 i/s after: 11223.4 i/s - 1.04x faster == Parsing twitter.json (567916 bytes) ruby 3.4.6 (2025-09-16 revision https://github.com/ruby/json/commit/dbd83256b1) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 118.000 i/100ms Calculating ------------------------------------- after 1.188k (± 1.0%) i/s (841.62 μs/i) - 6.018k in 5.065355s Comparison: before: 1094.8 i/s after: 1188.2 i/s - 1.09x faster == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.6 (2025-09-16 revision https://github.com/ruby/json/commit/dbd83256b1) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 58.000 i/100ms Calculating ------------------------------------- after 570.506 (± 3.7%) i/s (1.75 ms/i) - 2.900k in 5.091529s Comparison: before: 419.6 i/s after: 570.5 i/s - 1.36x faster == Parsing float parsing (2251051 bytes) ruby 3.4.6 (2025-09-16 revision https://github.com/ruby/json/commit/dbd83256b1) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- after 22.000 i/100ms Calculating ------------------------------------- after 212.010 (± 1.9%) i/s (4.72 ms/i) - 1.078k in 5.086885s Comparison: before: 189.4 i/s after: 212.0 i/s - 1.12x faster ``` https://github.com/ruby/json/commit/b3fd7b26be Co-Authored-By: Scott Myron --- ext/json/parser/parser.c | 46 ++++++++++++++++++++++++++++------------ ext/json/simd/simd.h | 3 +-- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index e591ca2c5a9f85..01234b5a860b0a 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -557,14 +557,6 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p static const rb_data_type_t JSON_ParserConfig_type; -static const bool whitespace[256] = { - [' '] = 1, - ['\t'] = 1, - ['\n'] = 1, - ['\r'] = 1, - ['/'] = 1, -}; - static void json_eat_comments(JSON_ParserState *state) { @@ -607,12 +599,38 @@ json_eat_comments(JSON_ParserState *state) static inline void json_eat_whitespace(JSON_ParserState *state) { - unsigned char cursor; - while (RB_UNLIKELY(whitespace[cursor = (unsigned char)peek(state)])) { - if (RB_UNLIKELY(cursor == '/')) { - json_eat_comments(state); - } else { - state->cursor++; + while (true) { + switch (peek(state)) { + case ' ': + state->cursor++; + break; + case '\n': + state->cursor++; + + // Heuristic: if we see a newline, there is likely consecutive spaces after it. +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + while (rest(state) > 8) { + uint64_t chunk; + memcpy(&chunk, state->cursor, sizeof(uint64_t)); + size_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; + + state->cursor += consecutive_spaces; + if (consecutive_spaces != 8) { + break; + } + } +#endif + break; + case '\t': + case '\r': + state->cursor++; + break; + case '/': + json_eat_comments(state); + break; + + default: + return; } } } diff --git a/ext/json/simd/simd.h b/ext/json/simd/simd.h index 3abbdb020958a8..2aa6c3d046a764 100644 --- a/ext/json/simd/simd.h +++ b/ext/json/simd/simd.h @@ -4,8 +4,6 @@ typedef enum { SIMD_SSE2 } SIMD_Implementation; -#ifdef JSON_ENABLE_SIMD - #ifdef __clang__ # if __has_builtin(__builtin_ctzll) # define HAVE_BUILTIN_CTZLL 1 @@ -54,6 +52,7 @@ static inline int trailing_zeros(int input) #define FORCE_INLINE #endif +#ifdef JSON_ENABLE_SIMD #define SIMD_MINIMUM_THRESHOLD 6 From a97f4c627f2bd1fcc777f1d0314284210b43262f Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 1 Nov 2025 13:05:18 +0100 Subject: [PATCH 13/13] [ruby/json] parser.c: Appease GCC warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` ../../../../../../ext/json/ext/parser/parser.c:1142:40: warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses] 1142 | if (RB_UNLIKELY(first_digit == '0' && mantissa_digits > 1 || negative && mantissa_digits == 0)) { | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~ ``` https://github.com/ruby/json/commit/ded62a5122 --- ext/json/parser/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 01234b5a860b0a..555652f42582b1 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1132,7 +1132,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig // Parse integer part and extract mantissa digits int mantissa_digits = json_parse_digits(state, &mantissa); - if (RB_UNLIKELY(first_digit == '0' && mantissa_digits > 1 || negative && mantissa_digits == 0)) { + if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) { raise_parse_error_at("invalid number: %s", state, start); }