diff --git a/CHANGES.md b/CHANGES.md index eb86d86b..ba7010df 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ### Unreleased +* Fix a regression in parsing of Unicode surrogate pairs (`\uXXXX\uXXXX`) that could cause an invalid string to be returned. + ### 2025-12-03 (2.17.0) * Improve `JSON.load` and `JSON.unsafe_load` to allow passing options as second argument. diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 5b7cd835..c84c7ed6 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -651,7 +651,9 @@ static inline const char *json_next_backslash(const char *pe, const char *string positions->size--; const char *next_position = positions->positions[0]; positions->positions++; - return next_position; + if (next_position >= pe) { + return next_position; + } } if (positions->has_more) { diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 3e662bda..257e4f17 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -325,6 +325,13 @@ def test_invalid_unicode_escape assert_raise(JSON::ParserError) { parse('"\u111___"') } end + def test_unicode_followed_by_newline + # Ref: https://github.com/ruby/json/issues/912 + assert_equal "🌌\n".bytes, JSON.parse('"\ud83c\udf0c\n"').bytes + assert_equal "🌌\n", JSON.parse('"\ud83c\udf0c\n"') + assert_predicate JSON.parse('"\ud83c\udf0c\n"'), :valid_encoding? + end + def test_invalid_surogates assert_raise(JSON::ParserError) { parse('"\\uD800"') } assert_raise(JSON::ParserError) { parse('"\\uD800_________________"') }