Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 90 additions & 51 deletions ext/json/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -616,8 +616,10 @@ static inline bool json_string_cacheable_p(const char *string, size_t length)
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
}

static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
{
bool intern = is_name || config->freeze;
bool symbolize = is_name && config->symbolize_names;
size_t bufferSize = stringEnd - string;

if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
Expand All @@ -636,47 +638,71 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
return build_string(string, stringEnd, intern, symbolize);
}

static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
/* Capacity of the backslash-position array that callers hand to the unescaper. */
#define JSON_MAX_UNESCAPE_POSITIONS 16

/*
 * Cursor over a list of already-located backslash positions.
 *
 * json_next_backslash() consumes the list front to back; once it is empty,
 * `has_more` decides whether the rest of the string is still searched.
 */
typedef struct _json_unescape_positions {
    long size;              /* number of entries not yet consumed */
    const char **positions; /* points at the next entry to hand out */
    bool has_more;          /* true: fall back to memchr() once the list is drained */
} JSON_UnescapePositions;

/*
 * Return the address of the next backslash to process, or NULL if none.
 *
 * While recorded positions remain, the front entry is popped and returned
 * (`pe` and `stringEnd` are ignored in that case). After the list is
 * exhausted, the range [pe, stringEnd) is searched with memchr() only when
 * `positions->has_more` is set; otherwise NULL is returned.
 */
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
{
    if (positions->size != 0) {
        const char *recorded = *positions->positions;

        positions->positions++;
        positions->size--;
        return recorded;
    }

    return positions->has_more ? memchr(pe, '\\', stringEnd - pe) : NULL;
}

static NOINLINE() VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
{
bool intern = is_name || config->freeze;
bool symbolize = is_name && config->symbolize_names;
size_t bufferSize = stringEnd - string;
const char *p = string, *pe = string, *unescape, *bufferStart;
const char *p = string, *pe = string, *bufferStart;
char *buffer;
int unescape_len;
char buf[4];

VALUE result = rb_str_buf_new(bufferSize);
rb_enc_associate_index(result, utf8_encindex);
buffer = RSTRING_PTR(result);
bufferStart = buffer;

while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
unescape = (char *) "?";
unescape_len = 1;
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;

while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
if (pe > p) {
MEMCPY(buffer, p, char, pe - p);
buffer += pe - p;
}
switch (*++pe) {
case '"':
case '/':
p = pe; // nothing to unescape just need to skip the backslash
break;
case '\\':
APPEND_CHAR('\\');
break;
case 'n':
unescape = (char *) "\n";
APPEND_CHAR('\n');
break;
case 'r':
unescape = (char *) "\r";
APPEND_CHAR('\r');
break;
case 't':
unescape = (char *) "\t";
break;
case '"':
unescape = (char *) "\"";
break;
case '\\':
unescape = (char *) "\\";
APPEND_CHAR('\t');
break;
case 'b':
unescape = (char *) "\b";
APPEND_CHAR('\b');
break;
case 'f':
unescape = (char *) "\f";
APPEND_CHAR('\f');
break;
case 'u':
if (pe > stringEnd - 5) {
Expand Down Expand Up @@ -714,18 +740,23 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
break;
}
}
unescape_len = convert_UTF32_to_UTF8(buf, ch);
unescape = buf;

char buf[4];
int unescape_len = convert_UTF32_to_UTF8(buf, ch);
MEMCPY(buffer, buf, char, unescape_len);
buffer += unescape_len;
p = ++pe;
}
break;
default:
p = pe;
continue;
if ((unsigned char)*pe < 0x20) {
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
}
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
break;
}
MEMCPY(buffer, unescape, char, unescape_len);
buffer += unescape_len;
p = ++pe;
}
#undef APPEND_CHAR

if (stringEnd > p) {
MEMCPY(buffer, p, char, stringEnd - p);
Expand Down Expand Up @@ -889,20 +920,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
return object;
}

/*
 * Turn the byte range [start, end) into a Ruby value by picking the right
 * string builder.
 *
 * The result is interned when it is an object name or config->freeze is set,
 * and symbolized when it is an object name and config->symbolize_names is set.
 * `escaped` selects json_string_unescape(); otherwise json_string_fastpath()
 * is used.
 */
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
{
    const bool should_intern = is_name || config->freeze;
    const bool as_symbol = is_name && config->symbolize_names;

    if (!escaped) {
        return json_string_fastpath(state, start, end, is_name, should_intern, as_symbol);
    }

    return json_string_unescape(state, start, end, is_name, should_intern, as_symbol);
}

static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
{
if (RB_UNLIKELY(config->on_load_proc)) {
Expand Down Expand Up @@ -960,25 +977,30 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
return false;
}

static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
{
state->cursor++;
const char *start = state->cursor;
bool escaped = false;
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
JSON_UnescapePositions positions = {
.size = 0,
.positions = backslashes,
.has_more = false,
};

while (RB_UNLIKELY(string_scan(state))) {
do {
switch (*state->cursor) {
case '"': {
VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
state->cursor++;
return json_push_value(state, config, string);
}
case '\\': {
state->cursor++;
escaped = true;
if ((unsigned char)*state->cursor < 0x20) {
raise_parse_error("invalid ASCII control character in string: %s", state);
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
backslashes[positions.size] = state->cursor;
positions.size++;
} else {
positions.has_more = true;
}
state->cursor++;
break;
}
default:
Expand All @@ -987,12 +1009,29 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
}

state->cursor++;
}
} while (string_scan(state));

raise_parse_error("unexpected end of input, expected closing \"", state);
return Qfalse;
}

/*
 * Parse a JSON string starting at state->cursor and push the resulting value.
 *
 * The cursor is first advanced by one byte (presumably past the opening
 * quote; the caller is not visible here) and that position is remembered as
 * the start of the string contents. string_scan() then moves the cursor to
 * the next byte of interest; if it reports failure a parse error is raised.
 *
 * When the scan stops directly on a closing '"', the contents are built with
 * json_string_fastpath(). Any other stop byte is delegated to
 * json_parse_escaped_string(), which continues from the current cursor.
 */
static ALWAYS_INLINE() VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
    const char *start = ++state->cursor;

    if (RB_UNLIKELY(!string_scan(state))) {
        raise_parse_error("unexpected end of input, expected closing \"", state);
    }

    if (RB_UNLIKELY(*state->cursor != '"')) {
        /* Slow path: the scan stopped on something other than the closing quote. */
        return json_parse_escaped_string(state, config, is_name, start);
    }

    VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
    state->cursor++; /* step over the closing quote */
    return json_push_value(state, config, string);
}

#if JSON_CPU_LITTLE_ENDIAN_64BITS
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
// Additional References:
Expand Down
101 changes: 61 additions & 40 deletions ext/openssl/ossl_asn1.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,48 @@ asn1integer_to_num_i(VALUE arg)
return asn1integer_to_num((ASN1_INTEGER *)arg);
}

/*
* ASN1_OBJECT conversions
*/
/*
 * Return the dotted-decimal (numeric) OID of a1obj as a US-ASCII String.
 *
 * OBJ_obj2txt() is called with no_name=1 so the numeric form is always
 * produced. It is first tried with a 127-byte buffer and retried with an
 * exactly-sized buffer when the text does not fit.
 *
 * Raises eOSSLError if OBJ_obj2txt() fails.
 */
VALUE
ossl_asn1obj_to_string_oid(const ASN1_OBJECT *a1obj)
{
    VALUE str;
    int len;

    str = rb_usascii_str_new(NULL, 127);
    len = OBJ_obj2txt(RSTRING_PTR(str), RSTRING_LENINT(str), a1obj, 1);
    if (len <= 0 || len == INT_MAX)
        ossl_raise(eOSSLError, "OBJ_obj2txt");
    /*
     * OBJ_obj2txt() writes at most buf_len bytes *including* the \0
     * terminator, so a return value equal to the buffer size already means
     * the last character was dropped. Use >= (not >) to retry in that case;
     * otherwise the result would end with a stray NUL byte.
     */
    if (len >= RSTRING_LEN(str)) {
        /* +1 is for the \0 terminator added by OBJ_obj2txt() */
        rb_str_resize(str, len + 1);
        len = OBJ_obj2txt(RSTRING_PTR(str), len + 1, a1obj, 1);
        if (len <= 0)
            ossl_raise(eOSSLError, "OBJ_obj2txt");
    }
    rb_str_set_len(str, len);
    return str;
}

/*
 * Convert an ASN1_OBJECT to a Ruby String.
 *
 * If OBJ_obj2nid() resolves the object to a NID, the string returned by
 * OBJ_nid2sn() for that NID is used; otherwise the result of
 * ossl_asn1obj_to_string_oid() is returned.
 */
VALUE
ossl_asn1obj_to_string(const ASN1_OBJECT *obj)
{
    const int known_nid = OBJ_obj2nid(obj);

    if (known_nid == NID_undef) {
        return ossl_asn1obj_to_string_oid(obj);
    }

    return rb_str_new_cstr(OBJ_nid2sn(known_nid));
}

/*
 * Convert an ASN1_OBJECT to a Ruby String.
 *
 * If OBJ_obj2nid() resolves the object to a NID, the string returned by
 * OBJ_nid2ln() for that NID is used; otherwise the result of
 * ossl_asn1obj_to_string_oid() is returned.
 */
VALUE
ossl_asn1obj_to_string_long_name(const ASN1_OBJECT *obj)
{
    const int known_nid = OBJ_obj2nid(obj);

    if (known_nid == NID_undef) {
        return ossl_asn1obj_to_string_oid(obj);
    }

    return rb_str_new_cstr(OBJ_nid2ln(known_nid));
}

/********/
/*
* ASN1 module
Expand All @@ -160,7 +202,7 @@ asn1integer_to_num_i(VALUE arg)
#define ossl_asn1_set_indefinite_length(o,v) rb_ivar_set((o),sivINDEFINITE_LENGTH,(v))

VALUE mASN1;
VALUE eASN1Error;
static VALUE eASN1Error;

VALUE cASN1Data;
static VALUE cASN1Primitive;
Expand Down Expand Up @@ -247,8 +289,8 @@ obj_to_asn1null(VALUE obj)
return null;
}

static ASN1_OBJECT*
obj_to_asn1obj(VALUE obj)
ASN1_OBJECT *
ossl_to_asn1obj(VALUE obj)
{
ASN1_OBJECT *a1obj;

Expand Down Expand Up @@ -393,32 +435,27 @@ decode_null(unsigned char* der, long length)
return Qnil;
}

/*
 * VALUE-in / VALUE-out adapter around ossl_asn1obj_to_string(), shaped so it
 * can be passed to rb_protect(). `arg` is an ASN1_OBJECT pointer that the
 * caller has cast to VALUE.
 */
VALUE
asn1obj_to_string_i(VALUE arg)
{
    const ASN1_OBJECT *obj = (const ASN1_OBJECT *)arg;

    return ossl_asn1obj_to_string(obj);
}

static VALUE
decode_obj(unsigned char* der, long length)
{
ASN1_OBJECT *obj;
const unsigned char *p;
VALUE ret;
int nid;
BIO *bio;
int state;

p = der;
if(!(obj = d2i_ASN1_OBJECT(NULL, &p, length)))
ossl_raise(eASN1Error, NULL);
if((nid = OBJ_obj2nid(obj)) != NID_undef){
ASN1_OBJECT_free(obj);
ret = rb_str_new2(OBJ_nid2sn(nid));
}
else{
if(!(bio = BIO_new(BIO_s_mem()))){
ASN1_OBJECT_free(obj);
ossl_raise(eASN1Error, NULL);
}
i2a_ASN1_OBJECT(bio, obj);
ASN1_OBJECT_free(obj);
ret = ossl_membio2str(bio);
}

if (!(obj = d2i_ASN1_OBJECT(NULL, &p, length)))
ossl_raise(eASN1Error, "d2i_ASN1_OBJECT");
ret = rb_protect(asn1obj_to_string_i, (VALUE)obj, &state);
ASN1_OBJECT_free(obj);
if (state)
rb_jump_tag(state);
return ret;
}

Expand Down Expand Up @@ -544,7 +581,7 @@ ossl_asn1_get_asn1type(VALUE obj)
free_func = (free_func_type *)ASN1_STRING_free;
break;
case V_ASN1_OBJECT:
ptr = obj_to_asn1obj(value);
ptr = ossl_to_asn1obj(value);
free_func = (free_func_type *)ASN1_OBJECT_free;
break;
case V_ASN1_UTCTIME:
Expand Down Expand Up @@ -1172,23 +1209,7 @@ ossl_asn1obj_get_ln(VALUE self)
static VALUE
asn1obj_get_oid_i(VALUE vobj)
{
ASN1_OBJECT *a1obj = (void *)vobj;
VALUE str;
int len;

str = rb_usascii_str_new(NULL, 127);
len = OBJ_obj2txt(RSTRING_PTR(str), RSTRING_LENINT(str), a1obj, 1);
if (len <= 0 || len == INT_MAX)
ossl_raise(eASN1Error, "OBJ_obj2txt");
if (len > RSTRING_LEN(str)) {
/* +1 is for the \0 terminator added by OBJ_obj2txt() */
rb_str_resize(str, len + 1);
len = OBJ_obj2txt(RSTRING_PTR(str), len + 1, a1obj, 1);
if (len <= 0)
ossl_raise(eASN1Error, "OBJ_obj2txt");
}
rb_str_set_len(str, len);
return str;
return ossl_asn1obj_to_string_oid((const ASN1_OBJECT *)vobj);
}

/*
Expand All @@ -1205,7 +1226,7 @@ ossl_asn1obj_get_oid(VALUE self)
ASN1_OBJECT *a1obj;
int state;

a1obj = obj_to_asn1obj(ossl_asn1_get_value(self));
a1obj = ossl_to_asn1obj(ossl_asn1_get_value(self));
str = rb_protect(asn1obj_get_oid_i, (VALUE)a1obj, &state);
ASN1_OBJECT_free(a1obj);
if (state)
Expand Down
Loading