From 319a347081557b1957537dc674c7213b081ff1e6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 1 Jun 2026 12:47:06 -0400 Subject: [PATCH] Make the JSON parse loop iterative As opposed to a recursive loop. We do this by keeping a stack of frames (very similar to how the stack of values was already stored). Each frame represents the state of a container. Since there are only 2 in JSON, it doesn't have to get too complex. Each frame in the iterative parser now holds an enum describing its "phase", in order to support suspending parsing. For performance to be on par with the previous implementation, we directly jump to the proper handler in most cases. `JSON_ParserState.stack` was renamed into `.value_stack`, as with the introduction of the frames stack, the naming became confusing. Co-Authored-By: Jean Boussier --- ext/json/ext/parser/parser.c | 650 +++++++++++++++++++++++++---------- 1 file changed, 462 insertions(+), 188 deletions(-) diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index d0482f68..0b0bfd71 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -309,31 +309,57 @@ static void rvalue_stack_eagerly_release(VALUE handle) } } -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) -{ - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; -} +/* frame stack */ + +// Iterative (non-recursive) parsing keeps an explicit stack of the containers +// currently being built, instead of relying on the C call stack. Each frame +// only needs enough bookkeeping to close its container: which kind it is, the +// rvalue_stack position where its children start (so we know how many to pop), +// and the cursor at its opening brace (used to rewind for duplicate key +// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses +// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that +// it's freed even if parsing raises. +// +// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release +// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts +// -- the element type and the absence of a mark function are the only real +// differences. Keep the two in sync: a fix to the spill/release or +// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the +// other. +#define JSON_FRAME_STACK_INITIAL_CAPA 32 + +enum json_frame_type { + JSON_FRAME_ROOT, + JSON_FRAME_ARRAY, + JSON_FRAME_OBJECT, +}; + +// Where a frame is within its container's grammar. This is the entirety of the +// parser's "what to do next" state: json_parse_any dispatches on the top +// frame's phase and holds no resume state in C locals, so a parse can stop at +// any value boundary and be resumed purely from the (persistable) frame stack. +enum json_frame_phase { + JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':') + JSON_PHASE_ARRAY_COMMA, // after a value: expecting ',' or the closing ']' + JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',') + JSON_PHASE_OBJECT_COMMA, // after a value: expecting ',' or the closing '}' + JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':' + JSON_PHASE_DONE, // root only: the document value has been parsed +}; + +typedef struct json_frame_struct { + enum json_frame_type type; + enum json_frame_phase phase; + long value_stack_head; // rvalue_stack->head when this container opened + const char *start_cursor; // object frames only (the '{'); NULL otherwise +} json_frame; + +typedef struct json_frame_stack_struct { + enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated + long capa; + long head; + json_frame *ptr; +} json_frame_stack; enum duplicate_key_action { JSON_DEPRECATED = 0, @@ -356,17 +382,143 @@ typedef struct JSON_ParserStruct { } JSON_ParserConfig; typedef struct JSON_ParserStateStruct { - VALUE *stack_handle; + VALUE *value_stack_handle; + VALUE *frame_stack_handle; const char *start; const char *cursor; const char *end; - rvalue_stack *stack; + rvalue_stack *value_stack; + json_frame_stack *frames; rvalue_cache name_cache; int in_array; int current_nesting; unsigned int emitted_deprecations; } JSON_ParserState; +static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref); + +static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = json_frame_stack_spill(stack, handle, stack_ref); + } else { + JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa); + stack->capa = required; + } + return stack; +} + +static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame) +{ + json_frame_stack *stack = state->frames; + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames); + } + + json_frame *frame_ptr = &stack->ptr[stack->head++]; + *frame_ptr = frame; + return frame_ptr; +} + +static inline json_frame *json_frame_stack_peek(json_frame_stack *stack) +{ + return &stack->ptr[stack->head - 1]; +} + +static inline void json_frame_stack_pop(json_frame_stack *stack) +{ + stack->head--; +} + +static void json_frame_stack_free_buffer(json_frame_stack *stack) +{ + JSON_SIZED_FREE_N(stack->ptr, stack->capa); + stack->ptr = NULL; +} + +static void json_frame_stack_free(void *ptr) +{ + json_frame_stack *stack = (json_frame_stack *)ptr; + if (stack) { + json_frame_stack_free_buffer(stack); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + JSON_SIZED_FREE(stack); +#endif + } +} + +static size_t json_frame_stack_memsize(const void *ptr) +{ + const json_frame_stack *stack = (const json_frame_stack *)ptr; + return sizeof(json_frame_stack) + sizeof(json_frame) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_frame_stack_type = { + .wrap_struct_name = "JSON::Ext::Parser/frame_stack", + .function = { + .dmark = NULL, + .dfree = json_frame_stack_free, + .dsize = json_frame_stack_memsize, + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE, +}; + +static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref) +{ + json_frame_stack *stack; + *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, json_frame_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(json_frame, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, json_frame, old_stack->head); + return stack; +} + +static void json_frame_stack_eagerly_release(VALUE handle) +{ + if (handle) { + json_frame_stack *stack; + TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack); +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + json_frame_stack_free_buffer(stack); +#else + json_frame_stack_free(stack); + RTYPEDDATA_DATA(handle) = NULL; +#endif + } +} + +static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) +{ + int len = 1; + if (ch <= 0x7F) { + buf[0] = (char) ch; + } else if (ch <= 0x07FF) { + buf[0] = (char) ((ch >> 6) | 0xC0); + buf[1] = (char) ((ch & 0x3F) | 0x80); + len++; + } else if (ch <= 0xFFFF) { + buf[0] = (char) ((ch >> 12) | 0xE0); + buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); + buf[2] = (char) ((ch & 0x3F) | 0x80); + len += 2; + } else if (ch <= 0x1fffff) { + buf[0] =(char) ((ch >> 18) | 0xF0); + buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); + buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); + buf[3] =(char) ((ch & 0x3F) | 0x80); + len += 3; + } else { + buf[0] = '?'; + } + return len; +} + static inline size_t rest(JSON_ParserState *state) { return state->end - state->cursor; } @@ -890,8 +1042,8 @@ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantis static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count) { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); - rvalue_stack_pop(state->stack, count); + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count)); + rvalue_stack_pop(state->value_stack, count); if (config->freeze) { RB_OBJ_FREEZE(array); @@ -945,7 +1097,7 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi { size_t entries_count = count / 2; VALUE object = rb_hash_new_capa(entries_count); - const VALUE *pairs = rvalue_stack_peek(state->stack, count); + const VALUE *pairs = rvalue_stack_peek(state->value_stack, count); rb_hash_bulk_insert(count, pairs, object); if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) { @@ -966,7 +1118,7 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi } } - rvalue_stack_pop(state->stack, count); + rvalue_stack_pop(state->value_stack, count); if (config->freeze) { RB_OBJ_FREEZE(object); @@ -980,7 +1132,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig * if (RB_UNLIKELY(config->on_load_proc)) { value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil); } - rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack); + rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack); return value; } @@ -1247,92 +1399,241 @@ static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_Par return json_parse_number(state, config, true, start); } +// How many values (array elements, or interleaved object keys+values) have been +// pushed onto the rvalue stack since this container opened. Used to size the +// bulk decode on close, and to tell the first key/colon from later ones. +static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack) +{ + return value_stack->head - frame->value_stack_head; +} + +// A complete value now sits on top of the rvalue stack. Advance the frame that +// was waiting for it: the root document is done, or the enclosing container +// moves on to expecting a ',' or its closing bracket. The caller passes the +// frame it already has in hand -- the one that was expecting the value -- which +// after a container close is the freshly re-exposed parent. +static inline void json_value_completed(json_frame *frame) +{ + // TODO: consider a lookup table? + switch (frame->type) { + case JSON_FRAME_ROOT: + frame->phase = JSON_PHASE_DONE; + break; + case JSON_FRAME_ARRAY: + frame->phase = JSON_PHASE_ARRAY_COMMA; + break; + case JSON_FRAME_OBJECT: + frame->phase = JSON_PHASE_OBJECT_COMMA; + break; + } +} + +// Parse an arbitrary JSON value iteratively. This is a state machine driven +// entirely by the top frame's phase so it can stop at any value boundary and +// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the +// bottom of the stack, so the stack is never empty mid-parse and the document +// itself is just another frame whose value, once parsed, leaves its phase DONE. static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) { - json_eat_whitespace(state); + while (true) { + json_frame *frame = json_frame_stack_peek(state->frames); - switch (peek(state)) { - case 'n': - if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) { - state->cursor += 4; - return json_push_value(state, config, Qnil); + switch (frame->phase) { + case JSON_PHASE_DONE: { + // The root document value is parsed; it is the lone survivor on + // the rvalue stack. + return *rvalue_stack_peek(state->value_stack, 1); } - raise_parse_error("unexpected token %s", state); - break; - case 't': - if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) { - state->cursor += 4; - return json_push_value(state, config, Qtrue); - } + case JSON_PHASE_VALUE: { + JSON_PHASE_VALUE: + json_eat_whitespace(state); - raise_parse_error("unexpected token %s", state); - break; - case 'f': - // Note: memcmp with a small power of two compile to an integer comparison - if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) { - state->cursor += 5; - return json_push_value(state, config, Qfalse); - } + switch (peek(state)) { + case 'n': + if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) { + state->cursor += 4; + json_push_value(state, config, Qnil); + json_value_completed(frame); + break; + } - raise_parse_error("unexpected token %s", state); - break; - case 'N': - // Note: memcmp with a small power of two compile to an integer comparison - if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) { - state->cursor += 3; - return json_push_value(state, config, CNaN); - } + raise_parse_error("unexpected token %s", state); + case 't': + if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) { + state->cursor += 4; + json_push_value(state, config, Qtrue); + json_value_completed(frame); + break; + } - raise_parse_error("unexpected token %s", state); - break; - case 'I': - if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) { - state->cursor += 8; - return json_push_value(state, config, CInfinity); + raise_parse_error("unexpected token %s", state); + case 'f': + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) { + state->cursor += 5; + json_push_value(state, config, Qfalse); + json_value_completed(frame); + break; + } + + raise_parse_error("unexpected token %s", state); + case 'N': + // Note: memcmp with a small power of two compile to an integer comparison + if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) { + state->cursor += 3; + json_push_value(state, config, CNaN); + json_value_completed(frame); + break; + } + + raise_parse_error("unexpected token %s", state); + case 'I': + if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) { + state->cursor += 8; + json_push_value(state, config, CInfinity); + json_value_completed(frame); + break; + } + + raise_parse_error("unexpected token %s", state); + case '-': { + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { + if (config->allow_nan) { + state->cursor += 9; + json_push_value(state, config, CMinusInfinity); + json_value_completed(frame); + break; + } else { + raise_parse_error("unexpected token %s", state); + } + } + json_push_value(state, config, json_parse_negative_number(state, config)); + json_value_completed(frame); + break; + } + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + json_push_value(state, config, json_parse_positive_number(state, config)); + json_value_completed(frame); + break; + case '"': + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + json_parse_string(state, config, false); + json_value_completed(frame); + break; + case '[': { + state->cursor++; + json_eat_whitespace(state); + + if (peek(state) == ']') { + state->cursor++; + json_push_value(state, config, json_decode_array(state, config, 0)); + json_value_completed(frame); + break; + } + + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + state->in_array++; + + // Phase stays VALUE: the next iteration reads the first element. + frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_ARRAY, + .phase = JSON_PHASE_VALUE, + .value_stack_head = state->value_stack->head, + }); + goto JSON_PHASE_VALUE; + break; + } + case '{': { + const char *object_start_cursor = state->cursor; + + state->cursor++; + json_eat_whitespace(state); + + if (peek(state) == '}') { + state->cursor++; + json_push_value(state, config, json_decode_object(state, config, 0)); + json_value_completed(frame); + break; + } + + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + + // Phase KEY: the next iteration reads the first key. + frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_OBJECT, + .phase = JSON_PHASE_OBJECT_KEY, + .value_stack_head = state->value_stack->head, + .start_cursor = object_start_cursor, + }); + goto JSON_PHASE_OBJECT_KEY; + break; + } + + case 0: + raise_parse_error("unexpected end of input", state); + + default: + raise_parse_error("unexpected character: %s", state); + } + break; } - raise_parse_error("unexpected token %s", state); - break; - case '-': { - // Note: memcmp with a small power of two compile to an integer comparison - if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { - if (config->allow_nan) { - state->cursor += 9; - return json_push_value(state, config, CMinusInfinity); + case JSON_PHASE_OBJECT_KEY: { + JSON_PHASE_OBJECT_KEY: + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + + json_eat_whitespace(state); + + if (RB_LIKELY(peek(state) == '"')) { + json_parse_string(state, config, true); + frame->phase = JSON_PHASE_OBJECT_COLON; + goto JSON_PHASE_OBJECT_COLON; } else { - raise_parse_error("unexpected token %s", state); + // The message differs for the first key vs. a key after a + // ',': the first is the only one reached with nothing pushed + // for this object yet. + if (json_frame_entry_count(frame, state->value_stack) == 0) { + raise_parse_error("expected object key, got %s", state); + } else { + raise_parse_error("expected object key, got: %s", state); + } } + break; } - return json_push_value(state, config, json_parse_negative_number(state, config)); - break; - } - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - return json_push_value(state, config, json_parse_positive_number(state, config)); - break; - case '"': { - // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} - return json_parse_string(state, config, false); - break; - } - case '[': { - state->cursor++; - json_eat_whitespace(state); - long stack_head = state->stack->head; - if (peek(state) == ']') { - state->cursor++; - return json_push_value(state, config, json_decode_array(state, config, 0)); - } else { - state->current_nesting++; - if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { - rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + case JSON_PHASE_OBJECT_COLON: { + JSON_PHASE_OBJECT_COLON: + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + + json_eat_whitespace(state); + + if (RB_LIKELY(peek(state) == ':')) { + state->cursor++; + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else { + // First colon (only the first pair's key is pushed, nothing + // else) vs. a later one. + if (json_frame_entry_count(frame, state->value_stack) == 1) { + raise_parse_error("expected ':' after object key", state); + } else { + raise_parse_error("expected ':' after object key, got: %s", state); + } } - state->in_array++; - json_parse_any(state, config); + break; } - while (true) { + case JSON_PHASE_ARRAY_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_ARRAY); + json_eat_whitespace(state); const char next_char = peek(state); @@ -1342,115 +1643,68 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) if (config->allow_trailing_comma) { json_eat_whitespace(state); if (peek(state) == ']') { - continue; + // Trailing comma: stay in COMMA to close on the next iteration. + break; } } - json_parse_any(state, config); - continue; - } - - if (next_char == ']') { + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else if (next_char == ']') { state->cursor++; - long count = state->stack->head - stack_head; + long count = json_frame_entry_count(frame, state->value_stack); state->current_nesting--; state->in_array--; - return json_push_value(state, config, json_decode_array(state, config, count)); + json_frame_stack_pop(state->frames); + json_push_value(state, config, json_decode_array(state, config, count)); + json_value_completed(json_frame_stack_peek(state->frames)); + } else { + raise_parse_error("expected ',' or ']' after array value", state); } - - raise_parse_error("expected ',' or ']' after array value", state); + break; } - break; - } - case '{': { - const char *object_start_cursor = state->cursor; - state->cursor++; - json_eat_whitespace(state); - long stack_head = state->stack->head; + case JSON_PHASE_OBJECT_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - if (peek(state) == '}') { - state->cursor++; - return json_push_value(state, config, json_decode_object(state, config, 0)); - } else { - state->current_nesting++; - if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { - rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); - } - - if (peek(state) != '"') { - raise_parse_error("expected object key, got %s", state); - } - json_parse_string(state, config, true); - - json_eat_whitespace(state); - if (peek(state) != ':') { - raise_parse_error("expected ':' after object key", state); - } - state->cursor++; - - json_parse_any(state, config); - } - - while (true) { json_eat_whitespace(state); - const char next_char = peek(state); - if (next_char == '}') { - state->cursor++; - state->current_nesting--; - size_t count = state->stack->head - stack_head; - // Temporary rewind cursor in case an error is raised - const char *final_cursor = state->cursor; - state->cursor = object_start_cursor; - VALUE object = json_decode_object(state, config, count); - state->cursor = final_cursor; - - return json_push_value(state, config, object); - } - - if (next_char == ',') { + if (RB_LIKELY(next_char == ',')) { state->cursor++; json_eat_whitespace(state); if (config->allow_trailing_comma) { if (peek(state) == '}') { - continue; + // Trailing comma: stay in COMMA to close on the next iteration. + break; } } - if (RB_UNLIKELY(peek(state) != '"')) { - raise_parse_error("expected object key, got: %s", state); - } - json_parse_string(state, config, true); + frame->phase = JSON_PHASE_OBJECT_KEY; + goto JSON_PHASE_OBJECT_KEY; - json_eat_whitespace(state); - if (RB_UNLIKELY(peek(state) != ':')) { - raise_parse_error("expected ':' after object key, got: %s", state); - } + break; + } else if (next_char == '}') { state->cursor++; + state->current_nesting--; + size_t count = json_frame_entry_count(frame, state->value_stack); - json_parse_any(state, config); + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = frame->start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; - continue; + json_frame_stack_pop(state->frames); + json_push_value(state, config, object); + json_value_completed(json_frame_stack_peek(state->frames)); + break; + } else { + raise_parse_error("expected ',' or '}' after object value, got: %s", state); } - - raise_parse_error("expected ',' or '}' after object value, got: %s", state); } - break; } - - case 0: - raise_parse_error("unexpected end of input", state); - break; - - default: - raise_parse_error("unexpected character: %s", state); - break; } - - raise_parse_error("unreachable: %s", state); - return Qundef; } static void json_ensure_eof(JSON_ParserState *state) @@ -1616,24 +1870,42 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src) } VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { + rvalue_stack value_stack = { .type = RVALUE_STACK_STACK_ALLOCATED, .ptr = rvalue_stack_buffer, .capa = RVALUE_STACK_INITIAL_CAPA, }; + // Seed the frame stack with the root frame, establishing the invariant that + // json_parse_any always has a top frame to dispatch on (so the stack is never + // empty mid-parse). + json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA]; + frame_stack_buffer[0] = (json_frame){ + .type = JSON_FRAME_ROOT, + .phase = JSON_PHASE_VALUE, + }; + json_frame_stack frames = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = frame_stack_buffer, + .capa = JSON_FRAME_STACK_INITIAL_CAPA, + .head = 1, + }; + long len; const char *start; RSTRING_GETMEM(Vsource, start, len); - VALUE stack_handle = 0; + VALUE value_stack_handle = 0; + VALUE frame_stack_handle = 0; JSON_ParserState _state = { .start = start, .cursor = start, .end = start + len, - .stack = &stack, - .stack_handle = &stack_handle, + .value_stack = &value_stack, + .value_stack_handle = &value_stack_handle, + .frames = &frames, + .frame_stack_handle = &frame_stack_handle, }; JSON_ParserState *state = &_state; @@ -1641,8 +1913,10 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src) // This may be skipped in case of exception, but // it won't cause a leak. - rvalue_stack_eagerly_release(stack_handle); - RB_GC_GUARD(stack_handle); + rvalue_stack_eagerly_release(value_stack_handle); + json_frame_stack_eagerly_release(frame_stack_handle); + RB_GC_GUARD(value_stack_handle); + RB_GC_GUARD(frame_stack_handle); RB_GC_GUARD(Vsource); json_ensure_eof(state);