Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cpp/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ extern "C" {
#include <iostream>

int main() {
pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, reinterpret_cast<const uint8_t *>("1 + 2"), 5, NULL);
pm_parser_init(&arena, &parser, reinterpret_cast<const uint8_t *>("1 + 2"), 5, NULL);

pm_node_t *root = pm_parse(&parser);
pm_buffer_t buffer = { 0 };
Expand All @@ -17,8 +18,8 @@ int main() {
std::cout << buffer.value << std::endl;

pm_buffer_free(&buffer);
pm_node_destroy(&parser, root);
pm_parser_free(&parser);
pm_arena_free(&arena);

return 0;
}
41 changes: 24 additions & 17 deletions ext/prism/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,16 +374,17 @@ dump_input(pm_string_t *input, const pm_options_t *options) {
rb_raise(rb_eNoMemError, "failed to allocate memory");
}

pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options);

pm_node_t *node = pm_parse(&parser);
pm_serialize(&parser, node, &buffer);

VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
pm_node_destroy(&parser, node);
pm_buffer_free(&buffer);
pm_parser_free(&parser);
pm_arena_free(&arena);

return result;
}
Expand Down Expand Up @@ -736,8 +737,9 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
*/
static VALUE
parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);

VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
Expand Down Expand Up @@ -789,8 +791,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze);
}

pm_node_destroy(&parser, node);
pm_parser_free(&parser);
pm_arena_free(&arena);

return result;
}
Expand Down Expand Up @@ -848,8 +850,9 @@ lex_file(int argc, VALUE *argv, VALUE self) {
*/
static VALUE
parse_input(pm_string_t *input, const pm_options_t *options) {
pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options);

pm_node_t *node = pm_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
Expand All @@ -862,8 +865,8 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
rb_obj_freeze(source);
}

pm_node_destroy(&parser, node);
pm_parser_free(&parser);
pm_arena_free(&arena);

return result;
}
Expand Down Expand Up @@ -965,12 +968,13 @@ parse_file(int argc, VALUE *argv, VALUE self) {
*/
static void
profile_input(pm_string_t *input, const pm_options_t *options) {
pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options);

pm_node_t *node = pm_parse(&parser);
pm_node_destroy(&parser, node);
pm_parse(&parser);
pm_parser_free(&parser);
pm_arena_free(&arena);
}

/**
Expand Down Expand Up @@ -1065,19 +1069,20 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
pm_options_t options = { 0 };
extract_options(&options, Qnil, keywords);

pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_buffer_t buffer;

pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options);
pm_node_t *node = pm_parse_stream(&arena, &parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);

VALUE source = pm_source_new(&parser, encoding, options.freeze);
VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze);
VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze);

pm_node_destroy(&parser, node);
pm_buffer_free(&buffer);
pm_parser_free(&parser);
pm_arena_free(&arena);

return result;
}
Expand All @@ -1087,17 +1092,18 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
*/
static VALUE
parse_input_comments(pm_string_t *input, const pm_options_t *options) {
pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options);

pm_node_t *node = pm_parse(&parser);
pm_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding->name);

VALUE source = pm_source_new(&parser, encoding, options->freeze);
VALUE comments = parser_comments(&parser, source, options->freeze);

pm_node_destroy(&parser, node);
pm_parser_free(&parser);
pm_arena_free(&arena);

return comments;
}
Expand Down Expand Up @@ -1209,14 +1215,15 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) {
*/
static VALUE
parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
pm_parser_init(&arena, &parser, pm_string_source(input), pm_string_length(input), options);

pm_node_t *node = pm_parse(&parser);
pm_node_destroy(&parser, node);
pm_parse(&parser);

VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse;
pm_parser_free(&parser);
pm_arena_free(&arena);

return result;
}
Expand Down
4 changes: 3 additions & 1 deletion fuzz/regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ regexp_error_callback(const uint8_t *start, const uint8_t *end, const char *mess

void
harness(const uint8_t *input, size_t size) {
pm_arena_t arena = { 0 };
pm_parser_t parser;
pm_parser_init(&parser, input, size, NULL);
pm_parser_init(&arena, &parser, input, size, NULL);

pm_regexp_parse(&parser, input, size, false, regexp_name_callback, NULL, regexp_error_callback, NULL);

pm_parser_free(&parser);
pm_arena_free(&arena);
}
24 changes: 16 additions & 8 deletions include/prism.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ extern "C" {
#endif

#include "prism/defines.h"
#include "prism/util/pm_arena.h"
#include "prism/util/pm_buffer.h"
#include "prism/util/pm_char.h"
#include "prism/util/pm_integer.h"
Expand Down Expand Up @@ -52,8 +53,11 @@ PRISM_EXPORTED_FUNCTION const char * pm_version(void);
/**
* Initialize a parser with the given source and size.
*
* The resulting parser must eventually be freed with `pm_parser_free()`.
* The resulting parser must eventually be freed with `pm_parser_free()`. The
* arena is caller-owned and must outlive the parser — `pm_parser_free()` does
* not free the arena.
*
* @param arena The arena to use for all AST-lifetime allocations.
* @param parser The parser to initialize.
* @param source The source to parse.
* @param size The size of the source.
Expand All @@ -62,7 +66,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_version(void);
*
* \public \memberof pm_parser
*/
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);

/**
* Register a callback that will be called whenever prism changes the encoding
Expand Down Expand Up @@ -114,6 +118,7 @@ typedef int (pm_parse_stream_feof_t)(void *stream);
/**
* Parse a stream of Ruby source and return the tree.
*
* @param arena The arena to use for all AST-lifetime allocations.
* @param parser The parser to use.
* @param buffer The buffer to use.
* @param stream The stream to parse.
Expand All @@ -124,7 +129,7 @@ typedef int (pm_parse_stream_feof_t)(void *stream);
*
* \public \memberof pm_parser
*/
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options);
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options);

// We optionally support serializing to a binary string. For systems that don't
// want or need this functionality, it can be turned off with the
Expand Down Expand Up @@ -333,24 +338,26 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
* In order to parse Ruby code, the structures and functions that you're going
* to want to use and be aware of are:
*
* * `pm_arena_t` - the arena allocator for AST-lifetime memory
* * `pm_parser_t` - the main parser structure
* * `pm_parser_init()` - initialize a parser
* * `pm_parse()` - parse and return the root node
* * `pm_node_destroy()` - deallocate the root node returned by `pm_parse()`
* * `pm_parser_free()` - free the internal memory of the parser
* * `pm_arena_free()` - free all AST-lifetime memory
*
* Putting all of this together would look something like:
*
* ```c
* void parse(const uint8_t *source, size_t length) {
* pm_arena_t arena = { 0 };
* pm_parser_t parser;
* pm_parser_init(&parser, source, length, NULL);
* pm_parser_init(&arena, &parser, source, length, NULL);
*
* pm_node_t *root = pm_parse(&parser);
* printf("PARSED!\n");
*
* pm_node_destroy(&parser, root);
* pm_parser_free(&parser);
* pm_arena_free(&arena);
* }
* ```
*
Expand Down Expand Up @@ -391,8 +398,9 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
*
* ```c
* void prettyprint(const uint8_t *source, size_t length) {
* pm_arena_t arena = { 0 };
* pm_parser_t parser;
* pm_parser_init(&parser, source, length, NULL);
* pm_parser_init(&arena, &parser, source, length, NULL);
*
* pm_node_t *root = pm_parse(&parser);
* pm_buffer_t buffer = { 0 };
Expand All @@ -401,8 +409,8 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
* printf("%.*s\n", (int) buffer.length, buffer.value);
*
* pm_buffer_free(&buffer);
* pm_node_destroy(&parser, root);
* pm_parser_free(&parser);
* pm_arena_free(&arena);
* }
* ```
*/
Expand Down
12 changes: 12 additions & 0 deletions include/prism/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,18 @@
#define PRISM_FALLTHROUGH
#endif

/**
 * The token to place between the brackets when declaring a trailing
 * variable-length array member, as in `data[PM_FLEX_ARY_LEN]`. It expands to
 * nothing when compiling as C99 or later (`data[]`), to `0` when the compiler
 * defines `__GNUC__` and is not in strict ANSI mode (`data[0]`, a GCC
 * extension), and to `1` for everything else (`data[1]`).
 */
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)
#   if defined(__GNUC__) && !defined(__STRICT_ANSI__)
#       define PM_FLEX_ARY_LEN 0 /* data[0] */
#   else
#       define PM_FLEX_ARY_LEN 1 /* data[1] */
#   endif
#else
#   define PM_FLEX_ARY_LEN /* data[] */
#endif

/**
* We need to align nodes in the AST to a pointer boundary so that it can be
* safely cast to different node types. Use PRISM_ALIGNAS/PRISM_ALIGNOF to
Expand Down
29 changes: 9 additions & 20 deletions include/prism/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,41 +20,29 @@
/**
* Append a new node onto the end of the node list.
*
* @param arena The arena to allocate from.
* @param list The list to append to.
* @param node The node to append.
*/
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
void pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);

/**
* Prepend a new node onto the beginning of the node list.
*
* @param arena The arena to allocate from.
* @param list The list to prepend to.
* @param node The node to prepend.
*/
void pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node);
void pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);

/**
* Concatenate the given node list onto the end of the other node list.
*
* @param arena The arena to allocate from.
* @param list The list to concatenate onto.
* @param other The list to concatenate.
*/
void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other);

/**
* Free the internal memory associated with the given node list.
*
* @param list The list to free.
*/
void pm_node_list_free(pm_node_list_t *list);

/**
* Deallocate a node and all of its children.
*
* @param parser The parser that owns the node.
* @param node The node to deallocate.
*/
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
void pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other);

/**
* Returns a string representation of the given node type.
Expand Down Expand Up @@ -93,18 +81,19 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
* const char *source = "1 + 2; 3 + 4";
* size_t size = strlen(source);
*
* pm_arena_t arena = { 0 };
* pm_parser_t parser;
* pm_options_t options = { 0 };
* pm_parser_init(&parser, (const uint8_t *) source, size, &options);
* pm_parser_init(&arena, &parser, (const uint8_t *) source, size, &options);
*
* size_t indent = 0;
* pm_node_t *node = pm_parse(&parser);
*
* size_t *data = &indent;
* pm_visit_node(node, visit, data);
*
* pm_node_destroy(&parser, node);
* pm_parser_free(&parser);
* pm_arena_free(&arena);
* return EXIT_SUCCESS;
* }
* ```
Expand Down
4 changes: 4 additions & 0 deletions include/prism/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "prism/encoding.h"
#include "prism/options.h"
#include "prism/static_literals.h"
#include "prism/util/pm_arena.h"
#include "prism/util/pm_constant_pool.h"
#include "prism/util/pm_list.h"
#include "prism/util/pm_line_offset_list.h"
Expand Down Expand Up @@ -635,6 +636,9 @@ typedef uint32_t pm_state_stack_t;
* it's considering.
*/
struct pm_parser {
/** The arena used for all AST-lifetime allocations. Caller-owned. */
pm_arena_t *arena;

/**
* The next node identifier that will be assigned. This is a unique
* identifier used to track nodes such that the syntax tree can be dropped
Expand Down
Loading
Loading