Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 81 additions & 3 deletions internal/cbm/extract_defs.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,31 @@ static TSNode resolve_ocaml_func_name(TSNode node) {
return null_node;
}

// SQL: resolve create_function name from object_reference→identifier or direct identifier.
// Walks the subtree rooted at `node` in depth-first pre-order and returns the
// last node whose type is "identifier". For a schema-qualified
// object_reference (schema.table) that is the table name, so the schema prefix
// is skipped. `best` is the fallback handed back unchanged when the subtree
// holds no identifier; `*found` is set to true on a match and is never
// cleared here, so the caller must initialise it to false.
static TSNode sql_last_identifier(TSNode node, TSNode best, bool *found) {
    TSNode latest = best;
    if (strcmp(ts_node_type(node), "identifier") == 0) {
        *found = true;
        latest = node;
    }
    // Children are visited left to right; a later match overrides an earlier one.
    const uint32_t child_total = ts_node_child_count(node);
    uint32_t idx = 0;
    while (idx < child_total) {
        latest = sql_last_identifier(ts_node_child(node, idx), latest, found);
        idx++;
    }
    return latest;
}

// SQL: resolve create_function / create_table / create_view name. The name sits
// on an object_reference; for a schema-qualified name (schema.table) take the
// last identifier (the table), not the first (the schema).
static TSNode resolve_sql_func_name(TSNode node) {
TSNode obj_ref = cbm_find_child_by_kind(node, "object_reference");
if (!ts_node_is_null(obj_ref)) {
TSNode id = cbm_find_child_by_kind(obj_ref, "identifier");
if (!ts_node_is_null(id)) {
bool found = false;
TSNode empty = {0};
TSNode id = sql_last_identifier(obj_ref, empty, &found);
if (found) {
return id;
}
}
Expand Down Expand Up @@ -2964,13 +2983,72 @@ static bool extract_config_class_def(CBMExtractCtx *ctx, TSNode node, const char
return true;
}

// Collect FROM/JOIN table references (tree-sitter-sql `relation` nodes) anywhere
// under `node` and emit them as usages scoped to enclosing_qn. pass_usages then
// resolves each ref_name to the referenced Table/View def and creates a USAGE
// lineage edge (e.g. a view -> the tables it selects from). Emitting them here
// (rather than via the generic identifier walker) sets the correct enclosing
// scope and bypasses the is_definition_name suppression that drops them.
//
// ctx          - extraction context; usages are appended to ctx->result->usages
//                and the name text is obtained via ctx->arena.
// node         - root of the subtree to scan (the node itself is checked too).
// enclosing_qn - qualified name stored as enclosing_func_qn on every usage.
static void collect_sql_relation_usages(CBMExtractCtx *ctx, TSNode node, const char *enclosing_qn) {
    if (strcmp(ts_node_type(node), "relation") == 0) {
        TSNode nm = resolve_sql_func_name(node); // object_reference -> identifier
        if (!ts_node_is_null(nm)) {
            char *tname = cbm_node_text(ctx->arena, nm, ctx->source);
            if (tname && tname[0]) {
                // Zero-initialise first: the struct is pushed by value, so any
                // member not assigned below must not carry an indeterminate value.
                CBMUsage usage = {0};
                usage.ref_name = tname;
                usage.enclosing_func_qn = enclosing_qn;
                cbm_usages_push(&ctx->result->usages, ctx->arena, usage);
            }
        }
    }
    // Keep descending even below a matched relation so nested relations
    // (e.g. inside subqueries) are collected as well.
    uint32_t n = ts_node_child_count(node);
    for (uint32_t i = 0; i < n; i++) {
        collect_sql_relation_usages(ctx, ts_node_child(node, i), enclosing_qn);
    }
}

// SQL DDL relation definitions: CREATE TABLE becomes a "Table" def, CREATE VIEW
// and CREATE MATERIALIZED VIEW become a "View" def, instead of generic Variable
// nodes. The relation name is looked up with resolve_sql_func_name (it sits on
// an object_reference child, the same shape create_function uses). After the
// def is pushed, every FROM/JOIN relation under the statement is emitted as a
// usage scoped to the new def so lineage edges can form.
//
// Returns true when the node was handled; false when the language is not SQL,
// `kind` is not one of the three DDL kinds, or no non-empty name was found.
static bool extract_sql_ddl_class_def(CBMExtractCtx *ctx, TSNode node, const char *kind) {
    if (ctx->language != CBM_LANG_SQL) {
        return false;
    }

    const bool is_table = strcmp(kind, "create_table") == 0;
    const bool is_view = !is_table && (strcmp(kind, "create_view") == 0 ||
                                       strcmp(kind, "create_materialized_view") == 0);
    if (!is_table && !is_view) {
        return false;
    }
    const char *label = is_table ? "Table" : "View";

    TSNode ident = resolve_sql_func_name(node);
    if (ts_node_is_null(ident)) {
        return false;
    }
    char *name = cbm_node_text(ctx->arena, ident, ctx->source);
    if (!name || !name[0]) {
        return false;
    }

    push_simple_class_def(ctx, node, name, label);
    // The usages are attributed to the qualified name computed for this def.
    collect_sql_relation_usages(ctx, node,
                                cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name));
    return true;
}

static void extract_class_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec) {
CBMArena *a = ctx->arena;
const char *kind = ts_node_type(node);

if (extract_config_class_def(ctx, node, kind)) {
return;
}
if (extract_sql_ddl_class_def(ctx, node, kind)) {
return;
}

TSNode name_node = ts_node_child_by_field_name(node, TS_FIELD("name"));
// ObjC: class name is first identifier child
Expand Down
9 changes: 5 additions & 4 deletions internal/cbm/lang_specs.c
Original file line number Diff line number Diff line change
Expand Up @@ -658,13 +658,14 @@ static const char *hcl_call_types[] = {"function_call", NULL};
static const char *hcl_var_types[] = {"attribute", NULL};

// ==================== SQL ====================
static const char *sql_func_types[] = {"create_function", "function_declaration", NULL};
static const char *sql_func_types[] = {"create_function", "function_declaration",
"create_procedure", NULL};
static const char *sql_field_types[] = {"column_definition", NULL};
static const char *sql_class_types[] = {"custom_type", NULL};
static const char *sql_class_types[] = {"custom_type", "create_table", "create_view",
"create_materialized_view", NULL};
static const char *sql_module_types[] = {"program", NULL};
static const char *sql_call_types[] = {"function_call", "invocation", "command", NULL};
static const char *sql_branch_types[] = {"if_statement", "case_expression", NULL};
static const char *sql_var_types[] = {"create_table", "create_view", NULL};

// ==================== DOCKERFILE ====================
static const char *dockerfile_module_types[] = {"source_file", NULL};
Expand Down Expand Up @@ -1779,7 +1780,7 @@ static const CBMLangSpec lang_specs[CBM_LANG_COUNT] = {
// CBM_LANG_SQL
[CBM_LANG_SQL] = {CBM_LANG_SQL, sql_func_types, sql_class_types, sql_field_types,
sql_module_types, sql_call_types, empty_types, empty_types, sql_branch_types,
sql_var_types, empty_types, empty_types, NULL, empty_types, NULL, NULL,
empty_types, empty_types, empty_types, NULL, empty_types, NULL, NULL,
tree_sitter_sql, NULL},

// CBM_LANG_DOCKERFILE
Expand Down
3 changes: 2 additions & 1 deletion src/pipeline/pass_definitions.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ static void process_def(cbm_pipeline_ctx_t *ctx, const CBMDefinition *def, const
if (node_id > 0 && def->label &&
(strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0 ||
strcmp(def->label, "Class") == 0 || strcmp(def->label, "Interface") == 0 ||
strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0)) {
strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0 ||
strcmp(def->label, "Table") == 0 || strcmp(def->label, "View") == 0)) {
cbm_registry_add(ctx->registry, def->name, def->qualified_name, def->label);
}
char *file_qn = cbm_pipeline_fqn_compute(ctx->project_name, rel, "__file__");
Expand Down
3 changes: 2 additions & 1 deletion src/pipeline/pass_parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,8 @@ static int register_and_link_def(cbm_pipeline_ctx_t *ctx, const CBMDefinition *d
* Variable/Field defs are registered too so READS/WRITES can resolve;
 * Table/View defs (SQL DDL relations) are registered as well. */
if (strcmp(def->label, "Function") == 0 || strcmp(def->label, "Method") == 0 ||
strcmp(def->label, "Class") == 0 || strcmp(def->label, "Interface") == 0 ||
strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0) {
strcmp(def->label, "Variable") == 0 || strcmp(def->label, "Field") == 0 ||
strcmp(def->label, "Table") == 0 || strcmp(def->label, "View") == 0) {
cbm_registry_add(ctx->registry, def->name, def->qualified_name, def->label);
(*reg_entries)++;
}
Expand Down
55 changes: 55 additions & 0 deletions tests/test_extraction.c
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,58 @@ TEST(sql_function) {
PASS();
}

/* CREATE TABLE must yield a def labelled "Table" and CREATE VIEW a def
 * labelled "View", each named after the relation. */
TEST(sql_ddl_node_labels) {
CBMFileResult *r = extract("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);\n"
"CREATE VIEW active_users AS SELECT * FROM users;\n",
CBM_LANG_SQL, "t", "schema.sql");
ASSERT_NOT_NULL(r);
ASSERT_FALSE(r->has_error);
/* One def per DDL statement, with the label matching the statement kind. */
ASSERT(has_def(r, "Table", "users"));
ASSERT(has_def(r, "View", "active_users"));
cbm_free_result(r);
PASS();
}

TEST(sql_view_lineage_usages) {
    /* Relations named in a view's FROM/JOIN clauses must surface as usages
     * whose ref_name is the table name; pass_usages later turns those into
     * view -> table USAGE lineage edges. */
    CBMFileResult *r = extract("CREATE TABLE users (id INTEGER);\n"
                               "CREATE VIEW active_users AS SELECT * FROM users;\n",
                               CBM_LANG_SQL, "t", "schema.sql");
    ASSERT_NOT_NULL(r);
    ASSERT_FALSE(r->has_error);

    /* Scan every emitted usage for a reference to the `users` table. */
    int found_users = 0;
    int idx = 0;
    while (idx < r->usages.count) {
        const char *ref = r->usages.items[idx].ref_name;
        if (ref) {
            if (strcmp(ref, "users") == 0) {
                found_users = 1;
            }
        }
        idx++;
    }
    ASSERT(found_users);

    cbm_free_result(r);
    PASS();
}

TEST(sql_schema_qualified_name) {
    /* For schema-qualified DDL (schema.table) the def is named after the
     * table, not the schema, and FROM schema.table yields a usage of that
     * same table name so lineage can resolve. */
    CBMFileResult *r = extract("CREATE TABLE app.users (id INTEGER);\n"
                               "CREATE VIEW app.active AS SELECT * FROM app.users;\n",
                               CBM_LANG_SQL, "t", "schema.sql");
    ASSERT_NOT_NULL(r);
    ASSERT_FALSE(r->has_error);

    /* Def names drop the `app.` prefix. */
    ASSERT(has_def(r, "Table", "users"));
    ASSERT(has_def(r, "View", "active"));

    /* The FROM reference is recorded under the bare table name as well. */
    int found_users = 0;
    int idx = 0;
    while (idx < r->usages.count) {
        const char *ref = r->usages.items[idx].ref_name;
        if (ref) {
            if (strcmp(ref, "users") == 0) {
                found_users = 1;
            }
        }
        idx++;
    }
    ASSERT(found_users);

    cbm_free_result(r);
    PASS();
}

/* --- Meson project --- */
TEST(meson_project) {
CBMFileResult *r = extract(
Expand Down Expand Up @@ -3048,6 +3100,9 @@ SUITE(extraction) {
/* Config/Markup */
RUN_TEST(html_elements);
RUN_TEST(sql_function);
RUN_TEST(sql_ddl_node_labels);
RUN_TEST(sql_view_lineage_usages);
RUN_TEST(sql_schema_qualified_name);
RUN_TEST(meson_project);
RUN_TEST(css_rules);
RUN_TEST(scss_rules);
Expand Down
2 changes: 1 addition & 1 deletion tests/test_grammar_labels.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ static const LabelGolden LABEL_GOLDENS[] = {
{"toml", "Class:1,Module:1,Variable:1"},
{"ini", "Class:1,Module:1,Variable:1"},
{"csv", "Module:1"},
{"sql", "Module:1,Variable:1"},
{"sql", "Module:1,Table:1"},
{"xml", "Class:2,Module:1"},
{"html", "Module:1"},
{"css", "Module:1"},
Expand Down
16 changes: 9 additions & 7 deletions tests/test_grammar_probe_g.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ typedef struct {
int modules;
int classes;
int variables;
int tables;
int sections;
int imports; /* IMPORTS edges */
int depends; /* DEPENDS_ON edges */
Expand All @@ -172,6 +173,7 @@ static GpgMetrics gpg_metrics_files(const GpgFile *files, int nfiles) {
m.modules = gpg_count_label(store, lp.project, "Module");
m.classes = gpg_count_label(store, lp.project, "Class");
m.variables = gpg_count_label(store, lp.project, "Variable");
m.tables = gpg_count_label(store, lp.project, "Table");
m.sections = gpg_count_label(store, lp.project, "Section");
m.imports = cbm_store_count_edges_by_type(store, lp.project, "IMPORTS");
m.depends = cbm_store_count_edges_by_type(store, lp.project, "DEPENDS_ON");
Expand Down Expand Up @@ -605,21 +607,21 @@ TEST(probe_csv_module_only) {
/* ══════════════════════════════════════════════════════════════════
* GROUP 13 — SQL (.sql)
*
* SQL golden histogram: Module:1, Variable:1
* Table references (e.g. CREATE TABLE / SELECT FROM) produce Variable nodes.
* SQL golden histogram: Module:1, Table:1
* CREATE TABLE / CREATE VIEW produce first-class Table / View nodes.
* ══════════════════════════════════════════════════════════════════ */

/* SQL: CREATE TABLE + SELECT → at least 1 Variable node. */
TEST(probe_sql_variable_node) {
/* SQL: CREATE TABLE → a first-class Table node. */
TEST(probe_sql_table_node) {
GpgMetrics m = gpg_metrics("schema.sql", "CREATE TABLE users (\n"
" id INTEGER PRIMARY KEY,\n"
" name TEXT NOT NULL\n"
");\n"
"\n"
"SELECT id, name FROM users WHERE id = 1;\n");
ASSERT_TRUE(m.ok);
/* GREEN: SQL table reference produces at least 1 Variable node. */
ASSERT_TRUE(m.variables >= 1);
/* GREEN: CREATE TABLE produces a first-class Table node (was Variable). */
ASSERT_TRUE(m.tables >= 1);
ASSERT_TRUE(m.modules >= 1);
PASS();
}
Expand Down Expand Up @@ -1082,7 +1084,7 @@ SUITE(grammar_probe_g) {
RUN_TEST(probe_csv_module_only);

/* SQL */
RUN_TEST(probe_sql_variable_node);
RUN_TEST(probe_sql_table_node);
RUN_TEST(probe_sql_insert_select);

/* SOQL */
Expand Down
Loading