Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 250 additions & 18 deletions src/discover/discover.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,19 @@

#include "foundation/constants.h"
#include "foundation/compat_fs.h"
#include "foundation/platform.h"
#ifdef _WIN32
#include "foundation/win_utf8.h"
#endif
#include <ctype.h>
#include <stdint.h> // int64_t
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // strdup
#include <sys/stat.h>

int cbm_gitignore_match_result(const cbm_gitignore_t *gi, const char *rel_path, bool is_dir);

/* ── Hardcoded always-skip directories ──────────────────────────── */

static const char *ALWAYS_SKIP_DIRS[] = {
Expand Down Expand Up @@ -131,6 +135,205 @@ static bool str_contains(const char *s, const char *sub) {
return strstr(s, sub) != NULL;
}

/* ── Git global excludes resolution ───────────────────────────── */

enum { GIT_TILDE_PREFIX_LEN = 2 }; /* "~/". */

static bool ascii_ieq(const char *a, const char *b) {
while (*a && *b) {
if (tolower((unsigned char)*a) != tolower((unsigned char)*b)) {
return false;
}
a++;
b++;
}
return *a == '\0' && *b == '\0';
}

static char *trim_ws(char *s) {
while (*s && isspace((unsigned char)*s)) {
s++;
}
char *end = s + strlen(s);
while (end > s && isspace((unsigned char)end[-1])) {
end--;
}
*end = '\0';
return s;
}

static void strip_inline_comment(char *s) {
bool in_quote = false;
char quote = '\0';
for (char *p = s; *p; p++) {
if ((*p == '"' || *p == '\'') && (p == s || p[-1] != '\\')) {
if (!in_quote) {
in_quote = true;
quote = *p;
} else if (*p == quote) {
in_quote = false;
}
continue;
}
if (!in_quote && (*p == '#' || *p == ';') && (p == s || isspace((unsigned char)p[-1]))) {
*p = '\0';
return;
}
}
}

static char *strip_matching_quotes(char *s) {
size_t len = strlen(s);
if (len >= CBM_QUOTE_PAIR && ((s[0] == '"' && s[len - SKIP_ONE] == '"') ||
(s[0] == '\'' && s[len - SKIP_ONE] == '\''))) {
s[len - SKIP_ONE] = '\0';
return s + SKIP_ONE;
}
return s;
}

static bool has_trailing_sep(const char *path) {
size_t len = strlen(path);
return len > 0 && (path[len - SKIP_ONE] == '/' || path[len - SKIP_ONE] == '\\');
}

static void path_join(char *out, size_t out_sz, const char *base, const char *rel) {
if (!out || out_sz == 0) {
return;
}
if (!base || base[0] == '\0') {
snprintf(out, out_sz, "%s", rel ? rel : "");
} else if (!rel || rel[0] == '\0') {
snprintf(out, out_sz, "%s", base);
} else if (has_trailing_sep(base)) {
snprintf(out, out_sz, "%s%s", base, rel);
} else {
snprintf(out, out_sz, "%s/%s", base, rel);
}
cbm_normalize_path_sep(out);
}

static bool expand_git_path(const char *path, char *out, size_t out_sz) {
if (!path || !path[0] || !out || out_sz == 0) {
return false;
}
char normalized[CBM_SZ_4K];
snprintf(normalized, sizeof(normalized), "%s", path);
cbm_normalize_path_sep(normalized);

if (normalized[0] != '~') {
snprintf(out, out_sz, "%s", normalized);
cbm_normalize_path_sep(out);
return out[0] != '\0';
}

if (normalized[1] != '\0' && normalized[1] != '/') {
return false; /* ~user expansion is intentionally not supported. */
}

const char *home = cbm_get_home_dir();
if (!home || home[0] == '\0') {
return false;
}
if (normalized[1] == '\0') {
snprintf(out, out_sz, "%s", home);
cbm_normalize_path_sep(out);
} else {
path_join(out, out_sz, home, normalized + GIT_TILDE_PREFIX_LEN);
}
return out[0] != '\0';
}

static bool read_core_excludes_file(const char *config_path, char *out, size_t out_sz) {
FILE *f = fopen(config_path, "r");
if (!f) {
return false;
}

bool in_core = false;
bool found = false;
char line[CBM_SZ_4K];
while (fgets(line, sizeof(line), f)) {
char *s = trim_ws(line);
if (s[0] == '\0' || s[0] == '#' || s[0] == ';') {
continue;
}

if (s[0] == '[') {
char *end = strchr(s, ']');
if (!end) {
in_core = false;
continue;
}
*end = '\0';
in_core = ascii_ieq(trim_ws(s + SKIP_ONE), "core");
continue;
}

if (!in_core) {
continue;
}

char *eq = strchr(s, '=');
if (!eq) {
continue;
}
*eq = '\0';
char *key = trim_ws(s);
char *value = trim_ws(eq + SKIP_ONE);
strip_inline_comment(value);
value = strip_matching_quotes(trim_ws(value));

if (ascii_ieq(key, "excludesfile") && value[0] != '\0' &&
expand_git_path(value, out, out_sz)) {
found = true;
}
}

fclose(f);
return found;
}

static bool resolve_xdg_git_config_dir(char *out, size_t out_sz) {
char env[CBM_SZ_4K];
if (cbm_safe_getenv("XDG_CONFIG_HOME", env, sizeof(env), NULL) && env[0] != '\0') {
snprintf(out, out_sz, "%s", env);
cbm_normalize_path_sep(out);
return true;
}

const char *home = cbm_get_home_dir();
if (!home || home[0] == '\0') {
return false;
}
path_join(out, out_sz, home, ".config");
return out[0] != '\0';
}

static bool resolve_global_excludes_path(char *out, size_t out_sz) {
char config_path[CBM_SZ_4K];

const char *home = cbm_get_home_dir();
if (home && home[0] != '\0') {
path_join(config_path, sizeof(config_path), home, ".gitconfig");
if (read_core_excludes_file(config_path, out, out_sz)) {
return true;
}
}

char xdg_config[CBM_SZ_4K];
if (resolve_xdg_git_config_dir(xdg_config, sizeof(xdg_config))) {
path_join(config_path, sizeof(config_path), xdg_config, "git/config");
if (read_core_excludes_file(config_path, out, out_sz)) {
return true;
}
path_join(out, out_sz, xdg_config, "git/ignore");
return out[0] != '\0';
}

return false;
}

/* ── Public filter functions ─────────────────────── */

bool cbm_should_skip_dir(const char *dirname, cbm_index_mode_t mode) {
Expand Down Expand Up @@ -273,6 +476,7 @@ static const char *local_rel_path(const char *rel_path, const char *local_prefix
/* Check if a directory entry should be skipped (hardcoded dirs + gitignore). */
static bool should_skip_directory(const char *entry_name, const char *rel_path,
const cbm_discover_opts_t *opts, const cbm_gitignore_t *gitignore,
const cbm_gitignore_t *global_gi,
const cbm_gitignore_t *cbmignore, const cbm_gitignore_t *local_gi,
const char *local_gi_prefix) {
if (cbm_should_skip_dir(entry_name, opts ? opts->mode : CBM_MODE_FULL)) {
Expand All @@ -281,23 +485,31 @@ static bool should_skip_directory(const char *entry_name, const char *rel_path,
if (gitignore && cbm_gitignore_matches(gitignore, rel_path, true)) {
return true;
}
bool global_ignored = global_gi && cbm_gitignore_matches(global_gi, rel_path, true);
if (local_gi) {
const char *lrel = local_rel_path(rel_path, local_gi_prefix);
if (cbm_gitignore_matches(local_gi, lrel, true)) {
return true;
}
}
if (cbmignore && cbm_gitignore_matches(cbmignore, rel_path, true)) {
return true;
if (cbmignore) {
int cbm_result = cbm_gitignore_match_result(cbmignore, rel_path, true);
if (cbm_result > 0) {
return true;
}
if (cbm_result < 0 && global_ignored) {
return false;
}
}
return false;
return global_ignored;
}

/* Check if a regular file should be skipped (filters + gitignore + size). */
static bool should_skip_file(const char *entry_name, const char *rel_path,
const cbm_discover_opts_t *opts, const cbm_gitignore_t *gitignore,
const cbm_gitignore_t *cbmignore, const cbm_gitignore_t *local_gi,
const char *local_gi_prefix, off_t file_size) {
const cbm_gitignore_t *global_gi, const cbm_gitignore_t *cbmignore,
const cbm_gitignore_t *local_gi, const char *local_gi_prefix,
off_t file_size) {
cbm_index_mode_t mode = opts ? opts->mode : CBM_MODE_FULL;
if (cbm_has_ignored_suffix(entry_name, mode)) {
return true;
Expand All @@ -311,19 +523,26 @@ static bool should_skip_file(const char *entry_name, const char *rel_path,
if (gitignore && cbm_gitignore_matches(gitignore, rel_path, false)) {
return true;
}
bool global_ignored = global_gi && cbm_gitignore_matches(global_gi, rel_path, false);
if (local_gi) {
const char *lrel = local_rel_path(rel_path, local_gi_prefix);
if (cbm_gitignore_matches(local_gi, lrel, false)) {
return true;
}
}
if (cbmignore && cbm_gitignore_matches(cbmignore, rel_path, false)) {
return true;
if (cbmignore) {
int cbm_result = cbm_gitignore_match_result(cbmignore, rel_path, false);
if (cbm_result > 0) {
return true;
}
if (cbm_result < 0 && global_ignored) {
global_ignored = false;
}
}
if (opts && opts->max_file_size > 0 && file_size > opts->max_file_size) {
return true;
}
return false;
return global_ignored;
}

/* Detect language for a file, handling .m disambiguation and JSON filtering. */
Expand Down Expand Up @@ -384,10 +603,11 @@ static int safe_stat(const char *abs_path, struct stat *st) {
/* Process a single regular file entry during directory walk. */
static void walk_dir_process_file(const char *abs_path, const char *rel_path, const char *name,
const cbm_discover_opts_t *opts, const cbm_gitignore_t *gitignore,
const cbm_gitignore_t *global_gi,
const cbm_gitignore_t *cbmignore, const cbm_gitignore_t *local_gi,
const char *local_gi_prefix, off_t size, file_list_t *out) {
if (should_skip_file(name, rel_path, opts, gitignore, cbmignore, local_gi, local_gi_prefix,
size)) {
if (should_skip_file(name, rel_path, opts, gitignore, global_gi, cbmignore, local_gi,
local_gi_prefix, size)) {
return;
}
CBMLanguage lang = detect_file_language(name, abs_path);
Expand Down Expand Up @@ -435,6 +655,7 @@ static void walk_push_subdir(walk_frame_t *stack, int *top, const char *abs_path
static void walk_dir_process_entry(cbm_dirent_t *entry, const walk_frame_t *frame,
const cbm_discover_opts_t *opts,
const cbm_gitignore_t *gitignore,
const cbm_gitignore_t *global_gi,
const cbm_gitignore_t *cbmignore, walk_frame_t *stack, int *top,
file_list_t *out) {
char abs_path[CBM_SZ_4K];
Expand All @@ -452,24 +673,24 @@ static void walk_dir_process_entry(cbm_dirent_t *entry, const walk_frame_t *fram
}

if (S_ISDIR(st.st_mode)) {
if (!should_skip_directory(entry->name, rel_path, opts, gitignore, cbmignore,
if (!should_skip_directory(entry->name, rel_path, opts, gitignore, global_gi, cbmignore,
frame->local_gi, frame->local_gi_prefix)) {
walk_push_subdir(stack, top, abs_path, rel_path, frame);
} else {
/* Record the excluded subtree root so callers can report it (#411). */
file_list_add_excluded(out, rel_path);
}
} else if (S_ISREG(st.st_mode)) {
walk_dir_process_file(abs_path, rel_path, entry->name, opts, gitignore, cbmignore,
frame->local_gi, frame->local_gi_prefix, st.st_size, out);
walk_dir_process_file(abs_path, rel_path, entry->name, opts, gitignore, global_gi,
cbmignore, frame->local_gi, frame->local_gi_prefix, st.st_size, out);
}
}

enum { GI_OWNED_CAP = 64 };

static void walk_dir(const char *dir_path, const char *rel_prefix, const cbm_discover_opts_t *opts,
const cbm_gitignore_t *gitignore, const cbm_gitignore_t *cbmignore,
file_list_t *out) {
const cbm_gitignore_t *gitignore, const cbm_gitignore_t *global_gi,
const cbm_gitignore_t *cbmignore, file_list_t *out) {
walk_frame_t *stack = calloc(WALK_STACK_CAP, sizeof(walk_frame_t));
if (!stack) {
return;
Expand Down Expand Up @@ -503,7 +724,8 @@ static void walk_dir(const char *dir_path, const char *rel_prefix, const cbm_dis

cbm_dirent_t *entry;
while ((entry = cbm_readdir(d)) != NULL) {
walk_dir_process_entry(entry, &frame, opts, gitignore, cbmignore, stack, &top, out);
walk_dir_process_entry(entry, &frame, opts, gitignore, global_gi, cbmignore, stack,
&top, out);
}
cbm_closedir(d);
}
Expand Down Expand Up @@ -546,11 +768,20 @@ int cbm_discover_ex(const char *repo_path, const cbm_discover_opts_t *opts, cbm_
char gi_path[CBM_SZ_4K];
snprintf(gi_path, sizeof(gi_path), "%s/.git", repo_path);
struct stat gi_stat;
if (wide_stat(gi_path, &gi_stat) == 0 && S_ISDIR(gi_stat.st_mode)) {
bool is_git_repo = wide_stat(gi_path, &gi_stat) == 0 && S_ISDIR(gi_stat.st_mode);
bool has_git_config = false;
if (is_git_repo) {
snprintf(gi_path, sizeof(gi_path), "%s/.git/config", repo_path);
has_git_config = wide_stat(gi_path, &gi_stat) == 0 && S_ISREG(gi_stat.st_mode);
snprintf(gi_path, sizeof(gi_path), "%s/.gitignore", repo_path);
gitignore = cbm_gitignore_load(gi_path);
}

cbm_gitignore_t *global_gi = NULL;
if (has_git_config && resolve_global_excludes_path(gi_path, sizeof(gi_path))) {
global_gi = cbm_gitignore_load(gi_path);
}

/* Load cbmignore if specified or exists at repo root */
cbm_gitignore_t *cbmignore = NULL;
if (opts && opts->ignore_file) {
Expand All @@ -562,10 +793,11 @@ int cbm_discover_ex(const char *repo_path, const cbm_discover_opts_t *opts, cbm_

/* Walk */
file_list_t fl = {0};
walk_dir(repo_path, "", opts, gitignore, cbmignore, &fl);
walk_dir(repo_path, "", opts, gitignore, global_gi, cbmignore, &fl);

/* Cleanup */
cbm_gitignore_free(gitignore);
cbm_gitignore_free(global_gi);
cbm_gitignore_free(cbmignore);

*out = fl.files;
Expand Down
Loading
Loading