-
-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathfile-tools-shared.ts
More file actions
369 lines (327 loc) · 10.4 KB
/
file-tools-shared.ts
File metadata and controls
369 lines (327 loc) · 10.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
// Copyright 2023-present Eser Ozvataf and other contributors. All rights reserved. Apache-2.0 license.
/**
* Shared utilities for file-based codebase tools.
*
* Provides file walking, binary detection, content loading, and pattern matching.
* Used by `createFileTool()` factory and individual tools.
*
* @module
*/
import { runtime } from "@eserstack/standards/cross-runtime";
import { hasExtension } from "@eserstack/standards/patterns";
import * as shellExec from "@eserstack/shell/exec";
import { ensureLib, getLib } from "./ffi-client.ts";
// =============================================================================
// Types
// =============================================================================
/**
* File metadata entry — returned by the walker.
*
* The walker fills in only the metadata fields; `textContent` and `rawBytes`
* are caches that stay unset until `loadContent()` / `loadBytes()` populate
* them (or `applyMutations()` overwrites them). They are intentionally not
* `readonly` so those helpers can write to the entry in place.
*/
export type FileEntry = {
/**
* Path to the file as produced by the walker.
* NOTE(review): the git-aware walker builds this as `join(root, relativePath)`,
* so it is only absolute when `root` is absolute — confirm what callers expect.
*/
readonly path: string;
/** File name (basename only) */
readonly name: string;
/** File size in bytes, as reported by the stat call made during the walk */
readonly size: number;
/** Whether this is a symlink */
readonly isSymlink: boolean;
/** Cached text content (populated by loadContent; left unset for binary or unreadable files) */
textContent?: string;
/** Cached raw bytes (populated by loadBytes; reset to undefined by applyMutations) */
rawBytes?: Uint8Array;
};
/**
* A mutation returned by a fixer tool.
* Edit-tool-shaped: inspectable, diffable, approvable.
*
* Carries both the old and the new full text so a caller can diff them;
* `writeMutations()` only touches the disk when the two differ.
*/
export type FileMutation = {
/** Path to the file; compared by exact string equality against `FileEntry.path` in `applyMutations()` */
readonly path: string;
/** Original content (for diff/preview) */
readonly oldContent: string;
/** New content after fix */
readonly newContent: string;
};
/**
* Options for walking source files.
*
* Every field is optional; `walkSourceFiles()` can be called with no options.
*/
export type WalkOptions = {
/** Root directory (default: ".") */
readonly root?: string;
/** File extensions to include (e.g., ["ts", "json"]). Empty = all. */
readonly extensions?: readonly string[];
/**
* Glob/regex patterns to exclude.
* String entries are forwarded unchanged to the native walker and are also
* compiled with `new RegExp(...)` on the TypeScript side.
* NOTE(review): a glob that is not valid regex syntax (e.g. one starting with
* `*`) makes `new RegExp` throw — confirm which pattern syntax is intended.
*/
readonly exclude?: readonly (string | RegExp)[];
/**
* Include directories in results (default: false).
* NOTE(review): not every walking strategy appears to honour this — verify
* against `walkSourceFiles()` before relying on it.
*/
readonly includeDirs?: boolean;
/**
* When set, only include files matching these paths (incremental mode).
* A file is kept when its path contains at least one entry as a substring.
*/
readonly includeOnly?: readonly string[];
};
// =============================================================================
// Default excludes
// =============================================================================
/**
* Directories always excluded from file tool scanning.
*
* Each pattern is unanchored and tested against the file path, so it matches
* anywhere in the path. `walkSourceFiles()` prepends these to any
* caller-supplied `exclude` patterns.
*/
export const DEFAULT_EXCLUDES: readonly RegExp[] = [
// Any path containing "node_modules"
/node_modules/,
// Anything below a ".git/" directory
/\.git\//,
// Anything below a "/dist/" directory
/\/dist\//,
// Paths containing "etc/coverage"
/etc\/coverage/,
// Paths containing "etc/temp"
/etc\/temp/,
// Anything below a ".output/" directory
/\.output\//,
];
// =============================================================================
// File walking
// =============================================================================
/**
 * List the files git knows about under `root`, respecting .gitignore.
 *
 * Runs `git ls-files --cached --others --exclude-standard`, so the result
 * contains tracked files plus untracked files that are not ignored. Paths are
 * relative to `root` (the command's working directory).
 *
 * @param root - Directory in which to run git
 * @returns The listed paths, or `null` when the listing is empty or the
 *   command throws (e.g. git unavailable), so the caller can fall back to a
 *   plain filesystem walk.
 */
const getGitTrackedFiles = async (
  root: string,
): Promise<string[] | null> => {
  try {
    // `core.quotePath=false` stops git from C-quoting path names that contain
    // non-ASCII bytes; quoted names would not resolve on disk and the files
    // would be silently skipped by the caller. Names containing control
    // characters, double quotes or backslashes are still quoted by git.
    const lines = await shellExec
      .exec`git -c core.quotePath=false ls-files --cached --others --exclude-standard`
      .cwd(root)
      .noThrow()
      .lines();

    // Defensive: an empty line is never a valid path, and a lone "" must not
    // be mistaken for a successful (non-empty) listing.
    const paths = lines.filter((line) => line.length > 0);

    return paths.length > 0 ? paths : null;
  } catch {
    // Best effort by design: any failure means "no git listing available".
    return null;
  }
};
/**
 * Normalize one exclude pattern into a RegExp that can be reused across paths.
 *
 * Strings are compiled with `new RegExp(...)`, which throws `SyntaxError` on
 * invalid syntax. RegExp objects carrying the `g` flag are cloned without it:
 * `.test()` on a global regex advances `lastIndex`, which would make the
 * verdict for one path depend on the path tested just before it.
 */
const toStatelessExclude = (pattern: string | RegExp): RegExp => {
  if (typeof pattern === "string") {
    return new RegExp(pattern);
  }

  return pattern.global
    ? new RegExp(pattern.source, pattern.flags.replace("g", ""))
    : pattern;
};

/**
 * Apply the incremental-mode allow-list: keep only files whose path contains
 * at least one `includeOnly` entry. Returns `files` itself when no allow-list
 * is configured.
 */
const filterByIncludeOnly = (
  files: FileEntry[],
  includeOnly: readonly string[] | undefined,
): FileEntry[] => {
  if (includeOnly === undefined || includeOnly.length === 0) {
    return files;
  }

  // A suffix match is also a substring match, so `includes` covers both.
  return files.filter((file) =>
    includeOnly.some((entry) => file.path.includes(entry))
  );
};

/**
 * Walk source files in a directory, respecting excludes.
 *
 * Three strategies are tried in order:
 * 1. the native library returned by `getLib()`, when one is loaded;
 * 2. a git-aware listing via `getGitTrackedFiles()`;
 * 3. a plain filesystem walk via `runtime.fs.walk()`.
 *
 * `DEFAULT_EXCLUDES` are always applied in addition to `options.exclude`, and
 * `options.includeOnly` is applied last, whichever strategy produced the list.
 *
 * @param options - Walk options
 * @returns Array of file entries (metadata only, no content)
 * @throws SyntaxError when a string in `options.exclude` is not valid regex syntax
 */
export const walkSourceFiles = async (
  options: WalkOptions = {},
): Promise<FileEntry[]> => {
  const { root = ".", extensions, exclude = [], includeOnly } = options;
  const includeDirs = options.includeDirs ?? false;

  await ensureLib();
  const lib = getLib();

  // Compiled once and shared by every strategy below.
  const allExcludes: RegExp[] = [
    ...DEFAULT_EXCLUDES,
    ...exclude.map((pattern) => toStatelessExclude(pattern)),
  ];

  if (lib !== null) {
    try {
      // Go expects dot-prefixed extensions (".ts"), TS uses bare ("ts")
      const goExtensions = extensions !== undefined && extensions.length > 0
        ? extensions.map((e) => e.startsWith(".") ? e : `.${e}`)
        : undefined;

      // Pass only string patterns to Go; regex patterns are applied post-fetch
      const goExcludes = exclude.filter((e): e is string =>
        typeof e === "string"
      );

      // NOTE(review): `includeDirs` is not part of this request payload.
      const raw = lib.symbols.EserAjanCodebaseWalkFiles(
        JSON.stringify({
          dir: root,
          extensions: goExtensions,
          exclude: goExcludes,
          gitAware: true,
        }),
      );

      const parsed = JSON.parse(raw) as {
        files?: Array<
          { path: string; name: string; size: number; isSymlink: boolean }
        >;
        error?: string;
      };

      // Only trust the native result when it reports no error and really
      // carries a list; anything else falls through to the TS strategies.
      if (!parsed.error && Array.isArray(parsed.files)) {
        const nativeFiles: FileEntry[] = parsed.files
          .filter((f) => !allExcludes.some((re) => re.test(f.path)))
          .map((f) => ({
            path: f.path,
            name: f.name,
            size: f.size,
            isSymlink: f.isSymlink,
          }));

        return filterByIncludeOnly(nativeFiles, includeOnly);
      }
    } catch { /* fall through to TS */ }
  }

  const files: FileEntry[] = [];

  // Try git-aware file listing first (respects .gitignore)
  const gitFiles = await getGitTrackedFiles(root);

  if (gitFiles !== null) {
    // Git-aware path: iterate over the paths git reported
    for (const relativePath of gitFiles) {
      const fullPath = runtime.path.join(root, relativePath);
      const name = runtime.path.basename(relativePath);

      // Apply extension filter (bare extensions: "ts", "json", etc.)
      if (
        extensions !== undefined && extensions.length > 0 &&
        !hasExtension(relativePath, extensions)
      ) {
        continue;
      }

      // Apply exclude patterns against both spellings of the path
      if (
        allExcludes.some((re) => re.test(fullPath) || re.test(relativePath))
      ) {
        continue;
      }

      let size = 0;
      let isSymlink = false;

      try {
        // lstat: describe the link itself rather than its target
        const stat = await runtime.fs.lstat(fullPath);
        size = stat.size;
        isSymlink = stat.isSymlink;

        // Skip directories unless explicitly requested
        if (stat.isDirectory && !includeDirs) {
          continue;
        }
      } catch {
        // Listed by git but not readable on disk (e.g. deleted): skip it
        continue;
      }

      files.push({ path: fullPath, name, size, isSymlink });
    }
  } else {
    // Fallback: filesystem walk (existing behavior)
    for await (
      const entry of runtime.fs.walk(root, {
        includeDirs,
        includeFiles: true,
        exts: extensions as string[] | undefined,
        skip: allExcludes,
      })
    ) {
      // NOTE(review): directory entries are dropped here even when
      // `includeDirs` is true — confirm whether that is intended.
      if (!entry.isFile && !entry.isSymlink) {
        continue;
      }

      let size = 0;

      try {
        const stat = await runtime.fs.stat(entry.path);
        size = stat.size;
      } catch {
        // Entry could not be stat'ed: leave it out
        continue;
      }

      files.push({
        path: entry.path,
        name: entry.name,
        size,
        isSymlink: entry.isSymlink,
      });
    }
  }

  // Incremental mode: filter to only changed files when includeOnly is set
  return filterByIncludeOnly(files, includeOnly);
};
// =============================================================================
// Content loading (two-phase)
// =============================================================================
/**
 * Return the text of a file entry, decoding it on first use.
 *
 * The decoded string is stored on `file.textContent`, so later calls are
 * served from that cache.
 *
 * @param file - Entry whose content is wanted
 * @returns The decoded text, or `undefined` when the bytes look binary or
 *   loading fails for any reason
 */
export const loadContent = async (
  file: FileEntry,
): Promise<string | undefined> => {
  if (file.textContent === undefined) {
    try {
      const raw = await loadBytes(file);

      // Binary payloads are never decoded, so the cache stays empty for them.
      if (!isBinaryBytes(raw)) {
        file.textContent = new TextDecoder().decode(raw);
      }
    } catch {
      // Deliberately best effort: unreadable files are reported as "no text".
      return undefined;
    }
  }

  return file.textContent;
};
/**
 * Return the raw bytes of a file entry, reading from disk on first use.
 *
 * The bytes are stored on `file.rawBytes`, so later calls skip the read.
 * Read errors are not caught here and propagate to the caller.
 *
 * @param file - Entry whose bytes are wanted
 * @returns The file's bytes
 */
export const loadBytes = async (file: FileEntry): Promise<Uint8Array> => {
  if (file.rawBytes === undefined) {
    file.rawBytes = await runtime.fs.readFile(file.path);
  }

  return file.rawBytes;
};
// =============================================================================
// Binary detection
// =============================================================================
/**
 * Heuristic binary check: a buffer counts as binary when a NUL byte occurs
 * within its first 8192 bytes. Anything after that window is not inspected.
 *
 * @param bytes - Raw file bytes
 * @returns `true` when a zero byte is found in the scanned window
 */
export const isBinaryBytes = (bytes: Uint8Array): boolean =>
  // subarray is a view (no copy) and clamps the end to the buffer length
  bytes.subarray(0, 8192).includes(0);
// =============================================================================
// Pattern matching
// =============================================================================
/**
 * Check if a file path matches any of the given patterns.
 *
 * String patterns match when they occur anywhere in `path` (substring match).
 * RegExp patterns match via `.test(path)`. Global (`g`) and sticky (`y`)
 * regexes carry a `lastIndex` between calls, so it is reset to 0 before and
 * after testing; without that, the same pattern could report different
 * results for the same path on consecutive calls.
 *
 * @param path - File path to test
 * @param patterns - Substrings and/or regular expressions
 * @returns `true` when at least one pattern matches; `false` for an empty list
 */
export const matchesAnyPattern = (
  path: string,
  patterns: readonly (string | RegExp)[],
): boolean =>
  patterns.some((pattern) => {
    if (typeof pattern === "string") {
      // A suffix match is also a substring match, so `includes` is sufficient.
      return path.includes(pattern);
    }

    const isStateful = pattern.global || pattern.sticky;

    if (isStateful) {
      pattern.lastIndex = 0;
    }

    const matched = pattern.test(path);

    if (isStateful) {
      // Leave the caller's regex in a clean state as well.
      pattern.lastIndex = 0;
    }

    return matched;
  });
/**
 * Reflect a batch of mutations in the in-memory file list.
 *
 * For every entry whose `path` equals a mutation's `path`, the cached text
 * becomes the mutation's `newContent` and the cached bytes are cleared. When
 * several mutations target the same path, the last one in the list wins.
 * Nothing is written to disk.
 *
 * @param files - Entries to update in place
 * @param mutations - Mutations to apply
 */
export const applyMutations = (
  files: FileEntry[],
  mutations: readonly FileMutation[],
): void => {
  // Later pairs overwrite earlier ones with the same key.
  const latestByPath = new Map<string, FileMutation>(
    mutations.map((m): [string, FileMutation] => [m.path, m]),
  );

  for (const entry of files) {
    const pending = latestByPath.get(entry.path);

    if (pending === undefined) {
      continue;
    }

    entry.textContent = pending.newContent;
    // The cached bytes no longer correspond to the text: drop them.
    entry.rawBytes = undefined;
  }
};
/**
 * Persist mutations to disk, one file at a time and in list order.
 *
 * Mutations whose `newContent` equals their `oldContent` are skipped. A
 * failing write rejects the returned promise; writes already completed are
 * not rolled back.
 *
 * @param mutations - Mutations to persist
 * @returns Number of files actually written
 */
export const writeMutations = async (
  mutations: readonly FileMutation[],
): Promise<number> => {
  const effective = mutations.filter((m) => m.oldContent !== m.newContent);

  for (const { path, newContent } of effective) {
    await runtime.fs.writeTextFile(path, newContent);
  }

  return effective.length;
};