Skip to content

Commit a887872

Browse files
committed
Update behavior of read-files: files over 100k chars are truncated to their first 1k chars (files over 10MB are skipped without being read)
1 parent 6aed18d commit a887872

File tree

2 files changed

+82
-28
lines changed

2 files changed

+82
-28
lines changed

sdk/src/__tests__/read-files.test.ts

Lines changed: 60 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,13 @@ describe('getFiles', () => {
186186
})
187187

188188
describe('file too large', () => {
189-
test('should return TOO_LARGE for files over 1MB', async () => {
189+
test('should truncate files over 100k chars to 1k chars with message', async () => {
190+
const largeContent = 'x'.repeat(101_000) // 101k chars - over limit
190191
const mockFs = createMockFs({
191192
files: {
192193
'/project/large.bin': {
193-
content: 'x',
194-
size: 2 * 1024 * 1024, // 2MB
194+
content: largeContent,
195+
size: largeContent.length,
195196
},
196197
},
197198
})
@@ -202,28 +203,75 @@ describe('getFiles', () => {
202203
fs: mockFs,
203204
})
204205

205-
expect(result['large.bin']).toContain(FILE_READ_STATUS.TOO_LARGE)
206-
expect(result['large.bin']).toContain('2.00MB')
206+
// Should contain first 1k chars
207+
expect(result['large.bin']).toContain('x'.repeat(1000))
208+
// Should contain truncation message
209+
expect(result['large.bin']).toContain('FILE_TOO_LARGE')
210+
expect(result['large.bin']).toContain('101,000 chars')
211+
})
212+
213+
test('should read files at exactly 100k chars', async () => {
214+
const exactly100kContent = 'x'.repeat(100_000) // exactly 100k chars
215+
const mockFs = createMockFs({
216+
files: {
217+
'/project/exactly100k.bin': {
218+
content: exactly100kContent,
219+
size: exactly100kContent.length,
220+
},
221+
},
222+
})
223+
224+
const result = await getFiles({
225+
filePaths: ['exactly100k.bin'],
226+
cwd: '/project',
227+
fs: mockFs,
228+
})
229+
230+
// Should be read fully (no truncation message)
231+
expect(result['exactly100k.bin']).toBe(exactly100kContent)
232+
expect(result['exactly100k.bin']).not.toContain('FILE_TOO_LARGE')
207233
})
208234

209-
test('should read files exactly at 1MB limit', async () => {
210-
const oneMBContent = 'x'.repeat(1024 * 1024)
235+
test('should reject files over 10MB without reading them', async () => {
211236
const mockFs = createMockFs({
212237
files: {
213-
'/project/exactly1mb.bin': {
214-
content: oneMBContent,
215-
size: 1024 * 1024, // exactly 1MB
238+
'/project/huge.bin': {
239+
content: 'x',
240+
size: 15 * 1024 * 1024, // 15MB
216241
},
217242
},
218243
})
219244

220245
const result = await getFiles({
221-
filePaths: ['exactly1mb.bin'],
246+
filePaths: ['huge.bin'],
222247
cwd: '/project',
223248
fs: mockFs,
224249
})
225250

226-
expect(result['exactly1mb.bin']).toBe(oneMBContent)
251+
expect(result['huge.bin']).toContain(FILE_READ_STATUS.TOO_LARGE)
252+
expect(result['huge.bin']).toContain('15.0MB')
253+
})
254+
255+
test('should read files just under 100k chars', async () => {
256+
const justUnder100k = 'x'.repeat(99_000) // under limit
257+
const mockFs = createMockFs({
258+
files: {
259+
'/project/underlimit.bin': {
260+
content: justUnder100k,
261+
size: justUnder100k.length,
262+
},
263+
},
264+
})
265+
266+
const result = await getFiles({
267+
filePaths: ['underlimit.bin'],
268+
cwd: '/project',
269+
fs: mockFs,
270+
})
271+
272+
// Should be read fully (no truncation message)
273+
expect(result['underlimit.bin']).toBe(justUnder100k)
274+
expect(result['underlimit.bin']).not.toContain('FILE_TOO_LARGE')
227275
})
228276
})
229277

@@ -347,18 +395,6 @@ describe('getFiles', () => {
347395
},
348396
})
349397

350-
// Need to also make stat fail with same error
351-
const originalStat = mockFs.stat
352-
Object.assign(mockFs, {
353-
stat: async (filePath: PathLike) => {
354-
const pathStr = String(filePath)
355-
if (pathStr === '/project/broken.ts') {
356-
throw createNodeError('Permission denied', 'EACCES')
357-
}
358-
return originalStat(pathStr)
359-
},
360-
})
361-
362398
const result = await getFiles({
363399
filePaths: ['broken.ts'],
364400
cwd: '/project',

sdk/src/tools/read-files.ts

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ export async function getFiles(params: {
2828
const hasCustomFilter = fileFilter !== undefined
2929

3030
const result: Record<string, string | null> = {}
31-
const MAX_FILE_SIZE = 1024 * 1024 // 1MB in bytes
31+
const MAX_FILE_BYTES = 10 * 1024 * 1024 // 10MB - skip reading entirely
32+
const MAX_CHARS = 100_000 // 100k characters threshold
33+
const TRUNCATE_TO_CHARS = 1_000 // Show first 1k chars when over limit
34+
const numFmt = new Intl.NumberFormat('en-US')
35+
const fmtNum = (n: number) => numFmt.format(n)
3236

3337
for (const filePath of filePaths) {
3438
if (!filePath) {
@@ -68,13 +72,27 @@ export async function getFiles(params: {
6872
}
6973

7074
try {
75+
// Safety check: skip reading files over 10MB to avoid OOM
7176
const stats = await fs.stat(fullPath)
72-
if (stats.size > MAX_FILE_SIZE) {
77+
if (stats.size > MAX_FILE_BYTES) {
7378
result[relativePath] =
7479
FILE_READ_STATUS.TOO_LARGE +
75-
` [${(stats.size / (1024 * 1024)).toFixed(2)}MB]`
80+
` [${(stats.size / (1024 * 1024)).toFixed(1)}MB exceeds 10MB limit. Use code_search or glob to find specific content.]`
81+
continue
82+
}
83+
84+
const content = await fs.readFile(fullPath, 'utf8')
85+
86+
if (content.length > MAX_CHARS) {
87+
const truncated = content.slice(0, TRUNCATE_TO_CHARS)
88+
result[relativePath] =
89+
truncated +
90+
'\n\n[FILE_TOO_LARGE: This file is ' +
91+
fmtNum(content.length) +
92+
' chars, exceeding the 100k char limit. Only the first ' +
93+
fmtNum(TRUNCATE_TO_CHARS) +
94+
' chars are shown. Use other tools to read sections of the file.]'
7695
} else {
77-
const content = await fs.readFile(fullPath, 'utf8')
7896
// Prepend TEMPLATE marker for example files
7997
result[relativePath] = isExampleFile
8098
? FILE_READ_STATUS.TEMPLATE + '\n' + content

0 commit comments

Comments
 (0)