@@ -57,6 +57,7 @@ async function runTask(options: {
5757 printEvents : boolean
5858 finalCheckCommands ?: string [ ]
5959 disableAnalysis ?: boolean
60+ saveTraces ?: boolean
6061} ) {
6162 const {
6263 client,
@@ -74,6 +75,7 @@ async function runTask(options: {
7475 printEvents,
7576 finalCheckCommands,
7677 disableAnalysis,
78+ saveTraces = false ,
7779 } = options
7880
7981 console . log (
@@ -173,6 +175,21 @@ async function runTask(options: {
173175 finalCheckOutputs : agentResult . finalCheckOutputs ,
174176 } )
175177
178+ // Save the agent trace to its own file under the logs 'traces' directory if saveTraces is enabled
179+ if ( saveTraces ) {
180+ const tracesDir = path . join ( logsDir , 'traces' )
181+ if ( ! fs . existsSync ( tracesDir ) ) {
182+ fs . mkdirSync ( tracesDir , { recursive : true } )
183+ }
184+
185+ // Save agent trace only (not judge traces)
186+ const agentTracePath = path . join (
187+ tracesDir ,
188+ `${ index + 1 } -${ safeTaskId } -${ safeAgentId } -${ safeCommitShort } -agent.json` ,
189+ )
190+ fs . writeFileSync ( agentTracePath , JSON . stringify ( agentResult . trace , null , 2 ) )
191+ }
192+
176193 fs . writeFileSync (
177194 tracePath ,
178195 JSON . stringify ( commitTraces [ commitTraces . length - 1 ] , null , 2 ) ,
@@ -300,6 +317,7 @@ export async function runBuffBench(options: {
300317 taskIds ?: string [ ]
301318 extractLessons ?: boolean
302319 disableAnalysis ?: boolean
320+ saveTraces ?: boolean
303321} ) {
304322 const {
305323 evalDataPaths,
@@ -308,6 +326,7 @@ export async function runBuffBench(options: {
308326 taskIds,
309327 extractLessons = false ,
310328 disableAnalysis = false ,
329+ saveTraces = false ,
311330 } = options
312331
313332 if ( evalDataPaths . length === 0 ) {
@@ -453,6 +472,7 @@ export async function runBuffBench(options: {
453472 printEvents : agents . length === 1 && taskConcurrency === 1 ,
454473 finalCheckCommands : evalData . finalCheckCommands ,
455474 disableAnalysis,
475+ saveTraces,
456476 } ) ,
457477 )
458478 } )
0 commit comments