From d62e6d1488a636510addfb786e095ae31d24b6bb Mon Sep 17 00:00:00 2001 From: elainefan331 Date: Wed, 1 Apr 2026 10:36:28 -0400 Subject: [PATCH 1/5] fix: increase token limit sent to LLM --- .../Dashboard/DatasetOrganizer/LLMPanel.tsx | 197 ++++++++++++++---- .../utils/filenameTokenizer.ts | 116 ++++++++++- .../DatasetOrganizer/utils/llmPrompts.ts | 6 +- 3 files changed, 265 insertions(+), 54 deletions(-) diff --git a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx index d9d6366..a8fd052 100644 --- a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx +++ b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx @@ -184,6 +184,7 @@ const LLMPanel: React.FC = ({ }); setEvidenceBundle(bundle); + setSubjectAnalysis(null); // ← add this line downloadJSON(bundle, "evidence_bundle.json"); setStatus("✓ Evidence bundle generated and downloaded!"); } catch (err: any) { @@ -380,6 +381,43 @@ const LLMPanel: React.FC = ({ setStatus("3/3 Generating participants.tsv..."); const partsPrompt = getParticipantsPrompt(userText); + // ← ADD HERE: compute subject analysis before try block so it's in scope + const currentSubjectAnalysis = extractSubjectAnalysis( + evidenceBundle?.all_files || [], + evidenceBundle?.user_hints?.n_subjects, + evidenceBundle?.filename_analysis?.python_statistics + ?.dominant_prefixes + ); + + console.log("=== PARTICIPANTS DEBUG ==="); + console.log("method:", currentSubjectAnalysis?.method); + console.log("subject_count:", currentSubjectAnalysis?.subject_count); + console.log( + "id_mapping:", + currentSubjectAnalysis?.id_mapping?.id_mapping + ); + console.log( + "reverse_mapping:", + currentSubjectAnalysis?.id_mapping?.reverse_mapping + ); + console.log( + "subject_records sample:", + currentSubjectAnalysis?.subject_records?.slice(0, 3) + ); + const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping; + const expectedCount = evidenceBundle?.user_hints?.n_subjects; + const subjectLabels: string[] = + idMap && + Object.keys(idMap).length > 0 && + (!expectedCount || Object.keys(idMap).length === expectedCount) + ? Object.values(idMap).map((id: string) => `sub-${id}`) + : Array.from( + { + length: expectedCount || Object.keys(idMap || {}).length || 1, + }, + (_, i) => `sub-${String(i + 1).padStart(2, "0")}` + ); + let partsResponse; if (currentProvider.isAnthropic) { partsResponse = await fetch(currentProvider.baseUrl, { @@ -435,52 +473,130 @@ const LLMPanel: React.FC = ({ : partsData.choices[0].message.content; // Build TSV from schema + // try { + // const schemaText = participantsRaw + // .replace(/^```json\n?/g, "") + // .replace(/\n?```$/g, "") + // .trim(); + // const schema = JSON.parse(schemaText); + // const columns: string[] = schema.columns.map((c: any) => c.name); + + // // Get subject IDs from evidence bundle (extracted by Python-style analysis) + // // const idMapping = + // // evidenceBundle?.subject_analysis?.id_mapping?.id_mapping; + // // const subjectLabels: string[] = idMapping + // // ? Object.values(idMapping).map((id) => `sub-${id}`) + // // : ["sub-01"]; // fallback if no subject analysis + // // Get subject IDs from subjectAnalysis state (computed at plan stage) + // // Fall back to computing fresh if plan hasn't been run yet + // const currentSubjectAnalysis = + // subjectAnalysis || + // extractSubjectAnalysis( + // evidenceBundle?.all_files || [], + // evidenceBundle?.user_hints?.n_subjects, + // evidenceBundle?.filename_analysis?.python_statistics + // ?.dominant_prefixes + // ); + // const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping; + // const subjectLabels: string[] = + // idMap && Object.keys(idMap).length > 0 + // ? Object.values(idMap).map((id) => `sub-${id}`) + // : Array.from( + // { length: evidenceBundle?.user_hints?.n_subjects || 1 }, + // (_, i) => `sub-${String(i + 1).padStart(2, "0")}` + // ); + + // const header = columns.join("\t"); + // // ====origin==== + // // const rows = subjectLabels.map((subId) => + // // columns + // // .map((col: string) => (col === "participant_id" ? subId : "n/a")) + // // .join("\t") + // // ); + // //====== end ====== + // // =====update start===== + // const reverseMap = + // currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; + // const subjectRecords = currentSubjectAnalysis?.subject_records || []; + + // const rows = subjectLabels.map((subId) => { + // const bareId = subId.replace(/^sub-/, ""); + // const originalId = reverseMap[bareId]; + // const record = subjectRecords.find( + // (r: any) => r.original_id === originalId + // ); + // return columns + // .map((col: string) => { + // if (col === "participant_id") return subId; + // if (col === "original_id") return originalId || "n/a"; + // if (col === "group") return (record as any)?.group || "n/a"; + // return "n/a"; + // }) + // .join("\t"); + // }); + // //====update end====== + // participantsContent = [header, ...rows].join("\n"); + // } catch (e) { + // // Fallback: LLM didn't return valid JSON schema, use raw content + // participantsContent = participantsRaw + // .replace(/^```\n?/g, "") + // .replace(/\n?```$/g, "") + // .trim(); + // } + // Build TSV from schema + subject analysis + // Mirrors _generate_participants_tsv_from_python() in planner.py try { const schemaText = participantsRaw .replace(/^```json\n?/g, "") .replace(/\n?```$/g, "") .trim(); const schema = JSON.parse(schemaText); - const columns: string[] = schema.columns.map((c: any) => c.name); - - // Get subject IDs from evidence bundle (extracted by Python-style analysis) - // const idMapping = - // evidenceBundle?.subject_analysis?.id_mapping?.id_mapping; - // const subjectLabels: string[] = idMapping - // ? Object.values(idMapping).map((id) => `sub-${id}`) - // : ["sub-01"]; // fallback if no subject analysis - // Get subject IDs from subjectAnalysis state (computed at plan stage) - // Fall back to computing fresh if plan hasn't been run yet - const currentSubjectAnalysis = - subjectAnalysis || - extractSubjectAnalysis( - evidenceBundle?.all_files || [], - evidenceBundle?.user_hints?.n_subjects, - evidenceBundle?.filename_analysis?.python_statistics - ?.dominant_prefixes + + // LLM decides extra demographic columns (sex, age, group etc.) + // but we always add participant_id and original_id ourselves + const extraColumns: string[] = schema.columns + .map((c: any) => c.name) + .filter( + (name: string) => + name !== "participant_id" && name !== "original_id" ); - const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping; - const subjectLabels: string[] = - idMap && Object.keys(idMap).length > 0 - ? Object.values(idMap).map((id) => `sub-${id}`) - : Array.from( - { length: evidenceBundle?.user_hints?.n_subjects || 1 }, - (_, i) => `sub-${String(i + 1).padStart(2, "0")}` - ); + + // Always start with participant_id and original_id + const columns = ["participant_id", "original_id", ...extraColumns]; + + const reverseMap = + currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; + const subjectRecords = currentSubjectAnalysis?.subject_records || []; const header = columns.join("\t"); - const rows = subjectLabels.map((subId) => - columns - .map((col: string) => (col === "participant_id" ? subId : "n/a")) - .join("\t") - ); + const rows = subjectLabels.map((subId) => { + const bareId = subId.replace(/^sub-/, ""); + const originalId = reverseMap[bareId] || "n/a"; + const record = subjectRecords.find( + (r: any) => r.original_id === originalId + ); + return columns + .map((col: string) => { + if (col === "participant_id") return subId; + if (col === "original_id") return originalId; + if (col === "group") return (record as any)?.group || "n/a"; + return "n/a"; + }) + .join("\t"); + }); + participantsContent = [header, ...rows].join("\n"); } catch (e) { - // Fallback: LLM didn't return valid JSON schema, use raw content - participantsContent = participantsRaw - .replace(/^```\n?/g, "") - .replace(/\n?```$/g, "") - .trim(); + // Fallback: generate minimal TSV directly from subject analysis + const reverseMap = + currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; + const header = "participant_id\toriginal_id"; + const rows = subjectLabels.map((subId) => { + const bareId = subId.replace(/^sub-/, ""); + const originalId = reverseMap[bareId] || "n/a"; + return `${subId}\t${originalId}`; + }); + participantsContent = [header, ...rows].join("\n"); } } // ========================================== @@ -632,11 +748,6 @@ const LLMPanel: React.FC = ({ const filePatterns = analyzeFilePatterns(files); const userContext = getUserContext(files); const annotations = getFileAnnotations(files); - // console.log("=== PROMPT BEING SENT TO LLM ==="); - // console.log(fileSummary); - // console.log(filePatterns); - // console.log(userContext); - // console.log("================================="); // UPDATED: Improved prompt that uses trio files const prompt = getConversionScriptPrompt( @@ -788,6 +899,7 @@ const LLMPanel: React.FC = ({ userNSubjects, dominantPrefixes ); + setSubjectAnalysis(computedSubjectAnalysis); const fileSummary = buildFileSummary(files); @@ -801,11 +913,6 @@ const LLMPanel: React.FC = ({ .map((s: any) => ` - ${s.relpath}`) .join("\n") || ""; - // console.log("=== SAMPLE FILES ==="); - // console.log(sampleFiles); - // console.log("=== COUNTS BY EXT ==="); - // console.log(evidenceBundle?.counts_by_ext); - const prompt = getBIDSPlanPrompt( fileSummary, filePatterns, diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts index 1c1820b..4708c13 100644 --- a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts +++ b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts @@ -7,6 +7,7 @@ export interface SubjectRecord { site: string | null; pattern_name: string; file_count: number; + group?: string; } export interface SubjectAnalysis { @@ -359,14 +360,52 @@ const extractNumericIdFromIdentifier = (identifier: string): string | null => { // ── Step 1: Directory structure patterns // Mirrors _extract_subjects_from_directory_structure() in planner.py +const SKIP_DIRS = new Set([ + "anat", + "func", + "dwi", + "fmap", + "nirs", + "meg", + "eeg", + "beh", + "perf", + "derivatives", + "sourcedata", + "stimuli", + "walking", + "resting", + "resting_state", + "run", + "ses", + "pd", + "control", + "hc", + "task", + "sub", + "dataset", + "data", + "raw", + "bids", + "output", + "outputs", + "staging", + "_staging", + "mri", + "fnirs", + "edf", + "dicom", +]); + const extractFromDirectoryStructure = ( allFiles: string[] ): Omit | null => { const patterns: Array<[RegExp, boolean, number, number | null, string]> = [ - [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"], - [/^sub-(\w+)$/, false, 1, null, "standard_bids"], // directory named sub-01 - [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"], - [/^\d{3,}$/, false, 1, null, "numeric_only"], // directory named 001 + [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"], // Beijing_sub82352 + [/^sub-(\w+)$/, false, 1, null, "standard_bids"], // sub-01 + [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"], // subject_01 + [/^\d{3,}$/, false, 1, null, "numeric_only"], // 001 + [/^([A-Za-z]+\d+)$/, false, 1, null, "alphanum_id"], // PD01, Control01, HC03 ]; const subjectRecords: SubjectRecord[] = []; @@ -374,11 +413,14 @@ const extractFromDirectoryStructure = ( for (const filepath of allFiles) { const parts = filepath.split("/"); - // Only check the first 2 path parts (directory levels), not the filename - // mirrors: for part in parts[:2] - const dirsOnly = parts.slice(0, Math.min(2, parts.length - 1)); // exclude filename + // Check ALL directory levels (not just first 2) + const dirsOnly = parts.slice(0, parts.length - 1); + // const dirsOnly = parts.slice(0, Math.min(2, parts.length - 1)); // only first 2 levels for (const part of dirsOnly) { + // Skip known non-subject directory names + // if (SKIP_DIRS.has(part.toLowerCase())) continue; + for (const [ regex, hasSite, @@ -393,7 +435,7 @@ const extractFromDirectoryStructure = ( seenIds.add(originalId); subjectRecords.push({ original_id: originalId, - numeric_id: match[idGroup], + numeric_id: match[idGroup] || match[0], site: hasSite && siteGroup ? match[siteGroup] : null, pattern_name: patternName, file_count: 0, @@ -407,11 +449,41 @@ const extractFromDirectoryStructure = ( if (subjectRecords.length === 0) return null; subjectRecords.sort((a, b) => { + // const na = parseInt(a.numeric_id) || 0; + // const nb = parseInt(b.numeric_id) || 0; + // return na - nb; + const aMatch = a.original_id.match(/^([A-Za-z]+)(\d+)$/); + const bMatch = b.original_id.match(/^([A-Za-z]+)(\d+)$/); + + if (aMatch && bMatch) { + const prefixCompare = aMatch[1].localeCompare(bMatch[1]); + if (prefixCompare !== 0) return prefixCompare; + return parseInt(aMatch[2]) - parseInt(bMatch[2]); + } + const na = parseInt(a.numeric_id) || 0; const nb = parseInt(b.numeric_id) || 0; return na - nb; }); + // Build group map: subject originalId → parent directory name + // const groupMap: Record = {}; + // for (const filepath of allFiles) { + // const parts = filepath.split("/"); + // for (let i = 1; i < parts.length - 1; i++) { + // if (seenIds.has(parts[i]) && !SKIP_DIRS.has(parts[i - 1].toLowerCase())) { + // groupMap[parts[i]] = parts[i - 1]; + // } + // } + // } + + // // Attach group to each record + // for (const rec of subjectRecords) { + // if (groupMap[rec.original_id]) { + // rec.group = groupMap[rec.original_id]; + // } + // } + return { success: true, method: "directory_structure", @@ -551,6 +623,34 @@ export const extractSubjectAnalysis = ( python_generated_filename_rules: [], }; } + // bug fix for subject mapping + // === original + // const idMapping = generateIdMapping(subjectInfo); + // return { ...subjectInfo, id_mapping: idMapping }; + // ==== end + // ==== updates + // CRITICAL: n_subjects is authoritative (mirrors planner.py PROMPT_BIDS_PLAN) + // If analysis count doesn't match user input, fall back to sequential numbering + const expectedCount = userNSubjects; + if (expectedCount && subjectInfo.subject_count !== expectedCount) { + const idMap: Record = {}; + const reverseMap: Record = {}; + for (let i = 1; i <= expectedCount; i++) { + const bidsId = String(i).padStart(2, "0"); + idMap[`sub-${bidsId}`] = bidsId; + reverseMap[bidsId] = `sub-${bidsId}`; + } + return { + ...subjectInfo, + subject_count: expectedCount, + id_mapping: { + id_mapping: idMap, + reverse_mapping: reverseMap, + strategy_used: "numeric_fallback", + metadata_columns: [], + }, + }; + } const idMapping = generateIdMapping(subjectInfo); return { ...subjectInfo, id_mapping: idMapping }; diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts index 6d6a796..be03f83 100644 --- a/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts +++ b/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts @@ -9,9 +9,13 @@ export const getDatasetDescriptionPrompt = ( evidenceBundle?: any ): string => { const documentsContext = + // evidenceBundle?.documents + // ?.map((d: any) => `[${d.filename}]:\n${d.content}`) + // .join("\n\n") || ""; evidenceBundle?.documents - ?.map((d: any) => `[${d.filename}]:\n${d.content}`) + ?.map((d: any) => `[${d.filename}]:\n${(d.content || "").slice(0, 500)}`) .join("\n\n") || ""; + return `You are a BIDS dataset_description.json generator. CRITICAL: Use the following user-provided content to extract dataset information! From 843f0a21d95bc273bb2374f57647a6b6bc69cd95 Mon Sep 17 00:00:00 2001 From: elainefan331 Date: Wed, 1 Apr 2026 15:05:56 -0400 Subject: [PATCH 2/5] fix: resolve leaks in previewModal 3D viewer --- src/components/PreviewModal.tsx | 36 +- src/utils/preview.js | 600 +++++++++++++++++++++++--------- 2 files changed, 454 insertions(+), 182 deletions(-) diff --git a/src/components/PreviewModal.tsx b/src/components/PreviewModal.tsx index 4a3ce82..bf81318 100644 --- a/src/components/PreviewModal.tsx +++ b/src/components/PreviewModal.tsx @@ -28,30 +28,14 @@ const PreviewModal: React.FC<{ // fix end--------------------- useEffect(() => { - if (!isOpen) return; - //add spinner - // if (!isOpen || isLoading) return; - - // fix start-----------: Get the container element from the ref. - // const container = canvasContainerRef.current; - // if (!container) { - // // This can happen briefly on the first render, so we just wait for the next render. - // return; - // } - // // 3. Check for the required legacy functions on the window object. - // if ( - // typeof window.previewdata !== "function" || - // typeof window.initcanvas_with_container !== "function" - // ) { - // console.error( - // "❌ Legacy preview script functions are not available on the window object." - // ); - // return; - // } - - // window.previewdata(dataKey, previewIndex, isInternal, false); - // fix end--------------------------------- - // clear old canvas + // if (!isOpen) return; + if (!isOpen) { + // Modal just closed — clean up Three.js immediately + if (typeof window.destroyPreview === "function") { + window.destroyPreview(); + } + return; + } const canvasDiv = document.getElementById("canvas"); if (canvasDiv) while (canvasDiv.firstChild) canvasDiv.removeChild(canvasDiv.firstChild); @@ -69,6 +53,10 @@ const PreviewModal: React.FC<{ return () => { clearInterval(interval); + // Component unmounting — clean up Three.js + if (typeof window.destroyPreview === "function") { + window.destroyPreview(); + } }; }, [isOpen, dataKey, previewIndex, isInternal]); diff --git a/src/utils/preview.js b/src/utils/preview.js index 912c8f8..496e855 100644 --- a/src/utils/preview.js +++ b/src/utils/preview.js @@ -74,18 +74,55 @@ var typedfun = { BigUint64Array: null, }; +// function destroyPreview() { +// if (window.scene) { +// while (window.scene.children.length > 0) { +// const obj = window.scene.children[0]; +// window.scene.remove(obj); +// if (obj.geometry) obj.geometry.dispose(); +// if (obj.material) obj.material.dispose(); +// } +// } + +// if (window.renderer && window.renderer.domElement) { +// window.renderer.domElement.remove(); +// window.renderer.dispose(); +// } + +// window.scene = undefined; +// window.camera = undefined; +// window.renderer = undefined; +// window.controls = undefined; +// window.reqid = undefined; +// } function destroyPreview() { + // Cancel animation loop first + if (reqid !== undefined) { + cancelAnimationFrame(reqid); + reqid = undefined; + window.reqid = undefined; + } + if (window.scene) { while (window.scene.children.length > 0) { const obj = window.scene.children[0]; window.scene.remove(obj); if (obj.geometry) obj.geometry.dispose(); - if (obj.material) obj.material.dispose(); + if (obj.material) { + // Dispose any textures stored in shader uniforms + if (obj.material.uniforms) { + Object.values(obj.material.uniforms).forEach((u) => { + if (u.value && u.value.isTexture) u.value.dispose(); + }); + } + obj.material.dispose(); + } } } if (window.renderer && window.renderer.domElement) { window.renderer.domElement.remove(); + window.renderer.forceContextLoss(); window.renderer.dispose(); } @@ -93,7 +130,9 @@ function destroyPreview() { window.camera = undefined; window.renderer = undefined; window.controls = undefined; - window.reqid = undefined; + lastvolume = null; + lastvolumedata = null; + texture = undefined; } function drawpreview(cfg) { @@ -967,183 +1006,428 @@ function initcanvas() { panel.appendChild(stats.domElement); } - $("#camera-near").on("input", function () { - camera.near = parseFloat($(this).val()); - renderer.render(scene, camera); - controls.update(); - renderer.updateComplete = false; - }); + // $("#camera-near").on("input", function () { + // camera.near = parseFloat($(this).val()); + // renderer.render(scene, camera); + // controls.update(); + // renderer.updateComplete = false; + // }); - $("#camera-far").on("input", function () { - camera.far = parseFloat($(this).val()); - renderer.render(scene, camera); - controls.update(); - renderer.updateComplete = false; - }); + // $("#camera-far").on("input", function () { + // camera.far = parseFloat($(this).val()); + // renderer.render(scene, camera); + // controls.update(); + // renderer.updateComplete = false; + // }); - $("#clim-low").on("input", function () { - $(this).prop( - "title", - "" + - $(this).val() + - " [" + - $(this).prop("min") + - "," + - $(this).prop("max") + - "]" - ); - if (lastvolume !== null) { - let val = lastvolume.material.uniforms["u_clim"].value; - lastvolume.material.uniforms["u_clim"].value.set( - parseFloat($(this).val()), - val.y - ); - renderer.updateComplete = false; - } - }); + // $("#clim-low").on("input", function () { + // $(this).prop( + // "title", + // "" + + // $(this).val() + + // " [" + + // $(this).prop("min") + + // "," + + // $(this).prop("max") + + // "]" + // ); + // if (lastvolume !== null) { + // let val = lastvolume.material.uniforms["u_clim"].value; + // lastvolume.material.uniforms["u_clim"].value.set( + // parseFloat($(this).val()), + // val.y + // ); + // renderer.updateComplete = false; + // } + // }); - $("#clim-hi").on("input", function () { - $(this).prop( - "title", - "" + - $(this).val() + - " [" + - $(this).prop("min") + - "," + - $(this).prop("max") + - "]" - ); - if (lastvolume !== null) { - let val = lastvolume.material.uniforms["u_clim"].value; - lastvolume.material.uniforms["u_clim"].value.set( - val.x, - parseFloat($(this).val()) - ); - renderer.updateComplete = false; - } - }); + // $("#clim-hi").on("input", function () { + // $(this).prop( + // "title", + // "" + + // $(this).val() + + // " [" + + // $(this).prop("min") + + // "," + + // $(this).prop("max") + + // "]" + // ); + // if (lastvolume !== null) { + // let val = lastvolume.material.uniforms["u_clim"].value; + // lastvolume.material.uniforms["u_clim"].value.set( + // val.x, + // parseFloat($(this).val()) + // ); + // renderer.updateComplete = false; + // } + // }); - $("#isothreshold").on("input", function () { - $(this).prop( - "title", - "" + - $(this).val() + - " [" + - $(this).prop("min") + - "," + - $(this).prop("max") + - "]" - ); - if (lastvolume !== null) { - let val = lastvolume.material.uniforms["u_renderthreshold"].value; - lastvolume.material.uniforms["u_renderthreshold"].value = parseFloat( - $(this).val() - ); - renderer.updateComplete = false; - } - }); + // $("#isothreshold").on("input", function () { + // $(this).prop( + // "title", + // "" + + // $(this).val() + + // " [" + + // $(this).prop("min") + + // "," + + // $(this).prop("max") + + // "]" + // ); + // if (lastvolume !== null) { + // let val = lastvolume.material.uniforms["u_renderthreshold"].value; + // lastvolume.material.uniforms["u_renderthreshold"].value = parseFloat( + // $(this).val() + // ); + // renderer.updateComplete = false; + // } + // }); - $("#mip-radio-button").on("change", function () { - if (lastvolume !== null) { - const unfs = lastvolume.material.uniforms; - lastvolume.material = new THREE.ShaderMaterial({ - uniforms: THREE.UniformsUtils.clone(MipRenderShader.uniforms), - vertexShader: MipRenderShader.vertexShader, - fragmentShader: MipRenderShader.fragmentShader, - side: THREE.BackSide, - }); - lastvolume.material.uniforms = unfs; - renderer.updateComplete = false; - } - }); + // $("#mip-radio-button").on("change", function () { + // if (lastvolume !== null) { + // const unfs = lastvolume.material.uniforms; + // lastvolume.material = new THREE.ShaderMaterial({ + // uniforms: THREE.UniformsUtils.clone(MipRenderShader.uniforms), + // vertexShader: MipRenderShader.vertexShader, + // fragmentShader: MipRenderShader.fragmentShader, + // side: THREE.BackSide, + // }); + // lastvolume.material.uniforms = unfs; + // renderer.updateComplete = false; + // } + // }); - $("#iso-radio-button").on("change", function () { - if (lastvolume !== null) { - const unfs = lastvolume.material.uniforms; - lastvolume.material = new THREE.ShaderMaterial({ - uniforms: THREE.UniformsUtils.clone(IsoRenderShader.uniforms), - vertexShader: IsoRenderShader.vertexShader, - fragmentShader: IsoRenderShader.fragmentShader, - side: THREE.BackSide, - }); - lastvolume.material.uniforms = unfs; - renderer.updateComplete = false; - } - }); + // $("#iso-radio-button").on("change", function () { + // if (lastvolume !== null) { + // const unfs = lastvolume.material.uniforms; + // lastvolume.material = new THREE.ShaderMaterial({ + // uniforms: THREE.UniformsUtils.clone(IsoRenderShader.uniforms), + // vertexShader: IsoRenderShader.vertexShader, + // fragmentShader: IsoRenderShader.fragmentShader, + // side: THREE.BackSide, + // }); + // lastvolume.material.uniforms = unfs; + // renderer.updateComplete = false; + // } + // }); - $("#interp-radio-button").on("change", function () { - if (lastvolume !== null) { - const unfs = lastvolume.material.uniforms; - lastvolume.material = new THREE.RawShaderMaterial(InterpRenderShader()); - lastvolume.material.uniforms = unfs; - lastvolume.material.uniforms.cameraPos.value.copy(camera.position); - renderer.updateComplete = false; - } - }); + // $("#interp-radio-button").on("change", function () { + // if (lastvolume !== null) { + // const unfs = lastvolume.material.uniforms; + // lastvolume.material = new THREE.RawShaderMaterial(InterpRenderShader()); + // lastvolume.material.uniforms = unfs; + // lastvolume.material.uniforms.cameraPos.value.copy(camera.position); + // renderer.updateComplete = false; + // } + // }); - $("#cross-x-low").on("input", function () { - setcrosssectionsizes(this); - }); + // $("#cross-x-low").on("input", function () { + // setcrosssectionsizes(this); + // }); - $("#cross-y-low").on("input", function () { - setcrosssectionsizes(this); - }); + // $("#cross-y-low").on("input", function () { + // setcrosssectionsizes(this); + // }); - $("#cross-z-low").on("input", function () { - setcrosssectionsizes(this); - }); + // $("#cross-z-low").on("input", function () { + // setcrosssectionsizes(this); + // }); - $("#cross-x-hi").on("input", function () { - setcrosssectionsizes(this); - }); + // $("#cross-x-hi").on("input", function () { + // setcrosssectionsizes(this); + // }); - $("#cross-y-hi").on("input", function () { - setcrosssectionsizes(this); - }); + // $("#cross-y-hi").on("input", function () { + // setcrosssectionsizes(this); + // }); - $("#cross-z-hi").on("input", function () { - setcrosssectionsizes(this); - }); + // $("#cross-z-hi").on("input", function () { + // setcrosssectionsizes(this); + // }); - $("#x_thickness, #y_thickness, #z_thickness").on("input", function () { - let eid = $(this).attr("id"); - let linkedeid1 = eid.replace(/_thickness/, "-low").replace(/^/, "cross-"); - let linkedeid2 = eid.replace(/_thickness/, "-hi").replace(/^/, "cross-"); - if ($(this).val() == 0) { - $("#" + linkedeid1).val(0); - $("#" + linkedeid2).val(1); - } else { - $("#" + linkedeid1).val( - ($("#" + linkedeid1).val() + $("#" + linkedeid2).val()) * 0.5 + // $("#x_thickness, #y_thickness, #z_thickness").on("input", function () { + // let eid = $(this).attr("id"); + // let linkedeid1 = eid.replace(/_thickness/, "-low").replace(/^/, "cross-"); + // let linkedeid2 = eid.replace(/_thickness/, "-hi").replace(/^/, "cross-"); + // if ($(this).val() == 0) { + // $("#" + linkedeid1).val(0); + // $("#" + linkedeid2).val(1); + // } else { + // $("#" + linkedeid1).val( + // ($("#" + linkedeid1).val() + $("#" + linkedeid2).val()) * 0.5 + // ); + // } + // setcrosssectionsizes($("#" + linkedeid1)); + // }); + + // $("#pos-x-view").on("click", function () { + // setControlAngles((Math.PI * 90) / 180, (Math.PI * 90) / 180); + // renderer.updateComplete = false; + // }); + + // $("#neg-x-view").on("click", function () { + // setControlAngles((Math.PI * 90) / 180, (Math.PI * 270) / 180); + // }); + + // $("#pos-y-view").on("click", function () { + // setControlAngles((Math.PI * 90) / 180, (Math.PI * 180) / 180); + // }); + + // $("#neg-y-view").on("click", function () { + // setControlAngles((Math.PI * 90) / 180, (Math.PI * 0) / 180); + // }); + + // $("#pos-z-view").on("click", function () { + // setControlAngles(0, 0); + // }); + + // $("#neg-z-view").on("click", function () { + // setControlAngles((Math.PI * 180) / 180, 0); + // }); + + $("#camera-near") + .off("input") + .on("input", function () { + camera.near = parseFloat($(this).val()); + renderer.render(scene, camera); + controls.update(); + renderer.updateComplete = false; + }); + + $("#camera-far") + .off("input") + .on("input", function () { + camera.far = parseFloat($(this).val()); + renderer.render(scene, camera); + controls.update(); + renderer.updateComplete = false; + }); + + $("#clim-low") + .off("input") + .on("input", function () { + $(this).prop( + "title", + "" + + $(this).val() + + " [" + + $(this).prop("min") + + "," + + $(this).prop("max") + + "]" ); - } - setcrosssectionsizes($("#" + linkedeid1)); - }); + if (lastvolume !== null) { + let val = lastvolume.material.uniforms["u_clim"].value; + lastvolume.material.uniforms["u_clim"].value.set( + parseFloat($(this).val()), + val.y + ); + renderer.updateComplete = false; + } + }); - $("#pos-x-view").on("click", function () { - setControlAngles((Math.PI * 90) / 180, (Math.PI * 90) / 180); - renderer.updateComplete = false; - }); + $("#clim-hi") + .off("input") + .on("input", function () { + $(this).prop( + "title", + "" + + $(this).val() + + " [" + + $(this).prop("min") + + "," + + $(this).prop("max") + + "]" + ); + if (lastvolume !== null) { + let val = lastvolume.material.uniforms["u_clim"].value; + lastvolume.material.uniforms["u_clim"].value.set( + val.x, + parseFloat($(this).val()) + ); + renderer.updateComplete = false; + } + }); - $("#neg-x-view").on("click", function () { - setControlAngles((Math.PI * 90) / 180, (Math.PI * 270) / 180); - }); + $("#isothreshold") + .off("input") + .on("input", function () { + $(this).prop( + "title", + "" + + $(this).val() + + " [" + + $(this).prop("min") + + "," + + $(this).prop("max") + + "]" + ); + if (lastvolume !== null) { + lastvolume.material.uniforms["u_renderthreshold"].value = parseFloat( + $(this).val() + ); + renderer.updateComplete = false; + } + }); - $("#pos-y-view").on("click", function () { - setControlAngles((Math.PI * 90) / 180, (Math.PI * 180) / 180); - }); + $("#mip-radio-button") + .off("change") + .on("change", function () { + if (lastvolume !== null) { + const unfs = lastvolume.material.uniforms; + lastvolume.material = new THREE.ShaderMaterial({ + uniforms: THREE.UniformsUtils.clone(MipRenderShader.uniforms), + vertexShader: MipRenderShader.vertexShader, + fragmentShader: MipRenderShader.fragmentShader, + side: THREE.BackSide, + }); + lastvolume.material.uniforms = unfs; + renderer.updateComplete = false; + } + }); - $("#neg-y-view").on("click", function () { - setControlAngles((Math.PI * 90) / 180, (Math.PI * 0) / 180); - }); + $("#iso-radio-button") + .off("change") + .on("change", function () { + if (lastvolume !== null) { + const unfs = lastvolume.material.uniforms; + lastvolume.material = new THREE.ShaderMaterial({ + uniforms: THREE.UniformsUtils.clone(IsoRenderShader.uniforms), + vertexShader: IsoRenderShader.vertexShader, + fragmentShader: IsoRenderShader.fragmentShader, + side: THREE.BackSide, + }); + lastvolume.material.uniforms = unfs; + renderer.updateComplete = false; + } + }); - $("#pos-z-view").on("click", function () { - setControlAngles(0, 0); - }); + $("#interp-radio-button") + .off("change") + .on("change", function () { + if (lastvolume !== null) { + const unfs = lastvolume.material.uniforms; + lastvolume.material = new THREE.RawShaderMaterial(InterpRenderShader()); + lastvolume.material.uniforms = unfs; + lastvolume.material.uniforms.cameraPos.value.copy(camera.position); + renderer.updateComplete = false; + } + }); - $("#neg-z-view").on("click", function () { - setControlAngles((Math.PI * 180) / 180, 0); - }); + $("#cross-x-low") + .off("input") + .on("input", function () { + setcrosssectionsizes(this); + }); + $("#cross-y-low") + .off("input") + .on("input", function () { + setcrosssectionsizes(this); + }); + $("#cross-z-low") + .off("input") + .on("input", function () { + setcrosssectionsizes(this); + }); + $("#cross-x-hi") + .off("input") + .on("input", function () { + setcrosssectionsizes(this); + }); + $("#cross-y-hi") + .off("input") + .on("input", function () { + setcrosssectionsizes(this); + }); + $("#cross-z-hi") + .off("input") + .on("input", function () { + setcrosssectionsizes(this); + }); + + $("#x_thickness, #y_thickness, #z_thickness") + .off("input") + .on("input", function () { + let eid = $(this).attr("id"); + let linkedeid1 = eid.replace(/_thickness/, "-low").replace(/^/, "cross-"); + let linkedeid2 = eid.replace(/_thickness/, "-hi").replace(/^/, "cross-"); + if ($(this).val() == 0) { + $("#" + linkedeid1).val(0); + $("#" + linkedeid2).val(1); + } else { + $("#" + linkedeid1).val( + ($("#" + linkedeid1).val() + $("#" + linkedeid2).val()) * 0.5 + ); + } + setcrosssectionsizes($("#" + linkedeid1)); + }); + + $("#pos-x-view") + .off("click") + .on("click", function () { + setControlAngles((Math.PI * 90) / 180, (Math.PI * 90) / 180); + renderer.updateComplete = false; + }); + $("#neg-x-view") + .off("click") + .on("click", function () { + setControlAngles((Math.PI * 90) / 180, (Math.PI * 270) / 180); + }); + $("#pos-y-view") + .off("click") + .on("click", function () { + setControlAngles((Math.PI * 90) / 180, (Math.PI * 180) / 180); + }); + $("#neg-y-view") + .off("click") + .on("click", function () { + setControlAngles((Math.PI * 90) / 180, (Math.PI * 0) / 180); + }); + $("#pos-z-view") + .off("click") + .on("click", function () { + setControlAngles(0, 0); + }); + $("#neg-z-view") + .off("click") + .on("click", function () { + setControlAngles((Math.PI * 180) / 180, 0); + }); + + $("#cross-t") + .off("mouseup") + .on("mouseup", function () { + $(this).prop( + "title", + "" + + $(this).val() + + " [" + + $(this).prop("min") + + "," + + $(this).prop("max") + + "]" + ); + if (lastvolume !== null && lastvolumedata !== undefined) { + let dim = lastvolumedim; + let offset = + Math.min($(this).val(), dim[3] - 2) * dim[0] * dim[1] * dim[2]; + let texture = new THREE.Data3DTexture( + lastvolumedata.selection.data.slice( + offset - 1, + offset + dim[0] * dim[1] * dim[2] - 1 + ), + dim[0], + dim[1], + dim[2] + ); + texture.format = THREE.RedFormat; + texture.type = texture_dtype[lastvolumedata.dtype]; + texture.minFilter = texture.magFilter = THREE.LinearFilter; + texture.unpackAlignment = 1; + texture.needsUpdate = true; + lastvolume.material.uniforms["u_data"].value = texture; + renderer.updateComplete = false; + } + }); $("#cross-t").on("mouseup", function () { $(this).prop( From e54ea6f058496d17201552a454f8c83d3e9458fe Mon Sep 17 00:00:00 2001 From: elainefan331 Date: Thu, 2 Apr 2026 09:32:15 -0400 Subject: [PATCH 3/5] fix: resolve memory leaks in 2D plot viewer --- src/utils/preview.js | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/utils/preview.js b/src/utils/preview.js index 496e855..02f3908 100644 --- a/src/utils/preview.js +++ b/src/utils/preview.js @@ -33,6 +33,7 @@ var lastvolume = null; var lastvolumedata = null; var lastvolumedim = []; var lastclim = 0; +var uplotInstance = null; var reqid = undefined; var canvas = null; @@ -133,6 +134,12 @@ function destroyPreview() { lastvolume = null; lastvolumedata = null; texture = undefined; + + if (uplotInstance !== null) { + uplotInstance.destroy(); + uplotInstance = null; + } + $("#chartpanel").hide().html(""); } function drawpreview(cfg) { @@ -428,22 +435,32 @@ function dopreview(key, idx, isinternal, hastime) { ? "y" + i : hastime[i]; } - let u = new uPlot(opts, plotdata, document.getElementById("plotchart")); + // let u = new uPlot(opts, plotdata, document.getElementById("plotchart")); + if (uplotInstance !== null) { + uplotInstance.destroy(); + uplotInstance = null; + } + uplotInstance = new uPlot( + opts, + plotdata, + document.getElementById("plotchart") + ); } else { - let u = new uPlot( + // let u = new uPlot( + // opts, + // [[...Array(dataroot.length).keys()], dataroot], + // document.getElementById("plotchart") + // ); + if (uplotInstance !== null) { + uplotInstance.destroy(); + uplotInstance = null; + } + uplotInstance = new uPlot( opts, [[...Array(dataroot.length).keys()], dataroot], document.getElementById("plotchart") ); } - // add spinner - // --- NEW LOGIC for 2D plot --- - // Signal that the 2D plot has just been created and is now visible. - // if (typeof window.__onPreviewReady === "function") { - // window.__onPreviewReady(); - // window.__onPreviewReady = null; // Clean up to prevent accidental re-firing - // } - // --- END NEW LOGIC --- // for spinner // --- Signal React that 2D preview is ready --- From 66f6e435ead30e55958fba297e4255b34a2283e9 Mon Sep 17 00:00:00 2001 From: elainefan331 Date: Thu, 2 Apr 2026 13:51:51 -0400 Subject: [PATCH 4/5] feat: add executorHelpers and plannerHelpers --- .../DatasetOrganizer/utils/executorHelpers.ts | 522 ++++++++++++++++++ .../DatasetOrganizer/utils/fileAnalyzers.ts | 332 ++--------- .../utils/filenameTokenizer.ts | 505 +++++++++++++++-- .../DatasetOrganizer/utils/plannerHelpers.ts | 0 4 files changed, 1030 insertions(+), 329 deletions(-) create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts new file mode 100644 index 0000000..782dee5 --- /dev/null +++ b/src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts @@ -0,0 +1,522 @@ +// src/components/DatasetOrganizer/utils/executorHelpers.ts +// +// Portable helper functions from autobidsify/converters/executor.py +// +// What is NOT here (intentionally — requires server-side CLI): +// execute_bids_plan() — file copy/conversion operations +// convert_mat_to_snirf() — binary .mat read + .snirf write +// run_dcm2niix_batch() — dcm2niix subprocess +// convert_jnifti_to_nifti() — nibabel NIfTI write +// +// What IS here (useful client-side for plan validation + preview): +// sanitizeBidsLabel() mirrors _sanitize_bids_label() +// normalizeFilename() mirrors _normalize_filename() +// extractAcqLabel() mirrors _extract_acq_label() +// selectPreferredFile() mirrors _select_preferred_file() +// matchGlobPattern() mirrors _match_glob_pattern() +// inferScanType() mirrors infer_scan_type_from_filepath() +// inferSubdirectory() mirrors infer_subdirectory_from_suffix() +// categorizeScanType() mirrors categorize_scan_type() +// analyzeFilepathUniversal() mirrors analyze_filepath_universal() +// validatePlanCoverage() NEW — uses matchGlobPattern to check LLM patterns + +// ============================================================================ +// sanitizeBidsLabel() +// Mirrors _sanitize_bids_label() in executor.py +// Removes all non-alphanumeric characters from a BIDS entity value +// e.g. "mental_arithmetic" → "mentalarithmetic" +// ============================================================================ + +export const sanitizeBidsLabel = (label: string): string => + label.replace(/[^a-zA-Z0-9]/g, ""); + +// ============================================================================ +// normalizeFilename() +// Mirrors _normalize_filename() in executor.py +// +// Strips extensions and trailing sequence numbers. +// Used to identify DICOM series and detect format duplicates. +// +// Examples: +// "VHFCT1mm-Hip (134).dcm" → "vhfct1mm-hip" +// "scan_mprage_anonymized.nii.gz" → "scan_mprage_anonymized" +// "scan_001.dcm" → "scan" +// ============================================================================ + +export const normalizeFilename = (filepath: string): string => { + let name = filepath.split("/").pop()!; + + // Strip all extensions (up to 6 chars) + while (name.includes(".") && name.split(".").pop()!.length <= 6) { + name = name.substring(0, name.lastIndexOf(".")); + } + + // Strip trailing " (N)" + name = name.replace(/\s*\(\d+\)\s*$/, ""); + // Strip trailing _NNN or -NNN + name = name.replace(/[_\-]\d+$/, ""); + + return name.trim().toLowerCase(); +}; + +// ============================================================================ +// extractAcqLabel() +// Mirrors _extract_acq_label() in executor.py +// +// Derives a short, clean acq- label from a normalized DICOM filename. +// Keeps the last meaningful alphabetic token (body part or scan descriptor). +// +// Examples: +// "vhfct1mmankle" → "ankle" +// "vhfct1mmhead" → "head" +// "vhmct1mmhip" → "hip" +// "scanmprage" → "mprage" +// ============================================================================ + +export const extractAcqLabel = (normalizedFname: string): string => { + const skip = new Set(["vhf", "vhm", "ct", "mr", "mri", "mm", "scan", "the"]); + const tokens = normalizedFname.match(/[a-z]+/g) || []; + const meaningful = tokens.filter((t) => t.length > 2 && !skip.has(t)); + + if (meaningful.length > 0) { + return meaningful[meaningful.length - 1]; // last = body part + } + return normalizedFname.slice(0, 20); // fallback: cap at 20 chars +}; + +// ============================================================================ +// selectPreferredFile() +// Mirrors _select_preferred_file() in executor.py +// +// Priority: NIfTI dir > non-BRIK > shortest path > alphabetical +// ============================================================================ + +export const selectPreferredFile = (files: string[]): string | null => { + if (files.length === 0) return null; + if (files.length === 1) return files[0]; + + const priority = (f: string): [number, number, number, string] => { + const parts = f.toLowerCase().split("/"); + return [ + parts.some((p) => p.includes("nifti")) ? 0 : 1, + parts.some((p) => p.includes("brik")) ? 1 : 0, + parts.length, + f, + ]; + }; + + return [...files].sort((a, b) => { + const [a0, a1, a2, a3] = priority(a); + const [b0, b1, b2, b3] = priority(b); + if (a0 !== b0) return a0 - b0; + if (a1 !== b1) return a1 - b1; + if (a2 !== b2) return a2 - b2; + return a3.localeCompare(b3); + })[0]; +}; + +// ============================================================================ +// matchGlobPattern() +// Mirrors _match_glob_pattern() in executor.py +// +// Supported patterns: +// "**/*.nii.gz" → any .nii.gz at any depth +// "**/BRIK/**" → any file inside a BRIK directory +// "*token*" → filepath contains token +// "*.ext" → filename ends with extension +// "token*" → filename starts with token +// "plain" → substring anywhere in path (fallback) +// ============================================================================ + +export const matchGlobPattern = ( + filepath: string, + pattern: string +): boolean => { + const fp = filepath.toLowerCase(); + const pat = pattern.toLowerCase(); + const parts = fp.split("/"); + const filename = parts[parts.length - 1]; + + // **/TOKEN/** — directory component match + if (pat.startsWith("**/") && pat.endsWith("/**")) { + const token = pat.slice(3, -3); + return parts.slice(0, -1).includes(token); + } + + // **/*.ext — any depth extension match + if (pat.startsWith("**/")) { + const suffix = pat.slice(3); + if (suffix.startsWith("*.")) return fp.endsWith(suffix.slice(1)); + return fp.includes(suffix); + } + + // *token* — substring in full path + if (pat.startsWith("*") && pat.endsWith("*")) { + return fp.includes(pat.slice(1, -1)); + } + + // *.ext — extension match on filename only + if (pat.startsWith("*.")) { + return filename.endsWith(pat.slice(1)); + } + + // token* — filename prefix + if (pat.endsWith("*")) { + return filename.startsWith(pat.slice(0, -1)); + } + + // fallback — substring anywhere in path + return fp.includes(pat); +}; + +// ============================================================================ +// inferScanType() +// Mirrors infer_scan_type_from_filepath() in executor.py +// +// Priority: +// 1. LLM filename_rules from BIDSPlan +// 2. BIDS entities already in filename (ses-, task-, acq-, run-) +// 3. Keyword detection in path +// 4. Extension fallback +// ============================================================================ + +interface ScanTypeResult { + suffix: string; + subdirectory: string; + category: string; +} + +export const inferScanType = ( + filepath: string, + filenameRules: any[] = [] +): ScanTypeResult => { + const pathLower = filepath.toLowerCase(); + const filename = filepath.split("/").pop()!; + const fnameLow = filename.toLowerCase(); + + // ── Priority 1: LLM filename_rules ────────────────────────────────── + for (const rule of filenameRules) { + try { + const mp = (rule.match_pattern || "").replace(/\\\\/g, "\\"); + if (!new RegExp(mp, "i").test(filename)) continue; + + const template: string = rule.bids_template || ""; + const m = template.match(/sub-[^_]+_(.*?)\.(nii\.gz|snirf|nii)/); + if (!m) continue; + + let raw = m[1]; + // Remove placeholder entities + raw = raw + .replace(/ses-X_?/g, "") + .replace(/task-X_?/g, "") + .replace(/^_|_$/g, ""); + + // Remove spurious ses- if no ses- dir in path + if ( + /ses-[A-Za-z0-9]+/.test(raw) && + !/\/ses-[A-Za-z0-9]+\//.test(filepath) + ) { + raw = raw.replace(/ses-[A-Za-z0-9]+_?/g, "").replace(/^_|_$/g, ""); + } + + if (raw) { + // Sanitize entity values — mirrors _sanitize_suffix() in executor.py + // "task-mental_arithmetic_nirs" → "task-mentalarithmetic_nirs" + raw = raw.replace( + /([a-zA-Z]+-)(.+?)(?=_[a-zA-Z]+-|_[a-zA-Z]+$|$)/g, + (_match, key, val) => key + sanitizeBidsLabel(val) + ); + const subdir = inferSubdirectory(raw); + return { + suffix: raw, + subdirectory: subdir, + category: categorizeScanType(raw), + }; + } + } catch { + continue; + } + } + + // ── Priority 2: BIDS entities already in filename ──────────────────── + const entities: Record = {}; + for (const [key, pattern] of [ + ["ses", /ses-([A-Za-z0-9]+)/], + ["task", /task-([A-Za-z0-9]+)/], + ["acq", /acq-([A-Za-z0-9]+)/], + ["run", /run-([A-Za-z0-9]+)/], + ] as [string, RegExp][]) { + const match = filename.match(pattern); + if (match) entities[key] = match[1]; + } + + // Infer task from filename keywords when no task- entity present + if (!entities.task) { + const nameNoExt = fnameLow.replace(/\.[^.]+$/, ""); + if (/rest|resting/.test(nameNoExt)) entities.task = "rest"; + else if (/finger|tapping|fingertap/.test(nameNoExt)) + entities.task = "fingertapping"; + else if (/walking|walk/.test(nameNoExt)) entities.task = "walking"; + else if (/motor|tap/.test(nameNoExt)) entities.task = "motor"; + } + + let modalityLabel: string | null = null; + let subdir = "anat"; + + if (fnameLow.endsWith(".snirf") || fnameLow.includes("nirs")) { + modalityLabel = "nirs"; + subdir = "nirs"; + } else if (/t1w|t1/.test(fnameLow)) { + modalityLabel = "T1w"; + subdir = "anat"; + } else if (/t2w|t2/.test(fnameLow)) { + modalityLabel = "T2w"; + subdir = "anat"; + } else if (/bold|func/.test(fnameLow)) { + modalityLabel = "bold"; + subdir = "func"; + } else if (/dwi/.test(fnameLow)) { + modalityLabel = "dwi"; + subdir = "dwi"; + } + + // BIDS rule: task-* scans go in func/ (unless nirs) + if (subdir !== "nirs" && (entities.task || pathLower.includes("func/"))) { + subdir = "func"; + if (!modalityLabel) modalityLabel = "bold"; + } + + if (Object.keys(entities).length > 0 || modalityLabel) { + const parts: string[] = []; + for (const key of ["ses", "task", "acq", "run"]) { + if (entities[key]) + parts.push(`${key}-${sanitizeBidsLabel(entities[key])}`); + } + if (modalityLabel) parts.push(modalityLabel); + if (parts.length > 0) { + const suffix = parts.join("_"); + return { + suffix, + subdirectory: subdir, + category: categorizeScanType(suffix), + }; + } + } + + // ── Priority 3: Heuristic path keywords ───────────────────────────── + if (/anat|mprage|t1w/.test(pathLower)) + return { suffix: "T1w", subdirectory: "anat", category: "anatomical" }; + if (/func|bold/.test(pathLower)) { + const m = pathLower.match(/task[_-]([a-z0-9]+)/); + const suffix = m ? `task-${m[1]}_bold` : "task-rest_bold"; + return { suffix, subdirectory: "func", category: "functional" }; + } + if (pathLower.includes("rest")) + return { + suffix: "task-rest_bold", + subdirectory: "func", + category: "functional", + }; + if (/nirs|fnirs|\.snirf/.test(pathLower)) + return { suffix: "nirs", subdirectory: "nirs", category: "functional" }; + if (pathLower.includes("dwi")) + return { suffix: "dwi", subdirectory: "dwi", category: "diffusion" }; + + // ── Priority 4: Extension fallback ────────────────────────────────── + if (fnameLow.endsWith(".snirf")) + return { suffix: "nirs", subdirectory: "nirs", category: "functional" }; + if (fnameLow.endsWith(".nii") || fnameLow.endsWith(".nii.gz")) + return { suffix: "T1w", subdirectory: "anat", category: "anatomical" }; + + return { suffix: "unknown", subdirectory: "anat", category: "unknown" }; +}; + +// ============================================================================ +// inferSubdirectory() +// Mirrors infer_subdirectory_from_suffix() in executor.py +// ============================================================================ + +export const inferSubdirectory = (suffix: string): string => { + const s = suffix.toLowerCase(); + if (s.includes("t1w") || s.includes("t2w")) return "anat"; + if (s.includes("bold")) return "func"; + if (s.includes("nirs")) return "nirs"; + if (s.includes("dwi")) return "dwi"; + return "anat"; +}; + +// ============================================================================ +// categorizeScanType() +// Mirrors categorize_scan_type() in executor.py +// ============================================================================ + +export const categorizeScanType = (suffix: string): string => { + const s = suffix.toLowerCase(); + if (s.includes("t1w") || s.includes("t2w")) return "anatomical"; + if (s.includes("bold") || s.includes("nirs")) return "functional"; + if (s.includes("dwi")) return "diffusion"; + return "unknown"; +}; + +// ============================================================================ +// analyzeFilepathUniversal() +// Mirrors analyze_filepath_universal() in executor.py +// +// Determines BIDS subject ID and output filename for one source file. +// Used for plan preview — shows user what each file will become. +// ============================================================================ + +export interface FilepathAnalysis { + subject_id: string; + scan_type_suffix: string; + bids_filename: string; + subdirectory: string; + scan_category: string; + original_filepath: string; + modality: string; +} + +export const analyzeFilepathUniversal = ( + filepath: string, + assignmentRules: any[], + filenameRules: any[], + modality: string = "mri" +): FilepathAnalysis => { + const filename = filepath.split("/").pop()!; + const pathParts = filepath.split("/"); + let subjectId: string | null = null; + + // Priority 1: match glob patterns + for (const rule of assignmentRules) { + for (const pat of rule.match || []) { + if (matchGlobPattern(filepath, pat)) { + subjectId = rule.subject; + break; + } + } + if (subjectId) break; + } + + // Priority 2: original substring match + if (!subjectId) { + for (const rule of assignmentRules) { + const orig: string = rule.original || ""; + if (orig && filepath.toLowerCase().includes(orig.toLowerCase())) { + subjectId = rule.subject; + break; + } + } + } + + // Priority 3: prefix match + if (!subjectId) { + for (const rule of assignmentRules) { + const pfx: string = rule.prefix || ""; + if (pfx && filename.toLowerCase().startsWith(pfx.toLowerCase())) { + subjectId = rule.subject; + break; + } + } + } + + // Priority 4: sub-XX already in path + if (!subjectId) { + for (const part of pathParts) { + const m = part.match(/sub[_-]?(\w+)/i); + if (m) { + subjectId = m[1]; + break; + } + } + } + + // Fallback + if (!subjectId) subjectId = "unknown"; + + // Strip accidental sub- prefix + if (subjectId.startsWith("sub-")) subjectId = subjectId.slice(4); + + const scanInfo = inferScanType(filepath, filenameRules); + const ext = modality === "nirs" ? ".snirf" : ".nii.gz"; + const bidsFilename = `sub-${subjectId}_${scanInfo.suffix}${ext}`; + + return { + subject_id: subjectId, + scan_type_suffix: scanInfo.suffix, + bids_filename: bidsFilename, + subdirectory: scanInfo.subdirectory, + scan_category: scanInfo.category, + original_filepath: filepath, + modality, + }; +}; + +// ============================================================================ +// validatePlanCoverage() +// NEW — not in Python (Python validates at runtime, we validate at plan-time) +// +// Checks that the LLM's match patterns in BIDSPlan actually cover the +// sample files from the evidence bundle. Warns about uncovered files. +// +// Used in plannerHelpers.ts after buildBidsPlan() to surface issues +// before the user downloads the ZIP. +// ============================================================================ + +export interface PlanCoverageResult { + covered: string[]; + uncovered: string[]; + coveragePercent: number; + warnings: string[]; +} + +export const validatePlanCoverage = ( + sampleFiles: string[], + mappings: any[] +): PlanCoverageResult => { + const covered: string[] = []; + const uncovered: string[] = []; + const warnings: string[] = []; + + for (const filepath of sampleFiles) { + let isCovered = false; + + for (const mapping of mappings) { + const patterns: string[] = mapping.match || []; + const excludes: string[] = mapping.exclude || []; + + const isExcluded = excludes.some((ex) => matchGlobPattern(filepath, ex)); + if (isExcluded) continue; + + const isMatched = patterns.some((pat) => matchGlobPattern(filepath, pat)); + if (isMatched) { + isCovered = true; + break; + } + } + + if (isCovered) covered.push(filepath); + else uncovered.push(filepath); + } + + if (uncovered.length > 0) { + warnings.push( + `${uncovered.length} sample file(s) not covered by any mapping pattern.` + ); + for (const f of uncovered.slice(0, 5)) { + warnings.push(` Uncovered: ${f}`); + } + if (uncovered.length > 5) { + warnings.push(` ... and ${uncovered.length - 5} more`); + } + } + + return { + covered, + uncovered, + coveragePercent: + sampleFiles.length > 0 + ? Math.round((covered.length / sampleFiles.length) * 100) + : 100, + warnings, + }; +}; diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts index 76142c0..c3c89b0 100644 --- a/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts +++ b/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts @@ -1,54 +1,60 @@ // src/components/DatasetOrganizer/utils/fileAnalyzers.ts +// VFS adapter layer because NeuroJSON.io needs to work with FileItem[] objects +// No single Python mirror. Functions map to: +// categorizeFile() → executor.py (infer_subdirectory_from_suffix, categorize_scan_type) +// detectModality() → evidence.py (detect_kind) + constants.py (MODALITY_*) +// getCountsByExtension() → evidence.py (by_ext dict construction) +// getUserContextText() → evidence.py (_extract_document_content + documents[] assembly) import { FileItem } from "redux/projects/types/projects.interface"; -/** - * Categorize a file based on its name and type - * Returns detailed scan category (anatomical-T1w, functional-bold, etc.) - */ +// ============================================================================ +// categorizeFile() +// UI display function — determines file label/color in FileTree. +// +// NOT the same as inferScanType() in executorHelpers.ts: +// categorizeFile() → "what to show in the UI" +// inferScanType() → "what BIDS filename to generate" +// +// Partial mirror of executor.py → infer_subdirectory_from_suffix() +// + categorize_scan_type() +// ============================================================================ export const categorizeFile = (file: FileItem): string => { const name = file.name.toLowerCase(); // Functional scans (task-based) - if (name.includes("task-") && name.includes("bold")) { - return "functional-bold"; - } - if (name.endsWith(".snirf")) { - return "functional-nirs"; - } - + if (name.includes("task-") && name.includes("bold")) return "functional-bold"; + if (name.endsWith(".snirf")) return "functional-nirs"; if (name.endsWith(".nirs")) return "functional-nirs"; if (name.endsWith(".mat")) return "functional-nirs"; // Anatomical scans - if (name.includes("t1w")) { - return "anatomical-T1w"; - } - if (name.includes("t2w") || name.includes("inplanet2")) { + if (name.includes("t1w")) return "anatomical-T1w"; + if (name.includes("t2w") || name.includes("inplanet2")) return "anatomical-T2w"; - } - if (name.includes("flair")) { - return "anatomical-FLAIR"; - } - + if (name.includes("flair")) return "anatomical-FLAIR"; if (name.endsWith(".dcm")) return "anatomical-dicom"; + // JNIfTI — mirrors JNIFTI_EXT in constants.py: {'.jnii', '.bnii'} + if (name.endsWith(".jnii") || name.endsWith(".bnii")) + return "anatomical-jnifti"; + // Diffusion - if (name.includes("dwi") || name.includes("diffusion")) { - return "diffusion"; - } + if (name.includes("dwi") || name.includes("diffusion")) return "diffusion"; // Field maps - if (name.includes("fieldmap") || name.includes("fmap")) { - return "fieldmap"; - } + if (name.includes("fieldmap") || name.includes("fmap")) return "fieldmap"; - // Fall back to file type + // Array/HDF5 (non-SNIRF) + if (name.endsWith(".h5") || name.endsWith(".hdf5")) return "array"; + + // Fall back to fileType from fileProcessors.ts return file.fileType || "unknown"; }; -/** - * Detect modality from file collection - */ +// ============================================================================ +// Detect modality from file collection +// Rough equivalent of evidence.py → detect_kind() + constants.py MODALITY_* +// ============================================================================ export const detectModality = (files: FileItem[]): string => { const counts: Record = {}; files.forEach((f) => { @@ -57,31 +63,29 @@ export const detectModality = (files: FileItem[]): string => { }); if (counts.nifti > 0 || counts.dicom > 0) return "mri"; + // FIX: fileProcessors.ts returns "nirs" for .nirs files, not "homer3" if ( counts.hdf5 > 0 || counts.matlab > 0 || - counts.homer3 > 0 || + counts.nirs > 0 || files.some((f) => f.name.endsWith(".snirf")) ) return "nirs"; return "mixed"; }; -/** - * Get file extension counts - */ +// ============================================================================ +// Get file extension counts +// Mirrors evidence.py → by_ext dict construction. +// Uses ".nii.gz" as a single key — mirrors Python: p.name.lower().endswith(".nii.gz") +// ============================================================================ export const getCountsByExtension = ( files: FileItem[] ): Record => { const counts: Record = {}; - // files.forEach((f) => { - // const ext = f.fileType || "unknown"; - // counts[ext] = (counts[ext] || 0) + 1; - // }); files .filter((f) => f.source === "user" && f.type === "file") .forEach((f) => { - // Mirror Python: use ".nii.gz" as a single key for .nii.gz files const name = f.name.toLowerCase(); const ext = name.endsWith(".nii.gz") ? ".nii.gz" @@ -91,9 +95,13 @@ export const getCountsByExtension = ( return counts; }; -/** - * Extract user context from metadata files - */ +// ============================================================================ +// Extract user context from metadata files +// Partial mirror of evidence.py → _extract_document_content() + +// the documents[] assembly in _build_evidence_bundle_internal(). +// +// Python reads files from disk; this reads from VFS FileItem.content. +// ============================================================================ export const getUserContextText = (files: FileItem[]): string => { const readme = files.find((f) => f.name.toLowerCase().includes("readme")); const instructions = files.find( @@ -130,243 +138,3 @@ export const getUserContextText = (files: FileItem[]): string => { }); return parts.join("\n\n"); }; - -/** (not using yet) - * Analyze filename patterns to detect subjects - * (Simplified version inspired by auto-bidsify's filename_tokenizer) - */ -export const analyzeFilenamePatterns = ( - files: FileItem[] -): { - subjectCount: number; - subjectIds: string[]; - hasRunNumbers: boolean; - hasTaskNames: boolean; -} => { - const dataFiles = files.filter((f) => f.type === "file" && !f.isUserMeta); - const subjectIds = new Set(); - let hasRunNumbers = false; - let hasTaskNames = false; - - dataFiles.forEach((f) => { - const name = f.name; - - // Extract subject ID (sub-01, sub-02, etc.) - const subMatch = name.match(/sub-(\d+)/i); - if (subMatch) { - subjectIds.add(subMatch[1]); - } - - // Check for run numbers - if (name.includes("_run-")) { - hasRunNumbers = true; - } - - // Check for task names - if (name.includes("task-")) { - hasTaskNames = true; - } - }); - - return { - subjectCount: subjectIds.size, - subjectIds: Array.from(subjectIds).sort(), - hasRunNumbers, - hasTaskNames, - }; -}; - -// add to fileAnalyzers.ts - -// export interface SubjectRecord { -// original_id: string; -// numeric_id: string; -// site: string | null; -// pattern_name: string; -// file_count: number; -// } - -// export interface SubjectAnalysis { -// success: boolean; -// method: string; -// subject_records: SubjectRecord[]; -// subject_count: number; -// has_site_info: boolean; -// variants_by_subject: Record; -// python_generated_filename_rules: any[]; -// id_mapping: { -// id_mapping: Record; -// reverse_mapping: Record; -// strategy_used: string; -// metadata_columns: string[]; -// }; -// } - -// // mirrors _extract_subjects_from_directory_structure -// const extractFromDirectoryStructure = ( -// allFiles: string[] -// ): Omit | null => { -// const patterns: Array<[RegExp, boolean, number, number | null, string]> = [ -// [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"], -// [/^sub-(\d+)$/i, false, 1, null, "standard_bids"], -// [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"], -// [/^(\d{3,})$/, false, 1, null, "numeric_only"], -// ]; - -// const subjectRecords: SubjectRecord[] = []; -// const seenIds = new Set(); - -// for (const filepath of allFiles) { -// const parts = filepath.split("/"); -// for (const part of parts.slice(0, 2)) { -// for (const [ -// regex, -// hasSite, -// idGroup, -// siteGroup, -// patternName, -// ] of patterns) { -// const match = part.match(regex); -// if (match) { -// const originalId = match[0]; -// if (seenIds.has(originalId)) break; -// seenIds.add(originalId); -// subjectRecords.push({ -// original_id: originalId, -// numeric_id: match[idGroup], -// site: hasSite && siteGroup ? match[siteGroup] : null, -// pattern_name: patternName, -// file_count: 0, -// }); -// break; -// } -// } -// } -// } - -// if (subjectRecords.length === 0) return null; - -// subjectRecords.sort((a, b) => { -// const na = parseInt(a.numeric_id) || 0; -// const nb = parseInt(b.numeric_id) || 0; -// return na - nb; -// }); - -// return { -// success: true, -// method: "directory_structure", -// subject_records: subjectRecords, -// subject_count: subjectRecords.length, -// has_site_info: subjectRecords.some((r) => r.site !== null), -// variants_by_subject: {}, -// python_generated_filename_rules: [], -// }; -// }; - -// // mirrors _extract_subjects_from_flat_filenames -// const extractFromFlatFilenames = ( -// allFiles: string[] -// ): Omit | null => { -// const identifierToFiles: Record = {}; - -// for (const filepath of allFiles) { -// const filename = filepath.split("/").pop() || ""; -// const nameNoExt = filename -// .replace(/\.[^/.]+$/, "") -// .replace(/\.nii\.gz$/, ""); -// const match = nameNoExt.match(/^([A-Za-z0-9\-]+)/); -// if (match) { -// const identifier = match[1]; -// if (!identifierToFiles[identifier]) identifierToFiles[identifier] = []; -// identifierToFiles[identifier].push(filepath); -// } -// } - -// if (Object.keys(identifierToFiles).length === 0) return null; - -// const extractNumeric = (id: string): number => { -// const nums = id.match(/\d+/g); -// return nums ? parseInt(nums[nums.length - 1]) : 999999; -// }; - -// const sortedIdentifiers = Object.keys(identifierToFiles).sort( -// (a, b) => extractNumeric(a) - extractNumeric(b) -// ); - -// const subjectRecords: SubjectRecord[] = sortedIdentifiers.map((id, i) => ({ -// original_id: id, -// numeric_id: String(i + 1), -// site: null, -// pattern_name: "dominant_prefix", -// file_count: identifierToFiles[id].length, -// })); - -// return { -// success: true, -// method: "dominant_prefix_fallback", -// subject_records: subjectRecords, -// subject_count: subjectRecords.length, -// has_site_info: false, -// variants_by_subject: {}, -// python_generated_filename_rules: [], -// }; -// }; - -// // mirrors _generate_subject_id_mapping -// const generateIdMapping = ( -// subjectInfo: Omit -// ): SubjectAnalysis["id_mapping"] => { -// const records = subjectInfo.subject_records; -// const idMapping: Record = {}; -// const reverseMapping: Record = {}; - -// // detect already-BIDS format (sub-01, sub-02...) -// const allAlreadyBids = records.every((r) => /^sub-\w+$/i.test(r.original_id)); - -// if (allAlreadyBids) { -// for (const rec of records) { -// const bidsId = rec.original_id.replace(/^sub-/i, ""); -// idMapping[rec.original_id] = bidsId; -// reverseMapping[bidsId] = rec.original_id; -// } -// return { -// id_mapping: idMapping, -// reverse_mapping: reverseMapping, -// strategy_used: "already_bids", -// metadata_columns: [], -// }; -// } - -// // numeric strategy -// for (let i = 0; i < records.length; i++) { -// const orig = records[i].original_id; -// const bidsId = String(i + 1); -// idMapping[orig] = bidsId; -// reverseMapping[bidsId] = orig; -// } - -// return { -// id_mapping: idMapping, -// reverse_mapping: reverseMapping, -// strategy_used: "numeric", -// metadata_columns: ["original_id"], -// }; -// }; - -// // main export — call this from llmHelpers -// export const extractSubjectAnalysis = (allFiles: string[]): SubjectAnalysis => { -// const fromDir = extractFromDirectoryStructure(allFiles); -// const base = fromDir ?? -// extractFromFlatFilenames(allFiles) ?? { -// success: false, -// method: "none", -// subject_records: [], -// subject_count: 0, -// has_site_info: false, -// variants_by_subject: {}, -// python_generated_filename_rules: [], -// }; - -// const idMapping = generateIdMapping(base); -// return { ...base, id_mapping: idMapping }; -// }; diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts index 4708c13..24aa930 100644 --- a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts +++ b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts @@ -1,6 +1,6 @@ // src/components/DatasetOrganizer/utils/filenameTokenizer.ts -// Port of autobidsify's filename_tokenizer.py -// Philosophy: Python stats → dominant prefixes → subject IDs (no LLM needed for this part) +// Mirrors filename_tokenizer.py + export interface SubjectRecord { original_id: string; numeric_id: string; @@ -52,40 +52,21 @@ const COMMON_WORDS = new Set([ "experiment", ]); +const DATA_EXTENSIONS = + /\.(snirf|nii|nii\.gz|dcm|mat|nirs|jnii|bnii|h5|hdf5|edf|bdf)$/i; + // ============================================================================ -// FilenameTokenizer — mirrors FilenameTokenizer class in filename_tokenizer.py +// FilenamePatternAnalyzer — mirrors FilenameTokenizer class // ============================================================================ - -/** - * Advanced split: CamelCase + number boundaries - * "VHMCT" → ["VHM", "CT"] - * "CT1mm" → ["CT", "1", "mm"] - * "sub82352" → ["sub", "82352"] - */ -const splitAdvanced = (text: string): string[] => { - if (!text) return []; - - // Keep known neuroimaging terms together - if (NEUROIMAGING_TERMS.has(text)) return [text]; - - // Split on type boundaries: - // - Uppercase sequence before uppercase+lowercase: "VHM" before "CT" - // - CamelCase: uppercase followed by lowercase - // - Letter/digit boundaries - const pattern = /([A-Z]+(?=[A-Z][a-z]|\b|[0-9])|[A-Z][a-z]+|[a-z]+|[0-9]+)/g; - const tokens = text.match(pattern) || []; - return tokens.filter((t) => t.length > 0); -}; - -/** +/* * Tokenize a filename into meaningful tokens. - * Mirrors FilenameTokenizer.tokenize() in filename_tokenizer.py * * Examples: * "VHMCT1mm-Hip (134).dcm" → ["VHM", "CT", "1", "mm", "Hip", "134"] * "Beijing_sub82352" → ["Beijing", "sub", "82352"] * "scan_001_T1w.nii" → ["scan", "001", "T1w"] */ + export const tokenizeFilename = (filename: string): string[] => { // Step 1: Remove all extensions (up to 6 chars) let name = filename; @@ -111,28 +92,60 @@ export const tokenizeFilename = (filename: string): string[] => { return tokens.filter((t) => t.trim().length >= 1); }; +/* + * Advanced split: CamelCase + number boundaries + * "VHMCT" → ["VHM", "CT"] + * "CT1mm" → ["CT", "1", "mm"] + * "sub82352" → ["sub", "82352"] + */ +const splitAdvanced = (text: string): string[] => { + if (!text) return []; + + // Keep known neuroimaging terms together + if (NEUROIMAGING_TERMS.has(text)) return [text]; + + // Split on type boundaries: + // - Uppercase sequence before uppercase+lowercase: "VHM" before "CT" + // - CamelCase: uppercase followed by lowercase + // - Letter/digit boundaries + const pattern = /([A-Z]+(?=[A-Z][a-z]|\b|[0-9])|[A-Z][a-z]+|[a-z]+|[0-9]+)/g; + const tokens = text.match(pattern) || []; + return tokens.filter((t) => t.length > 0); +}; + // ============================================================================ // FilenamePatternAnalyzer — mirrors FilenamePatternAnalyzer class // ============================================================================ +interface TokenStatistics { + totalFiles: number; + tokenFrequency: Record; + prefixFrequency: Record; + dominantPrefixes: DominantPrefix[]; + tokenPositions: Record>; // NEW — mirrors token_positions + insights: string[]; // NEW — mirrors _generate_insights() + uniqueTokenCount: number; // NEW + uniquePrefixCount: number; // NEW +} + interface DominantPrefix { prefix: string; count: number; percentage: number; } -interface TokenStatistics { - totalFiles: number; - tokenFrequency: Record; - prefixFrequency: Record; - dominantPrefixes: DominantPrefix[]; +interface LLMPayload { + task: string; + statistics: TokenStatistics; + filenameSamples: string[]; + userHints: Record; + instructions: string; } -/** - * Find dominant prefixes — tokens appearing in >5% of files - * that are not common words. - * Mirrors FilenamePatternAnalyzer._find_dominant_prefixes() - */ +// ───────────────────────────────────────────────────────────────────────────── +// Mirrors FilenamePatternAnalyzer._find_dominant_prefixes() +// ───────────────────────────────────────────────────────────────────────────── + const findDominantPrefixes = ( prefixCounter: Record, totalFiles: number @@ -154,18 +167,109 @@ const findDominantPrefixes = ( })); }; -/** - * Analyze token statistics across all filenames. - * Mirrors FilenamePatternAnalyzer.analyze_token_statistics() - */ +// ───────────────────────────────────────────────────────────────────────────── +// _generate_insights() +// Mirrors FilenamePatternAnalyzer._generate_insights() +// ───────────────────────────────────────────────────────────────────────────── + +const generateInsights = ( + allTokens: Record, + prefixTokens: Record, + dominantPrefixes: DominantPrefix[] +): string[] => { + const insights: string[] = []; + const uniqueTokenCount = Object.keys(allTokens).length; + + // Insight 1: token diversity + if (uniqueTokenCount < 20) { + insights.push( + `Low token diversity: only ${uniqueTokenCount} unique tokens across all files` + ); + } else if (uniqueTokenCount > 100) { + insights.push( + `High token diversity: ${uniqueTokenCount} unique tokens detected` + ); + } + + // Insight 2: prefix distribution + if (dominantPrefixes.length === 0) { + insights.push("No dominant filename prefixes detected"); + } else if (dominantPrefixes.length === 1) { + const p = dominantPrefixes[0]; + insights.push( + `Single dominant prefix '${p.prefix}' in ${p.percentage}% of files` + ); + } else if (dominantPrefixes.length === 2) { + const [p1, p2] = dominantPrefixes; + insights.push( + `Two major prefixes detected: '${p1.prefix}' (${p1.percentage}%) and '${p2.prefix}' (${p2.percentage}%)` + ); + } else { + insights.push( + `${dominantPrefixes.length} dominant prefixes detected, suggesting possible subject groupings` + ); + } + + // Insight 3: most common tokens + const topTokens = Object.entries(allTokens) + .sort((a, b) => b[1] - a[1]) + .slice(0, 3); + if (topTokens.length > 0) { + const commonList = topTokens.map(([t, c]) => `'${t}' (${c})`).join(", "); + insights.push(`Most frequent tokens: ${commonList}`); + } + + return insights; +}; + +// ───────────────────────────────────────────────────────────────────────────── +// _sample_diverse_filenames() +// Mirrors FilenamePatternAnalyzer._sample_diverse_filenames() +// ───────────────────────────────────────────────────────────────────────────── + +const sampleDiverseFilenames = ( + filenames: string[], + maxSamples: number = 30 +): string[] => { + if (filenames.length <= maxSamples) return [...filenames].sort(); + + // Group by first token (mirrors Python: prefix_groups[prefix].append(filename)) + const prefixGroups: Record = {}; + for (const filename of filenames) { + const tokens = tokenizeFilename(filename); + const prefix = tokens.length > 0 ? tokens[0] : "none"; + if (!prefixGroups[prefix]) prefixGroups[prefix] = []; + prefixGroups[prefix].push(filename); + } + + const groupCount = Object.keys(prefixGroups).length; + const samplesPerGroup = Math.max(1, Math.floor(maxSamples / groupCount)); + + const samples: string[] = []; + for (const prefix of Object.keys(prefixGroups).sort()) { + const groupFiles = prefixGroups[prefix]; + const n = Math.min(groupFiles.length, samplesPerGroup); + samples.push(...[...groupFiles].sort().slice(0, n)); + if (samples.length >= maxSamples) break; + } + + return samples.slice(0, maxSamples); +}; + +// ───────────────────────────────────────────────────────────────────────────── +// analyze_token_statistics() +// Mirrors FilenamePatternAnalyzer.analyze_token_statistics() +// ───────────────────────────────────────────────────────────────────────────── + export const analyzeTokenStatistics = ( filenames: string[] ): TokenStatistics => { const allTokens: Record = {}; - const prefixTokens: Record = {}; // first token only + const prefixTokens: Record = {}; + const positionTokens: Record> = {}; for (const filename of filenames) { - // Extract just filename from path + // Mirror Python __init__: strip to just filename if path provided const fname = filename.includes("/") ? filename.split("/").pop()! : filename; @@ -177,23 +281,331 @@ export const analyzeTokenStatistics = ( allTokens[token] = (allTokens[token] || 0) + 1; } - // CRITICAL: use first TOKEN as prefix (not regex match) + // CRITICAL: use first TOKEN as prefix (not regex) if (tokens.length > 0) { const firstToken = tokens[0]; prefixTokens[firstToken] = (prefixTokens[firstToken] || 0) + 1; } + + // NEW: count tokens by position — mirrors position_tokens[i][token] += 1 + tokens.forEach((token, i) => { + if (!positionTokens[i]) positionTokens[i] = {}; + positionTokens[i][token] = (positionTokens[i][token] || 0) + 1; + }); + } + + // Cap frequencies — mirrors .most_common(50) / .most_common(20) + const tokenFrequency = Object.fromEntries( + Object.entries(allTokens) + .sort((a, b) => b[1] - a[1]) + .slice(0, 50) + ); + const prefixFrequency = Object.fromEntries( + Object.entries(prefixTokens) + .sort((a, b) => b[1] - a[1]) + .slice(0, 20) + ); + + // Cap each position bucket at top 10 — mirrors .most_common(10) + const tokenPositions: Record> = {}; + for (const [pos, counter] of Object.entries(positionTokens)) { + tokenPositions[Number(pos)] = Object.fromEntries( + Object.entries(counter) + .sort((a, b) => b[1] - a[1]) + .slice(0, 10) + ); } const dominantPrefixes = findDominantPrefixes(prefixTokens, filenames.length); + const insights = generateInsights(allTokens, prefixTokens, dominantPrefixes); return { totalFiles: filenames.length, - tokenFrequency: allTokens, - prefixFrequency: prefixTokens, + tokenFrequency, + prefixFrequency, dominantPrefixes, + tokenPositions, // NEW + insights, // NEW + uniqueTokenCount: Object.keys(allTokens).length, // NEW + uniquePrefixCount: Object.keys(prefixTokens).length, // NEW + }; +}; + +// ───────────────────────────────────────────────────────────────────────────── +// build_llm_payload() +// Mirrors FilenamePatternAnalyzer.build_llm_payload() +// ───────────────────────────────────────────────────────────────────────────── + +export const buildLLMPayload = ( + filenames: string[], + userHints: Record, + maxSamples: number = 30 +): LLMPayload => { + const stats = analyzeTokenStatistics(filenames); + const filenameSamples = sampleDiverseFilenames(filenames, maxSamples); + + return { + task: "subject_identification", + statistics: stats, + filenameSamples, + userHints, + instructions: + "Analyze the filename token statistics and samples. " + + "Determine how to group files by subject. " + + "The 'dominant_prefixes' may indicate subject identifiers. " + + "The 'insights' provide observations. " + + "User hint 'n_subjects' can help validate your hypothesis.", + }; +}; + +/** + * Analyze token statistics across all filenames. + * Mirrors FilenamePatternAnalyzer.analyze_token_statistics() + */ +// export const analyzeTokenStatistics = ( +// filenames: string[] +// ): TokenStatistics => { +// const allTokens: Record = {}; +// const prefixTokens: Record = {}; // first token only + +// for (const filename of filenames) { +// // Extract just filename from path +// const fname = filename.includes("/") +// ? filename.split("/").pop()! +// : filename; + +// const tokens = tokenizeFilename(fname); + +// // Count all tokens +// for (const token of tokens) { +// allTokens[token] = (allTokens[token] || 0) + 1; +// } + +// // CRITICAL: use first TOKEN as prefix (not regex match) +// if (tokens.length > 0) { +// const firstToken = tokens[0]; +// prefixTokens[firstToken] = (prefixTokens[firstToken] || 0) + 1; +// } +// } + +// const dominantPrefixes = findDominantPrefixes(prefixTokens, filenames.length); + +// return { +// totalFiles: filenames.length, +// tokenFrequency: allTokens, +// prefixFrequency: prefixTokens, +// dominantPrefixes, +// }; +// }; + +/* + * Find dominant prefixes — tokens appearing in >5% of files + * that are not common words. + * Mirrors FilenamePatternAnalyzer._find_dominant_prefixes() + */ +// const findDominantPrefixes = ( +// prefixCounter: Record, +// totalFiles: number +// ): DominantPrefix[] => { +// const threshold = totalFiles * 0.05; // 5% threshold + +// return Object.entries(prefixCounter) +// .filter(([prefix, count]) => { +// if (count < threshold) return false; +// if (COMMON_WORDS.has(prefix.toLowerCase())) return false; +// return true; +// }) +// .sort((a, b) => b[1] - a[1]) +// .slice(0, 20) +// .map(([prefix, count]) => ({ +// prefix, +// count, +// percentage: Math.round((count / totalFiles) * 1000) / 10, +// })); +// }; + +// ============================================================================ +// Integration Functions +// Mirrors analyze_filenames_for_subjects() + _generate_recommendation() in filename_tokenizer.py +// ============================================================================ + +export interface FilenameAnalysisResult { + python_statistics: TokenStatistics; + llm_payload: LLMPayload; + confidence: "high" | "medium" | "low" | "none"; + recommendation: string; +} + +/** + * Main entry point: analyze filenames to detect subject groupings. + * Mirrors analyze_filenames_for_subjects() in filename_tokenizer.py + * + * Called from buildEvidenceBundle() in llmHelpers.ts — replaces the + * manual filenameAnalysis block that was built inline there. + */ +export const analyzeFilenamesForSubjects = ( + allFiles: string[], + userHints: Record +): FilenameAnalysisResult => { + // Mirror Python: extract just filenames, not full paths + const filenames = allFiles.map((f) => + f.includes("/") ? f.split("/").pop()! : f + ); + + const stats = analyzeTokenStatistics(filenames); + const llmPayload = buildLLMPayload(filenames, userHints, 30); + + // Assess confidence — mirrors Python confidence logic exactly + const dominantCount = stats.dominantPrefixes.length; + const userNSubjects: number | null = userHints?.n_subjects ?? null; + + let confidence: "high" | "medium" | "low" | "none" = "none"; + if (dominantCount > 0) { + if (userNSubjects && dominantCount === userNSubjects) { + confidence = "high"; + } else if (dominantCount >= 2 && dominantCount <= 10) { + confidence = "medium"; + } else { + confidence = "low"; + } + } + + const recommendation = generateRecommendation(stats, userHints); + + return { + python_statistics: stats, + llm_payload: llmPayload, + confidence, + recommendation, }; }; +/** + * Mirrors _generate_recommendation() in filename_tokenizer.py + */ +const generateRecommendation = ( + stats: TokenStatistics, + userHints: Record +): string => { + const dominantPrefixes = stats.dominantPrefixes; + const userNSubjects: number | null = userHints?.n_subjects ?? null; + + if (dominantPrefixes.length === 0) { + return ( + "No clear filename patterns detected. " + + "Recommend using --describe to explain subject identification." + ); + } + + if (userNSubjects && dominantPrefixes.length === userNSubjects) { + const prefixesStr = dominantPrefixes.map((p) => p.prefix).join(", "); + return ( + `HIGH CONFIDENCE: Detected ${dominantPrefixes.length} dominant prefixes ` + + `(${prefixesStr}) matching user hint of ${userNSubjects} subjects.` + ); + } + + if (dominantPrefixes.length >= 2 && dominantPrefixes.length <= 5) { + return ( + `MEDIUM CONFIDENCE: Detected ${dominantPrefixes.length} potential subject groups. ` + + `Will send to LLM for validation.` + ); + } + + return ( + `LOW CONFIDENCE: Found ${dominantPrefixes.length} prefix patterns, ` + + `which may or may not represent subjects. LLM will analyze.` + ); +}; + +// ============================================================================ +// SubjectGroupingDecision +// Mirrors SubjectGroupingDecision class in filename_tokenizer.py +// Not used in runtime flow — used as typed helpers when parsing LLM responses +// ============================================================================ + +export interface PrefixMappingDecision { + method: "prefix_based"; + description: string; + rules: Array<{ + prefix: string; + maps_to_subject: string; + match_pattern: string; + }>; + participant_metadata: Record>; +} + +export interface SequentialAssignmentDecision { + method: "sequential"; + n_subjects: number; + note: string; +} + +export interface BlockingQuestionDecision { + method: "blocked"; + reason: string; + question: { + type: string; + severity: string; + message: string; + options: string[]; + }; +} + +export type SubjectGroupingDecision = + | PrefixMappingDecision + | SequentialAssignmentDecision + | BlockingQuestionDecision; + +/** + * Mirrors SubjectGroupingDecision.create_prefix_mapping() + */ +export const createPrefixMapping = ( + prefixToSubject: Record, + metadata?: Record> +): PrefixMappingDecision => ({ + method: "prefix_based", + description: `Files grouped by ${ + Object.keys(prefixToSubject).length + } filename prefixes`, + rules: Object.entries(prefixToSubject).map(([prefix, subjId]) => ({ + prefix, + maps_to_subject: subjId, + match_pattern: `${prefix}*`, + })), + participant_metadata: metadata ?? {}, +}); + +/** + * Mirrors SubjectGroupingDecision.create_sequential_assignment() + */ +export const createSequentialAssignment = ( + nSubjects: number +): SequentialAssignmentDecision => ({ + method: "sequential", + n_subjects: nSubjects, + note: + "No clear subject grouping pattern detected in filenames. " + + "Assigning sequential IDs based on file order or user hint.", +}); + +/** + * Mirrors SubjectGroupingDecision.create_blocking_question() + */ +export const createBlockingQuestion = ( + reason: string, + options: string[] +): BlockingQuestionDecision => ({ + method: "blocked", + reason, + question: { + type: "subject_grouping", + severity: "block", + message: reason, + options, + }, +}); + // ============================================================================ // extractSubjectAnalysis — mirrors build_bids_plan()'s subject extraction // ============================================================================ @@ -495,7 +907,6 @@ const extractFromDirectoryStructure = ( }; }; -const DATA_EXTENSIONS = /\.(snirf|nii|nii\.gz|dcm|mat|nirs|h5|hdf5|edf|bdf)$/i; const TRIO_FILENAMES = new Set([ "dataset_description.json", "participants.tsv", diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts new file mode 100644 index 0000000..e69de29 From edb2a8f9b4291eb30e91a79dbefad237e123378b Mon Sep 17 00:00:00 2001 From: elainefan331 Date: Fri, 3 Apr 2026 17:40:39 -0400 Subject: [PATCH 5/5] feat: reorganize the code for integrated autobidsify --- package.json | 1 + .../Dashboard/DatasetOrganizer/LLMPanel.tsx | 1384 ++++++++--------- .../DatasetOrganizer/utils/fileAnalyzers.ts | 205 ++- .../DatasetOrganizer/utils/fileProcessors.ts | 68 +- .../utils/filenameTokenizer.ts | 588 ------- .../Dashboard/DatasetOrganizer/utils/llm.ts | 963 ++++++++++++ .../DatasetOrganizer/utils/llmHelpers.ts | 983 +++++++----- .../DatasetOrganizer/utils/llmPrompts.ts | 619 +------- .../DatasetOrganizer/utils/plannerHelpers.ts | 870 +++++++++++ .../DatasetOrganizer/utils/trioHelpers.ts | 762 +++++++++ src/services/ollama.service.ts | 17 +- yarn.lock | 5 + 12 files changed, 4221 insertions(+), 2244 deletions(-) create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/llm.ts create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/trioHelpers.ts diff --git a/package.json b/package.json index 1f8144e..6f414ef 100644 --- a/package.json +++ b/package.json @@ -63,6 +63,7 @@ "devDependencies": { "@babel/plugin-proposal-private-property-in-object": "^7.21.11", "@trivago/prettier-plugin-sort-imports": "^4.2.0", + "@types/js-yaml": "^4.0.9", "@types/node": "^20.5.7", "@types/pako": "^2.0.3", "@typescript-eslint/eslint-plugin": "^5.31.0", diff --git a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx index a8fd052..9658ec6 100644 --- a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx +++ b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx @@ -1,23 +1,12 @@ import { generateId } from "./utils/fileProcessors"; -import { extractSubjectAnalysis } from "./utils/filenameTokenizer"; -//add +import { LLMConfig } from "./utils/llm"; import { - buildFileSummary, - analyzeFilePatterns, - getUserContext, - getFileAnnotations, - downloadJSON, buildEvidenceBundle, - extractSubjectsFromFiles, buildIngestInfo, + downloadJSON, } from "./utils/llmHelpers"; -import { - getDatasetDescriptionPrompt, - getReadmePrompt, - getParticipantsPrompt, - getConversionScriptPrompt, - getBIDSPlanPrompt, -} from "./utils/llmPrompts"; +import { buildBidsPlan } from "./utils/plannerHelpers"; +import { generateTrioFiles } from "./utils/trioHelpers"; import { Close, ContentCopy, @@ -40,10 +29,12 @@ import { Alert, } from "@mui/material"; import { Colors } from "design/theme"; +import { dump as yamlDump } from "js-yaml"; import JSZip from "jszip"; import React, { useState, useEffect } from "react"; import { FileItem } from "redux/projects/types/projects.interface"; -import { OllamaService } from "services/ollama.service"; + +// import { OllamaService } from "services/ollama.service"; interface LLMPanelProps { files: FileItem[]; @@ -158,6 +149,16 @@ const LLMPanel: React.FC = ({ const [panelHeight, setPanelHeight] = useState(450); const [isResizing, setIsResizing] = useState(false); + // Build LLMConfig for all helper calls — mirrors autobidsify CLI arg assembly + const buildLLMConfig = (): LLMConfig => ({ + provider, + model, + apiKey, + baseUrl: currentProvider.baseUrl, + isAnthropic: currentProvider.isAnthropic, + noApiKey: currentProvider.noApiKey, + }); + // ======================================================================== // BUTTON 1: GENERATE EVIDENCE BUNDLE // ======================================================================== @@ -202,406 +203,27 @@ const LLMPanel: React.FC = ({ setError("Please generate evidence bundle first"); return; } - if (!currentProvider.noApiKey && !apiKey.trim()) { setError("Please enter an API key"); return; } - // Create abort controller const controller = new AbortController(); setAbortController(controller); - setGeneratingTrio(true); setError(null); setStatus("Generating BIDS trio files..."); try { - const userText = evidenceBundle.user_hints.user_text || ""; - - // ========================================== - // Call 1: Generate dataset_description.json - // ========================================== - let datasetDesc: any; - if (evidenceBundle.trio_found?.["dataset_description.json"]) { - setStatus("1/3 dataset_description.json already exists, skipping..."); - const existing = files.find( - (f) => f.source === "user" && f.name === "dataset_description.json" - ); - datasetDesc = existing?.content ? JSON.parse(existing.content) : {}; - } else { - setStatus("1/3 Generating dataset_description.json..."); - const ddPrompt = getDatasetDescriptionPrompt(userText, evidenceBundle); - - let ddResponse; - if (currentProvider.isAnthropic) { - ddResponse = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - "x-api-key": apiKey, - "anthropic-version": "2023-06-01", - }, - body: JSON.stringify({ - model, - max_tokens: 2048, - messages: [{ role: "user", content: ddPrompt }], - }), - }); - } else if (provider === "ollama") { - // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434"; - // ddResponse = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, { - // method: "POST", - // signal: controller.signal, - // headers: { "Content-Type": "application/json" }, - // body: JSON.stringify({ - // model, - // messages: [{ role: "user", content: ddPrompt }], - // stream: false, - // }), - // }); - ddResponse = await OllamaService.chat(model, [ - { role: "user", content: ddPrompt }, - ]); - } else { - ddResponse = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify({ - model, - messages: [{ role: "user", content: ddPrompt }], - max_tokens: 2048, - }), - }); - } - - // const ddData = await ddResponse.json(); - const ddData = - provider === "ollama" ? ddResponse : await ddResponse.json(); - let ddText = currentProvider.isAnthropic - ? ddData.content[0].text - : ddData.choices[0].message.content; - - // Clean up markdown fences - ddText = ddText - .replace(/^```json\n?/g, "") - .replace(/\n?```$/g, "") - .trim(); - datasetDesc = JSON.parse(ddText); - } - - // ========================================== - // Call 2: Generate README.md - // ========================================== - let readmeContent: string; - if (evidenceBundle.trio_found?.["README.md"]) { - setStatus("2/3 README.md already exists, skipping..."); - const existing = files.find( - (f) => - f.source === "user" && - ["README.md", "README.txt", "README.rst", "readme.md"].includes( - f.name - ) - ); - readmeContent = existing?.content || ""; - } else { - setStatus("2/3 Generating README.md..."); - const readmePrompt = getReadmePrompt(userText); - - let readmeResponse; - if (currentProvider.isAnthropic) { - readmeResponse = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - "x-api-key": apiKey, - "anthropic-version": "2023-06-01", - }, - body: JSON.stringify({ - model, - max_tokens: 2048, - messages: [{ role: "user", content: readmePrompt }], - }), - }); - } else if (provider === "ollama") { - // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434"; - // readmeResponse = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, { - // method: "POST", - // signal: controller.signal, - // headers: { "Content-Type": "application/json" }, - // body: JSON.stringify({ - // model, - // messages: [{ role: "user", content: readmePrompt }], - // stream: false, - // }), - // }); - readmeResponse = await OllamaService.chat(model, [ - { role: "user", content: readmePrompt }, - ]); - } else { - readmeResponse = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify({ - model, - messages: [{ role: "user", content: readmePrompt }], - max_tokens: 2048, - }), - }); - } - - // const readmeData = await readmeResponse.json(); - const readmeData = - provider === "ollama" ? readmeResponse : await readmeResponse.json(); - readmeContent = currentProvider.isAnthropic - ? readmeData.content[0].text - : readmeData.choices[0].message.content; - } - // ========================================== - // Call 3: Generate participants.tsv - // ========================================== - let participantsContent: string; - if (evidenceBundle.trio_found?.["participants.tsv"]) { - setStatus("3/3 participants.tsv already exists, skipping..."); - const existing = files.find( - (f) => f.source === "user" && f.name === "participants.tsv" - ); - participantsContent = existing?.content || ""; - } else { - setStatus("3/3 Generating participants.tsv..."); - const partsPrompt = getParticipantsPrompt(userText); - - // ← ADD HERE: compute subject analysis before try block so it's in scope - const currentSubjectAnalysis = extractSubjectAnalysis( - evidenceBundle?.all_files || [], - evidenceBundle?.user_hints?.n_subjects, - evidenceBundle?.filename_analysis?.python_statistics - ?.dominant_prefixes - ); + const { datasetDesc, readmeContent, participantsTsv, skipped } = + await generateTrioFiles({ + evidenceBundle, + files, + llmConfig: buildLLMConfig(), + signal: controller.signal, + onStatus: setStatus, + }); - console.log("=== PARTICIPANTS DEBUG ==="); - console.log("method:", currentSubjectAnalysis?.method); - console.log("subject_count:", currentSubjectAnalysis?.subject_count); - console.log( - "id_mapping:", - currentSubjectAnalysis?.id_mapping?.id_mapping - ); - console.log( - "reverse_mapping:", - currentSubjectAnalysis?.id_mapping?.reverse_mapping - ); - console.log( - "subject_records sample:", - currentSubjectAnalysis?.subject_records?.slice(0, 3) - ); - const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping; - const expectedCount = evidenceBundle?.user_hints?.n_subjects; - const subjectLabels: string[] = - idMap && - Object.keys(idMap).length > 0 && - (!expectedCount || Object.keys(idMap).length === expectedCount) - ? Object.values(idMap).map((id: string) => `sub-${id}`) - : Array.from( - { - length: expectedCount || Object.keys(idMap || {}).length || 1, - }, - (_, i) => `sub-${String(i + 1).padStart(2, "0")}` - ); - - let partsResponse; - if (currentProvider.isAnthropic) { - partsResponse = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - "x-api-key": apiKey, - "anthropic-version": "2023-06-01", - }, - body: JSON.stringify({ - model, - max_tokens: 1024, - messages: [{ role: "user", content: partsPrompt }], - }), - }); - } else if (provider === "ollama") { - // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434"; - // partsResponse = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, { - // method: "POST", - // signal: controller.signal, - // headers: { "Content-Type": "application/json" }, - // body: JSON.stringify({ - // model, - // messages: [{ role: "user", content: partsPrompt }], - // stream: false, - // }), - // }); - partsResponse = await OllamaService.chat(model, [ - { role: "user", content: partsPrompt }, - ]); - } else { - partsResponse = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify({ - model, - messages: [{ role: "user", content: partsPrompt }], - max_tokens: 1024, - }), - }); - } - - // const partsData = await partsResponse.json(); - const partsData = - provider === "ollama" ? partsResponse : await partsResponse.json(); - const participantsRaw = currentProvider.isAnthropic - ? partsData.content[0].text - : partsData.choices[0].message.content; - - // Build TSV from schema - // try { - // const schemaText = participantsRaw - // .replace(/^```json\n?/g, "") - // .replace(/\n?```$/g, "") - // .trim(); - // const schema = JSON.parse(schemaText); - // const columns: string[] = schema.columns.map((c: any) => c.name); - - // // Get subject IDs from evidence bundle (extracted by Python-style analysis) - // // const idMapping = - // // evidenceBundle?.subject_analysis?.id_mapping?.id_mapping; - // // const subjectLabels: string[] = idMapping - // // ? Object.values(idMapping).map((id) => `sub-${id}`) - // // : ["sub-01"]; // fallback if no subject analysis - // // Get subject IDs from subjectAnalysis state (computed at plan stage) - // // Fall back to computing fresh if plan hasn't been run yet - // const currentSubjectAnalysis = - // subjectAnalysis || - // extractSubjectAnalysis( - // evidenceBundle?.all_files || [], - // evidenceBundle?.user_hints?.n_subjects, - // evidenceBundle?.filename_analysis?.python_statistics - // ?.dominant_prefixes - // ); - // const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping; - // const subjectLabels: string[] = - // idMap && Object.keys(idMap).length > 0 - // ? Object.values(idMap).map((id) => `sub-${id}`) - // : Array.from( - // { length: evidenceBundle?.user_hints?.n_subjects || 1 }, - // (_, i) => `sub-${String(i + 1).padStart(2, "0")}` - // ); - - // const header = columns.join("\t"); - // // ====origin==== - // // const rows = subjectLabels.map((subId) => - // // columns - // // .map((col: string) => (col === "participant_id" ? subId : "n/a")) - // // .join("\t") - // // ); - // //====== end ====== - // // =====update start===== - // const reverseMap = - // currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; - // const subjectRecords = currentSubjectAnalysis?.subject_records || []; - - // const rows = subjectLabels.map((subId) => { - // const bareId = subId.replace(/^sub-/, ""); - // const originalId = reverseMap[bareId]; - // const record = subjectRecords.find( - // (r: any) => r.original_id === originalId - // ); - // return columns - // .map((col: string) => { - // if (col === "participant_id") return subId; - // if (col === "original_id") return originalId || "n/a"; - // if (col === "group") return (record as any)?.group || "n/a"; - // return "n/a"; - // }) - // .join("\t"); - // }); - // //====update end====== - // participantsContent = [header, ...rows].join("\n"); - // } catch (e) { - // // Fallback: LLM didn't return valid JSON schema, use raw content - // participantsContent = participantsRaw - // .replace(/^```\n?/g, "") - // .replace(/\n?```$/g, "") - // .trim(); - // } - // Build TSV from schema + subject analysis - // Mirrors _generate_participants_tsv_from_python() in planner.py - try { - const schemaText = participantsRaw - .replace(/^```json\n?/g, "") - .replace(/\n?```$/g, "") - .trim(); - const schema = JSON.parse(schemaText); - - // LLM decides extra demographic columns (sex, age, group etc.) - // but we always add participant_id and original_id ourselves - const extraColumns: string[] = schema.columns - .map((c: any) => c.name) - .filter( - (name: string) => - name !== "participant_id" && name !== "original_id" - ); - - // Always start with participant_id and original_id - const columns = ["participant_id", "original_id", ...extraColumns]; - - const reverseMap = - currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; - const subjectRecords = currentSubjectAnalysis?.subject_records || []; - - const header = columns.join("\t"); - const rows = subjectLabels.map((subId) => { - const bareId = subId.replace(/^sub-/, ""); - const originalId = reverseMap[bareId] || "n/a"; - const record = subjectRecords.find( - (r: any) => r.original_id === originalId - ); - return columns - .map((col: string) => { - if (col === "participant_id") return subId; - if (col === "original_id") return originalId; - if (col === "group") return (record as any)?.group || "n/a"; - return "n/a"; - }) - .join("\t"); - }); - - participantsContent = [header, ...rows].join("\n"); - } catch (e) { - // Fallback: generate minimal TSV directly from subject analysis - const reverseMap = - currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; - const header = "participant_id\toriginal_id"; - const rows = subjectLabels.map((subId) => { - const bareId = subId.replace(/^sub-/, ""); - const originalId = reverseMap[bareId] || "n/a"; - return `${subId}\t${originalId}`; - }); - participantsContent = [header, ...rows].join("\n"); - } - } - // ========================================== - // Add trio files to Virtual File System - // ========================================== const timestamp = new Date().toLocaleString(); const trioFiles: FileItem[] = [ { @@ -621,10 +243,7 @@ const LLMPanel: React.FC = ({ name: "README.md", type: "file", fileType: "meta", - content: readmeContent - .replace(/^```markdown\n?/g, "") - .replace(/\n?```$/g, "") - .trim(), + content: readmeContent, contentType: "text", isUserMeta: true, parentId: null, @@ -636,10 +255,7 @@ const LLMPanel: React.FC = ({ name: "participants.tsv", type: "file", fileType: "meta", - content: participantsContent - .replace(/^```\n?/g, "") - .replace(/\n?```$/g, "") - .trim(), + content: participantsTsv, contentType: "text", isUserMeta: true, parentId: null, @@ -647,32 +263,27 @@ const LLMPanel: React.FC = ({ generatedAt: timestamp, }, ]; - // replace existing trio files, add if not exist + updateFiles((prev) => { const trioNames = [ "dataset_description.json", "README.md", "participants.tsv", ]; - - // Remove old AI generated trio files const withoutOldTrio = prev.filter( (f) => !(f.source === "ai" && trioNames.includes(f.name)) ); - - // Add new trio files - // return [...withoutOldTrio, ...trioFiles]; - - // Only add AI-generated files for ones that weren't user-uploaded - const newTrioFiles = trioFiles.filter( - (tf) => - !evidenceBundle.trio_found?.[ - tf.name as keyof typeof evidenceBundle.trio_found - ] - ); - + // Only add AI files for ones that weren't user-uploaded (skipped=true means user-uploaded) + const newTrioFiles = trioFiles.filter((tf) => { + if (tf.name === "dataset_description.json") + return !skipped.datasetDesc; + if (tf.name === "README.md") return !skipped.readme; + if (tf.name === "participants.tsv") return !skipped.participants; + return true; + }); return [...withoutOldTrio, ...newTrioFiles]; }); + setTrioGenerated(true); setStatus( "✓ BIDS trio files generated and added to Virtual File System!" @@ -686,9 +297,469 @@ const LLMPanel: React.FC = ({ } } finally { setGeneratingTrio(false); - setAbortController(null); // Clear controller + setAbortController(null); } }; + // const handleGenerateTrio = async () => { + // if (!evidenceBundle) { + // setError("Please generate evidence bundle first"); + // return; + // } + + // if (!currentProvider.noApiKey && !apiKey.trim()) { + // setError("Please enter an API key"); + // return; + // } + + // // Create abort controller + // const controller = new AbortController(); + // setAbortController(controller); + + // setGeneratingTrio(true); + // setError(null); + // setStatus("Generating BIDS trio files..."); + + // try { + // const userText = evidenceBundle.user_hints.user_text || ""; + + // // ========================================== + // // Call 1: Generate dataset_description.json + // // ========================================== + // let datasetDesc: any; + // if (evidenceBundle.trio_found?.["dataset_description.json"]) { + // setStatus("1/3 dataset_description.json already exists, skipping..."); + // const existing = files.find( + // (f) => f.source === "user" && f.name === "dataset_description.json" + // ); + // datasetDesc = existing?.content ? JSON.parse(existing.content) : {}; + // } else { + // setStatus("1/3 Generating dataset_description.json..."); + // const ddPrompt = getDatasetDescriptionPrompt(userText, evidenceBundle); + + // let ddResponse; + // if (currentProvider.isAnthropic) { + // ddResponse = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // "x-api-key": apiKey, + // "anthropic-version": "2023-06-01", + // }, + // body: JSON.stringify({ + // model, + // max_tokens: 2048, + // messages: [{ role: "user", content: ddPrompt }], + // }), + // }); + // } else if (provider === "ollama") { + + // ddResponse = await OllamaService.chat(model, [ + // { role: "user", content: ddPrompt }, + // ]); + // } else { + // ddResponse = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // Authorization: `Bearer ${apiKey}`, + // }, + // body: JSON.stringify({ + // model, + // messages: [{ role: "user", content: ddPrompt }], + // max_tokens: 2048, + // }), + // }); + // } + + // // const ddData = await ddResponse.json(); + // const ddData = + // provider === "ollama" ? ddResponse : await ddResponse.json(); + // let ddText = currentProvider.isAnthropic + // ? ddData.content[0].text + // : ddData.choices[0].message.content; + + // // Clean up markdown fences + // ddText = ddText + // .replace(/^```json\n?/g, "") + // .replace(/\n?```$/g, "") + // .trim(); + // datasetDesc = JSON.parse(ddText); + // } + + // // ========================================== + // // Call 2: Generate README.md + // // ========================================== + // let readmeContent: string; + // if (evidenceBundle.trio_found?.["README.md"]) { + // setStatus("2/3 README.md already exists, skipping..."); + // const existing = files.find( + // (f) => + // f.source === "user" && + // ["README.md", "README.txt", "README.rst", "readme.md"].includes( + // f.name + // ) + // ); + // readmeContent = existing?.content || ""; + // } else { + // setStatus("2/3 Generating README.md..."); + // const readmePrompt = getReadmePrompt(userText); + + // let readmeResponse; + // if (currentProvider.isAnthropic) { + // readmeResponse = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // "x-api-key": apiKey, + // "anthropic-version": "2023-06-01", + // }, + // body: JSON.stringify({ + // model, + // max_tokens: 2048, + // messages: [{ role: "user", content: readmePrompt }], + // }), + // }); + // } else if (provider === "ollama") { + + // readmeResponse = await OllamaService.chat(model, [ + // { role: "user", content: readmePrompt }, + // ]); + // } else { + // readmeResponse = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // Authorization: `Bearer ${apiKey}`, + // }, + // body: JSON.stringify({ + // model, + // messages: [{ role: "user", content: readmePrompt }], + // max_tokens: 2048, + // }), + // }); + // } + + // const readmeData = + // provider === "ollama" ? readmeResponse : await readmeResponse.json(); + // readmeContent = currentProvider.isAnthropic + // ? readmeData.content[0].text + // : readmeData.choices[0].message.content; + // } + // // ========================================== + // // Call 3: Generate participants.tsv + // // ========================================== + // let participantsContent: string; + // if (evidenceBundle.trio_found?.["participants.tsv"]) { + // setStatus("3/3 participants.tsv already exists, skipping..."); + // const existing = files.find( + // (f) => f.source === "user" && f.name === "participants.tsv" + // ); + // participantsContent = existing?.content || ""; + // } else { + // setStatus("3/3 Generating participants.tsv..."); + // const partsPrompt = getParticipantsPrompt(userText); + + // const currentSubjectAnalysis = extractSubjectAnalysis( + // evidenceBundle?.all_files || [], + // evidenceBundle?.user_hints?.n_subjects, + // evidenceBundle?.filename_analysis?.python_statistics + // ?.dominant_prefixes + // ); + + // console.log("=== PARTICIPANTS DEBUG ==="); + // console.log("method:", currentSubjectAnalysis?.method); + // console.log("subject_count:", currentSubjectAnalysis?.subject_count); + // console.log( + // "id_mapping:", + // currentSubjectAnalysis?.id_mapping?.id_mapping + // ); + // console.log( + // "reverse_mapping:", + // currentSubjectAnalysis?.id_mapping?.reverse_mapping + // ); + // console.log( + // "subject_records sample:", + // currentSubjectAnalysis?.subject_records?.slice(0, 3) + // ); + // const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping; + // const expectedCount = evidenceBundle?.user_hints?.n_subjects; + // const subjectLabels: string[] = + // idMap && + // Object.keys(idMap).length > 0 && + // (!expectedCount || Object.keys(idMap).length === expectedCount) + // ? Object.values(idMap).map((id: string) => `sub-${id}`) + // : Array.from( + // { + // length: expectedCount || Object.keys(idMap || {}).length || 1, + // }, + // (_, i) => `sub-${String(i + 1).padStart(2, "0")}` + // ); + + // let partsResponse; + // if (currentProvider.isAnthropic) { + // partsResponse = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // "x-api-key": apiKey, + // "anthropic-version": "2023-06-01", + // }, + // body: JSON.stringify({ + // model, + // max_tokens: 1024, + // messages: [{ role: "user", content: partsPrompt }], + // }), + // }); + // } else if (provider === "ollama") { + + // partsResponse = await OllamaService.chat(model, [ + // { role: "user", content: partsPrompt }, + // ]); + // } else { + // partsResponse = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // Authorization: `Bearer ${apiKey}`, + // }, + // body: JSON.stringify({ + // model, + // messages: [{ role: "user", content: partsPrompt }], + // max_tokens: 1024, + // }), + // }); + // } + + // // const partsData = await partsResponse.json(); + // const partsData = + // provider === "ollama" ? partsResponse : await partsResponse.json(); + // const participantsRaw = currentProvider.isAnthropic + // ? partsData.content[0].text + // : partsData.choices[0].message.content; + + // // Build TSV from schema + // // try { + // // const schemaText = participantsRaw + // // .replace(/^```json\n?/g, "") + // // .replace(/\n?```$/g, "") + // // .trim(); + // // const schema = JSON.parse(schemaText); + // // const columns: string[] = schema.columns.map((c: any) => c.name); + + // // // Get subject IDs from evidence bundle (extracted by Python-style analysis) + // // // const idMapping = + // // // evidenceBundle?.subject_analysis?.id_mapping?.id_mapping; + // // // const subjectLabels: string[] = idMapping + // // // ? Object.values(idMapping).map((id) => `sub-${id}`) + // // // : ["sub-01"]; // fallback if no subject analysis + // // // Get subject IDs from subjectAnalysis state (computed at plan stage) + // // // Fall back to computing fresh if plan hasn't been run yet + // // const currentSubjectAnalysis = + // // subjectAnalysis || + // // extractSubjectAnalysis( + // // evidenceBundle?.all_files || [], + // // evidenceBundle?.user_hints?.n_subjects, + // // evidenceBundle?.filename_analysis?.python_statistics + // // ?.dominant_prefixes + // // ); + // // const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping; + // // const subjectLabels: string[] = + // // idMap && Object.keys(idMap).length > 0 + // // ? Object.values(idMap).map((id) => `sub-${id}`) + // // : Array.from( + // // { length: evidenceBundle?.user_hints?.n_subjects || 1 }, + // // (_, i) => `sub-${String(i + 1).padStart(2, "0")}` + // // ); + + // // const header = columns.join("\t"); + // // // ====origin==== + // // // const rows = subjectLabels.map((subId) => + // // // columns + // // // .map((col: string) => (col === "participant_id" ? subId : "n/a")) + // // // .join("\t") + // // // ); + // // //====== end ====== + // // // =====update start===== + // // const reverseMap = + // // currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; + // // const subjectRecords = currentSubjectAnalysis?.subject_records || []; + + // // const rows = subjectLabels.map((subId) => { + // // const bareId = subId.replace(/^sub-/, ""); + // // const originalId = reverseMap[bareId]; + // // const record = subjectRecords.find( + // // (r: any) => r.original_id === originalId + // // ); + // // return columns + // // .map((col: string) => { + // // if (col === "participant_id") return subId; + // // if (col === "original_id") return originalId || "n/a"; + // // if (col === "group") return (record as any)?.group || "n/a"; + // // return "n/a"; + // // }) + // // .join("\t"); + // // }); + // // //====update end====== + // // participantsContent = [header, ...rows].join("\n"); + // // } catch (e) { + // // // Fallback: LLM didn't return valid JSON schema, use raw content + // // participantsContent = participantsRaw + // // .replace(/^```\n?/g, "") + // // .replace(/\n?```$/g, "") + // // .trim(); + // // } + // // Build TSV from schema + subject analysis + // // Mirrors _generate_participants_tsv_from_python() in planner.py + // try { + // const schemaText = participantsRaw + // .replace(/^```json\n?/g, "") + // .replace(/\n?```$/g, "") + // .trim(); + // const schema = JSON.parse(schemaText); + + // // LLM decides extra demographic columns (sex, age, group etc.) + // // but we always add participant_id and original_id ourselves + // const extraColumns: string[] = schema.columns + // .map((c: any) => c.name) + // .filter( + // (name: string) => + // name !== "participant_id" && name !== "original_id" + // ); + + // // Always start with participant_id and original_id + // const columns = ["participant_id", "original_id", ...extraColumns]; + + // const reverseMap = + // currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; + // const subjectRecords = currentSubjectAnalysis?.subject_records || []; + + // const header = columns.join("\t"); + // const rows = subjectLabels.map((subId) => { + // const bareId = subId.replace(/^sub-/, ""); + // const originalId = reverseMap[bareId] || "n/a"; + // const record = subjectRecords.find( + // (r: any) => r.original_id === originalId + // ); + // return columns + // .map((col: string) => { + // if (col === "participant_id") return subId; + // if (col === "original_id") return originalId; + // if (col === "group") return (record as any)?.group || "n/a"; + // return "n/a"; + // }) + // .join("\t"); + // }); + + // participantsContent = [header, ...rows].join("\n"); + // } catch (e) { + // // Fallback: generate minimal TSV directly from subject analysis + // const reverseMap = + // currentSubjectAnalysis?.id_mapping?.reverse_mapping || {}; + // const header = "participant_id\toriginal_id"; + // const rows = subjectLabels.map((subId) => { + // const bareId = subId.replace(/^sub-/, ""); + // const originalId = reverseMap[bareId] || "n/a"; + // return `${subId}\t${originalId}`; + // }); + // participantsContent = [header, ...rows].join("\n"); + // } + // } + // // ========================================== + // // Add trio files to Virtual File System + // // ========================================== + // const timestamp = new Date().toLocaleString(); + // const trioFiles: FileItem[] = [ + // { + // id: generateId(), + // name: "dataset_description.json", + // type: "file", + // fileType: "meta", + // content: JSON.stringify(datasetDesc, null, 2), + // contentType: "text", + // isUserMeta: true, + // parentId: null, + // source: "ai", + // generatedAt: timestamp, + // }, + // { + // id: generateId(), + // name: "README.md", + // type: "file", + // fileType: "meta", + // content: readmeContent + // .replace(/^```markdown\n?/g, "") + // .replace(/\n?```$/g, "") + // .trim(), + // contentType: "text", + // isUserMeta: true, + // parentId: null, + // source: "ai", + // generatedAt: timestamp, + // }, + // { + // id: generateId(), + // name: "participants.tsv", + // type: "file", + // fileType: "meta", + // content: participantsContent + // .replace(/^```\n?/g, "") + // .replace(/\n?```$/g, "") + // .trim(), + // contentType: "text", + // isUserMeta: true, + // parentId: null, + // source: "ai", + // generatedAt: timestamp, + // }, + // ]; + // // replace existing trio files, add if not exist + // updateFiles((prev) => { + // const trioNames = [ + // "dataset_description.json", + // "README.md", + // "participants.tsv", + // ]; + + // // Remove old AI generated trio files + // const withoutOldTrio = prev.filter( + // (f) => !(f.source === "ai" && trioNames.includes(f.name)) + // ); + + // // Add new trio files + // // return [...withoutOldTrio, ...trioFiles]; + + // // Only add AI-generated files for ones that weren't user-uploaded + // const newTrioFiles = trioFiles.filter( + // (tf) => + // !evidenceBundle.trio_found?.[ + // tf.name as keyof typeof evidenceBundle.trio_found + // ] + // ); + + // return [...withoutOldTrio, ...newTrioFiles]; + // }); + // setTrioGenerated(true); + // setStatus( + // "✓ BIDS trio files generated and added to Virtual File System!" + // ); + // } catch (err: any) { + // if (err.name === "AbortError") { + // setStatus("❌ Generation cancelled"); + // } else { + // setError(err.message || "Failed to generate trio files"); + // setStatus("❌ Error generating trio files"); + // } + // } finally { + // setGeneratingTrio(false); + // setAbortController(null); // Clear controller + // } + // }; const handleMouseDown = (e: React.MouseEvent) => { setIsResizing(true); @@ -725,312 +796,222 @@ const LLMPanel: React.FC = ({ const currentProvider = llmProviders[provider]; - const handleGenerate = async () => { + const handleGeneratePlan = async () => { if (!currentProvider.noApiKey && !apiKey.trim()) { setError("Please enter an API key"); return; } - if (!baseDirectoryPath.trim()) { setError("Please enter a base directory path"); return; } - // Create abort controller const controller = new AbortController(); setAbortController(controller); - setLoading(true); setError(null); - setStatus(`Generating script using ${currentProvider.name}...`); - - const fileSummary = buildFileSummary(files); - const filePatterns = analyzeFilePatterns(files); - const userContext = getUserContext(files); - const annotations = getFileAnnotations(files); - - // UPDATED: Improved prompt that uses trio files - const prompt = getConversionScriptPrompt( - baseDirectoryPath, - fileSummary, - filePatterns, - userContext, - annotations - ); + setStatus(`Generating BIDSPlan.yaml using ${currentProvider.name}...`); try { - let response; - - if (provider === "ollama") { - // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434"; - // response = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, { - // method: "POST", - // signal: controller.signal, - // headers: { - // "Content-Type": "application/json", - // }, - // body: JSON.stringify({ - // model, - // messages: [ - // { - // role: "system", - // content: - // "You are a neuroimaging data expert specializing in BIDS format conversion. Output only Python code without markdown fences or explanations.", - // }, - // { role: "user", content: prompt }, - // ], - // stream: false, - // }), - // }); - response = await OllamaService.chat(model, [ - { - role: "system", - content: - "You are a neuroimaging data expert specializing in BIDS format conversion. Output only Python code without markdown fences or explanations.", - }, - { role: "user", content: prompt }, - ]); - } else if (currentProvider.isAnthropic) { - response = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - "x-api-key": apiKey, - "anthropic-version": "2023-06-01", - }, - body: JSON.stringify({ - model, - max_tokens: 4096, - messages: [{ role: "user", content: prompt }], - }), - }); - } else { - const headers: Record = { - "Content-Type": "application/json", - }; - - if (!currentProvider.noApiKey) { - headers["Authorization"] = `Bearer ${apiKey}`; - } + const { + planYaml, + subjectAnalysis: sa, + participantsTsv, + coverageWarnings, + } = await buildBidsPlan({ + evidenceBundle, + llmConfig: buildLLMConfig(), + signal: controller.signal, + onStatus: setStatus, + }); - response = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers, - body: JSON.stringify({ - model, - messages: [ - { - role: "system", - content: - "You are a neuroimaging data expert specializing in BIDS format conversion. Output only Python code without markdown fences or explanations.", - }, - { role: "user", content: prompt }, - ], - max_tokens: 4096, - temperature: 0.7, - }), + // Store subject analysis for ZIP packaging + setSubjectAnalysis(sa); + + // Dump final YAML string (planYaml is raw string from LLM, already cleaned) + setBidsPlan(planYaml); + + // Update participants.tsv in VFS with the full version from the plan stage + if (participantsTsv) { + const timestamp = new Date().toLocaleString(); + updateFiles((prev) => { + const withoutOld = prev.filter( + (f) => !(f.source === "ai" && f.name === "participants.tsv") + ); + return [ + ...withoutOld, + { + id: generateId(), + name: "participants.tsv", + type: "file" as const, + fileType: "meta", + content: participantsTsv, + contentType: "text", + isUserMeta: true, + parentId: null, + source: "ai" as const, + generatedAt: timestamp, + }, + ]; }); } - // const data = await response.json(); - const data = provider === "ollama" ? response : await response.json(); - - // if (!response.ok) { - // throw new Error(data.error?.message || "Failed to generate script"); - // } - if (!response.ok && provider !== "ollama") { - throw new Error(data.error?.message || "Failed to generate script"); + if (coverageWarnings.length > 0) { + setStatus( + `✓ BIDSPlan.yaml generated (${coverageWarnings.length} coverage warning(s) — check console)` + ); + } else { + setStatus(`✓ BIDSPlan.yaml generated using ${currentProvider.name}`); } - - // let script = ""; - // if (currentProvider.isAnthropic) { - // script = data.content[0].text; - // } else { - // script = data.choices[0].message.content; - // } - let script = currentProvider.isAnthropic - ? data.content[0].text - : data.choices[0].message.content; - - // Clean up markdown fences if AI included them anyway - script = script.replace(/^```python\n?/g, "").replace(/\n?```$/g, ""); - - setGeneratedScript(script); - setStatus(`✓ Script generated using ${currentProvider.name}`); } catch (err: any) { if (err.name === "AbortError") { setStatus("❌ Generation cancelled"); } else { - setError(err.message || "Failed to generate script"); - setStatus("❌ Error generating script"); + setError(err.message || "Failed to generate BIDSPlan"); + setStatus("❌ Error generating BIDSPlan"); } } finally { setLoading(false); - setAbortController(null); // Clear controller + setAbortController(null); } }; + // const handleGeneratePlan = async () => { + // if (!currentProvider.noApiKey && !apiKey.trim()) { + // setError("Please enter an API key"); + // return; + // } + // if (!baseDirectoryPath.trim()) { + // setError("Please enter a base directory path"); + // return; + // } - const handleGeneratePlan = async () => { - if (!currentProvider.noApiKey && !apiKey.trim()) { - setError("Please enter an API key"); - return; - } - if (!baseDirectoryPath.trim()) { - setError("Please enter a base directory path"); - return; - } - - const controller = new AbortController(); - setAbortController(controller); - setLoading(true); - setError(null); - setStatus(`Generating BIDSPlan.yaml using ${currentProvider.name}...`); - - // ── Compute subject analysis (mirrors planner.py Step 1) - const allFiles = evidenceBundle?.all_files || []; - const userNSubjects = evidenceBundle?.user_hints?.n_subjects; - const dominantPrefixes = - evidenceBundle?.filename_analysis?.python_statistics?.dominant_prefixes; - - const computedSubjectAnalysis = extractSubjectAnalysis( - allFiles, - userNSubjects, - dominantPrefixes - ); - - setSubjectAnalysis(computedSubjectAnalysis); - - const fileSummary = buildFileSummary(files); - const filePatterns = analyzeFilePatterns(files); - const userContext = getUserContext(files); - // const subjectInfo = extractSubjectsFromFiles(files); - const subjectInfo = computedSubjectAnalysis; - const sampleFiles = - evidenceBundle?.samples - ?.slice(0, 10) - .map((s: any) => ` - ${s.relpath}`) - .join("\n") || ""; - - const prompt = getBIDSPlanPrompt( - fileSummary, - filePatterns, - userContext, - { - subjects: Object.entries( - computedSubjectAnalysis.id_mapping.id_mapping - ).map(([originalId, bidsId]) => ({ originalId, bidsId })), - strategy: computedSubjectAnalysis.id_mapping.strategy_used, - }, - evidenceBundle?.counts_by_ext || {}, - sampleFiles, - evidenceBundle - ); - - try { - let response; - - if (provider === "ollama") { - // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434"; - // response = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, { - // method: "POST", - // signal: controller.signal, - // headers: { "Content-Type": "application/json" }, - // body: JSON.stringify({ - // model, - // messages: [ - // { - // role: "system", - // content: - // "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.", - // }, - // { role: "user", content: prompt }, - // ], - // stream: false, - // }), - // }); - response = await OllamaService.chat(model, [ - { - role: "system", - content: - "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.", - }, - { role: "user", content: prompt }, - ]); - } else if (currentProvider.isAnthropic) { - response = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - "x-api-key": apiKey, - "anthropic-version": "2023-06-01", - }, - body: JSON.stringify({ - model, - max_tokens: 2048, - messages: [{ role: "user", content: prompt }], - }), - }); - } else { - response = await fetch(currentProvider.baseUrl, { - method: "POST", - signal: controller.signal, - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${apiKey}`, - }, - body: JSON.stringify({ - model, - messages: [ - { - role: "system", - content: - "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.", - }, - { role: "user", content: prompt }, - ], - max_tokens: 2048, - temperature: 0.15, - }), - }); - } - - // const data = await response.json(); + // const controller = new AbortController(); + // setAbortController(controller); + // setLoading(true); + // setError(null); + // setStatus(`Generating BIDSPlan.yaml using ${currentProvider.name}...`); + + // // ── Compute subject analysis (mirrors planner.py Step 1) + // const allFiles = evidenceBundle?.all_files || []; + // const userNSubjects = evidenceBundle?.user_hints?.n_subjects; + // const dominantPrefixes = + // evidenceBundle?.filename_analysis?.python_statistics?.dominant_prefixes; + + // const computedSubjectAnalysis = extractSubjectAnalysis( + // allFiles, + // userNSubjects, + // dominantPrefixes + // ); - // if (!response.ok) { - // throw new Error(data.error?.message || "Failed to generate BIDSPlan"); - // } - const data = provider === "ollama" ? response : await response.json(); - if (!response.ok && provider !== "ollama") { - throw new Error(data.error?.message || "Failed to generate BIDSPlan"); - } + // setSubjectAnalysis(computedSubjectAnalysis); + + // const fileSummary = buildFileSummary(files); + // const filePatterns = analyzeFilePatterns(files); + // const userContext = getUserContext(files); + // // const subjectInfo = extractSubjectsFromFiles(files); + // const subjectInfo = computedSubjectAnalysis; + // const sampleFiles = + // evidenceBundle?.samples + // ?.slice(0, 10) + // .map((s: any) => ` - ${s.relpath}`) + // .join("\n") || ""; + + // const prompt = getBIDSPlanPrompt( + // fileSummary, + // filePatterns, + // userContext, + // { + // subjects: Object.entries( + // computedSubjectAnalysis.id_mapping.id_mapping + // ).map(([originalId, bidsId]) => ({ originalId, bidsId })), + // strategy: computedSubjectAnalysis.id_mapping.strategy_used, + // }, + // evidenceBundle?.counts_by_ext || {}, + // sampleFiles, + // evidenceBundle + // ); - let plan = currentProvider.isAnthropic - ? data.content[0].text - : data.choices[0].message.content; + // try { + // let response; + + // if (provider === "ollama") { + + // response = await OllamaService.chat(model, [ + // { + // role: "system", + // content: + // "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.", + // }, + // { role: "user", content: prompt }, + // ]); + // } else if (currentProvider.isAnthropic) { + // response = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // "x-api-key": apiKey, + // "anthropic-version": "2023-06-01", + // }, + // body: JSON.stringify({ + // model, + // max_tokens: 2048, + // messages: [{ role: "user", content: prompt }], + // }), + // }); + // } else { + // response = await fetch(currentProvider.baseUrl, { + // method: "POST", + // signal: controller.signal, + // headers: { + // "Content-Type": "application/json", + // Authorization: `Bearer ${apiKey}`, + // }, + // body: JSON.stringify({ + // model, + // messages: [ + // { + // role: "system", + // content: + // "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.", + // }, + // { role: "user", content: prompt }, + // ], + // max_tokens: 2048, + // temperature: 0.15, + // }), + // }); + // } - // Clean up markdown fences if present - plan = plan - .replace(/^```yaml\n?/g, "") - .replace(/\n?```$/g, "") - .trim(); + // const data = provider === "ollama" ? response : await response.json(); + // if (!response.ok && provider !== "ollama") { + // throw new Error(data.error?.message || "Failed to generate BIDSPlan"); + // } - setBidsPlan(plan); - setStatus(`✓ BIDSPlan.yaml generated using ${currentProvider.name}`); - } catch (err: any) { - if (err.name === "AbortError") { - setStatus("❌ Generation cancelled"); - } else { - setError(err.message || "Failed to generate BIDSPlan"); - setStatus("❌ Error generating BIDSPlan"); - } - } finally { - setLoading(false); - setAbortController(null); - } - }; + // let plan = currentProvider.isAnthropic + // ? data.content[0].text + // : data.choices[0].message.content; + + // // Clean up markdown fences if present + // plan = plan + // .replace(/^```yaml\n?/g, "") + // .replace(/\n?```$/g, "") + // .trim(); + + // setBidsPlan(plan); + // setStatus(`✓ BIDSPlan.yaml generated using ${currentProvider.name}`); + // } catch (err: any) { + // if (err.name === "AbortError") { + // setStatus("❌ Generation cancelled"); + // } else { + // setError(err.message || "Failed to generate BIDSPlan"); + // setStatus("❌ Error generating BIDSPlan"); + // } + // } finally { + // setLoading(false); + // setAbortController(null); + // } + // }; const handleDownloadPlan = () => { const blob = new Blob([bidsPlan], { type: "text/yaml" }); @@ -1518,7 +1499,7 @@ const LLMPanel: React.FC = ({ )} - {/* = ({ size="small" multiline rows={2} - /> */} + sx={{ mb: 1 }} + />