From d62e6d1488a636510addfb786e095ae31d24b6bb Mon Sep 17 00:00:00 2001
From: elainefan331 <elainefan331@gmail.com>
Date: Wed, 1 Apr 2026 10:36:28 -0400
Subject: [PATCH 1/5] fix: build participants.tsv from subject analysis; cap LLM document context

---
 .../Dashboard/DatasetOrganizer/LLMPanel.tsx   | 197 ++++++++++++++----
 .../utils/filenameTokenizer.ts                | 116 ++++++++++-
 .../DatasetOrganizer/utils/llmPrompts.ts      |   6 +-
 3 files changed, 265 insertions(+), 54 deletions(-)
diff --git a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx
index d9d6366..a8fd052 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx
+++ b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx
@@ -184,6 +184,7 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
       });
 
       setEvidenceBundle(bundle);
+      setSubjectAnalysis(null); // reset stale subject analysis for the new bundle
       downloadJSON(bundle, "evidence_bundle.json");
       setStatus("✓ Evidence bundle generated and downloaded!");
     } catch (err: any) {
@@ -380,6 +381,43 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
         setStatus("3/3 Generating participants.tsv...");
         const partsPrompt = getParticipantsPrompt(userText);
 
+        // Compute subject analysis before the try block so both the try and catch paths can use it
+        const currentSubjectAnalysis = extractSubjectAnalysis(
+          evidenceBundle?.all_files || [],
+          evidenceBundle?.user_hints?.n_subjects,
+          evidenceBundle?.filename_analysis?.python_statistics
+            ?.dominant_prefixes
+        );
+
+        console.log("=== PARTICIPANTS DEBUG ===");
+        console.log("method:", currentSubjectAnalysis?.method);
+        console.log("subject_count:", currentSubjectAnalysis?.subject_count);
+        console.log(
+          "id_mapping:",
+          currentSubjectAnalysis?.id_mapping?.id_mapping
+        );
+        console.log(
+          "reverse_mapping:",
+          currentSubjectAnalysis?.id_mapping?.reverse_mapping
+        );
+        console.log(
+          "subject_records sample:",
+          currentSubjectAnalysis?.subject_records?.slice(0, 3)
+        );
+        const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
+        const expectedCount = evidenceBundle?.user_hints?.n_subjects;
+        const subjectLabels: string[] =
+          idMap &&
+          Object.keys(idMap).length > 0 &&
+          (!expectedCount || Object.keys(idMap).length === expectedCount)
+            ? Object.values(idMap).map((id: string) => `sub-${id}`)
+            : Array.from(
+                {
+                  length: expectedCount || Object.keys(idMap || {}).length || 1,
+                },
+                (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
+              );
+
         let partsResponse;
         if (currentProvider.isAnthropic) {
           partsResponse = await fetch(currentProvider.baseUrl, {
@@ -435,52 +473,130 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
           : partsData.choices[0].message.content;
 
         // Build TSV from schema
+        // try {
+        //   const schemaText = participantsRaw
+        //     .replace(/^```json\n?/g, "")
+        //     .replace(/\n?```$/g, "")
+        //     .trim();
+        //   const schema = JSON.parse(schemaText);
+        //   const columns: string[] = schema.columns.map((c: any) => c.name);
+
+        //   // Get subject IDs from evidence bundle (extracted by Python-style analysis)
+        //   // const idMapping =
+        //   //   evidenceBundle?.subject_analysis?.id_mapping?.id_mapping;
+        //   // const subjectLabels: string[] = idMapping
+        //   //   ? Object.values(idMapping).map((id) => `sub-${id}`)
+        //   //   : ["sub-01"]; // fallback if no subject analysis
+        //   // Get subject IDs from subjectAnalysis state (computed at plan stage)
+        //   // Fall back to computing fresh if plan hasn't been run yet
+        //   const currentSubjectAnalysis =
+        //     subjectAnalysis ||
+        //     extractSubjectAnalysis(
+        //       evidenceBundle?.all_files || [],
+        //       evidenceBundle?.user_hints?.n_subjects,
+        //       evidenceBundle?.filename_analysis?.python_statistics
+        //         ?.dominant_prefixes
+        //     );
+        //   const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
+        //   const subjectLabels: string[] =
+        //     idMap && Object.keys(idMap).length > 0
+        //       ? Object.values(idMap).map((id) => `sub-${id}`)
+        //       : Array.from(
+        //           { length: evidenceBundle?.user_hints?.n_subjects || 1 },
+        //           (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
+        //         );
+
+        //   const header = columns.join("\t");
+        //   // ====origin====
+        //   // const rows = subjectLabels.map((subId) =>
+        //   //   columns
+        //   //     .map((col: string) => (col === "participant_id" ? subId : "n/a"))
+        //   //     .join("\t")
+        //   // );
+        //   //====== end ======
+        //   // =====update start=====
+        //   const reverseMap =
+        //     currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
+        //   const subjectRecords = currentSubjectAnalysis?.subject_records || [];
+
+        //   const rows = subjectLabels.map((subId) => {
+        //     const bareId = subId.replace(/^sub-/, "");
+        //     const originalId = reverseMap[bareId];
+        //     const record = subjectRecords.find(
+        //       (r: any) => r.original_id === originalId
+        //     );
+        //     return columns
+        //       .map((col: string) => {
+        //         if (col === "participant_id") return subId;
+        //         if (col === "original_id") return originalId || "n/a";
+        //         if (col === "group") return (record as any)?.group || "n/a";
+        //         return "n/a";
+        //       })
+        //       .join("\t");
+        //   });
+        //   //====update end======
+        //   participantsContent = [header, ...rows].join("\n");
+        // } catch (e) {
+        //   // Fallback: LLM didn't return valid JSON schema, use raw content
+        //   participantsContent = participantsRaw
+        //     .replace(/^```\n?/g, "")
+        //     .replace(/\n?```$/g, "")
+        //     .trim();
+        // }
+        // Build TSV from schema + subject analysis
+        // Mirrors _generate_participants_tsv_from_python() in planner.py
         try {
           const schemaText = participantsRaw
             .replace(/^```json\n?/g, "")
             .replace(/\n?```$/g, "")
             .trim();
           const schema = JSON.parse(schemaText);
-          const columns: string[] = schema.columns.map((c: any) => c.name);
-
-          // Get subject IDs from evidence bundle (extracted by Python-style analysis)
-          // const idMapping =
-          //   evidenceBundle?.subject_analysis?.id_mapping?.id_mapping;
-          // const subjectLabels: string[] = idMapping
-          //   ? Object.values(idMapping).map((id) => `sub-${id}`)
-          //   : ["sub-01"]; // fallback if no subject analysis
-          // Get subject IDs from subjectAnalysis state (computed at plan stage)
-          // Fall back to computing fresh if plan hasn't been run yet
-          const currentSubjectAnalysis =
-            subjectAnalysis ||
-            extractSubjectAnalysis(
-              evidenceBundle?.all_files || [],
-              evidenceBundle?.user_hints?.n_subjects,
-              evidenceBundle?.filename_analysis?.python_statistics
-                ?.dominant_prefixes
+
+          // LLM decides extra demographic columns (sex, age, group etc.)
+          // but we always add participant_id and original_id ourselves
+          const extraColumns: string[] = schema.columns
+            .map((c: any) => c.name)
+            .filter(
+              (name: string) =>
+                name !== "participant_id" && name !== "original_id"
             );
-          const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
-          const subjectLabels: string[] =
-            idMap && Object.keys(idMap).length > 0
-              ? Object.values(idMap).map((id) => `sub-${id}`)
-              : Array.from(
-                  { length: evidenceBundle?.user_hints?.n_subjects || 1 },
-                  (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
-                );
+
+          // Always start with participant_id and original_id
+          const columns = ["participant_id", "original_id", ...extraColumns];
+
+          const reverseMap =
+            currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
+          const subjectRecords = currentSubjectAnalysis?.subject_records || [];
 
           const header = columns.join("\t");
-          const rows = subjectLabels.map((subId) =>
-            columns
-              .map((col: string) => (col === "participant_id" ? subId : "n/a"))
-              .join("\t")
-          );
+          const rows = subjectLabels.map((subId) => {
+            const bareId = subId.replace(/^sub-/, "");
+            const originalId = reverseMap[bareId] || "n/a";
+            const record = subjectRecords.find(
+              (r: any) => r.original_id === originalId
+            );
+            return columns
+              .map((col: string) => {
+                if (col === "participant_id") return subId;
+                if (col === "original_id") return originalId;
+                if (col === "group") return (record as any)?.group || "n/a";
+                return "n/a";
+              })
+              .join("\t");
+          });
+
           participantsContent = [header, ...rows].join("\n");
         } catch (e) {
-          // Fallback: LLM didn't return valid JSON schema, use raw content
-          participantsContent = participantsRaw
-            .replace(/^```\n?/g, "")
-            .replace(/\n?```$/g, "")
-            .trim();
+          // Fallback: generate minimal TSV directly from subject analysis
+          const reverseMap =
+            currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
+          const header = "participant_id\toriginal_id";
+          const rows = subjectLabels.map((subId) => {
+            const bareId = subId.replace(/^sub-/, "");
+            const originalId = reverseMap[bareId] || "n/a";
+            return `${subId}\t${originalId}`;
+          });
+          participantsContent = [header, ...rows].join("\n");
         }
       }
       // ==========================================
@@ -632,11 +748,6 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
     const filePatterns = analyzeFilePatterns(files);
     const userContext = getUserContext(files);
     const annotations = getFileAnnotations(files);
-    // console.log("=== PROMPT BEING SENT TO LLM ===");
-    // console.log(fileSummary);
-    // console.log(filePatterns);
-    // console.log(userContext);
-    // console.log("=================================");
 
     // UPDATED: Improved prompt that uses trio files
     const prompt = getConversionScriptPrompt(
@@ -788,6 +899,7 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
       userNSubjects,
       dominantPrefixes
     );
+
     setSubjectAnalysis(computedSubjectAnalysis);
 
     const fileSummary = buildFileSummary(files);
@@ -801,11 +913,6 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
         .map((s: any) => `  - ${s.relpath}`)
         .join("\n") || "";
 
-    // console.log("=== SAMPLE FILES ===");
-    // console.log(sampleFiles);
-    // console.log("=== COUNTS BY EXT ===");
-    // console.log(evidenceBundle?.counts_by_ext);
-
     const prompt = getBIDSPlanPrompt(
       fileSummary,
       filePatterns,
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
index 1c1820b..4708c13 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
@@ -7,6 +7,7 @@ export interface SubjectRecord {
   site: string | null;
   pattern_name: string;
   file_count: number;
+  group?: string;
 }
 
 export interface SubjectAnalysis {
@@ -359,14 +360,52 @@ const extractNumericIdFromIdentifier = (identifier: string): string | null => {
 
 // ── Step 1: Directory structure patterns
 // Mirrors _extract_subjects_from_directory_structure() in planner.py
+const SKIP_DIRS = new Set([
+  "anat",
+  "func",
+  "dwi",
+  "fmap",
+  "nirs",
+  "meg",
+  "eeg",
+  "beh",
+  "perf",
+  "derivatives",
+  "sourcedata",
+  "stimuli",
+  "walking",
+  "resting",
+  "resting_state",
+  "run",
+  "ses",
+  "pd",
+  "control",
+  "hc",
+  "task",
+  "sub",
+  "dataset",
+  "data",
+  "raw",
+  "bids",
+  "output",
+  "outputs",
+  "staging",
+  "_staging",
+  "mri",
+  "fnirs",
+  "edf",
+  "dicom",
+]);
+
 const extractFromDirectoryStructure = (
   allFiles: string[]
 ): Omit<SubjectAnalysis, "id_mapping"> | null => {
   const patterns: Array<[RegExp, boolean, number, number | null, string]> = [
-    [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"],
-    [/^sub-(\w+)$/, false, 1, null, "standard_bids"], // directory named sub-01
-    [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"],
-    [/^\d{3,}$/, false, 1, null, "numeric_only"], // directory named 001
+    [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"], // Beijing_sub82352
+    [/^sub-(\w+)$/, false, 1, null, "standard_bids"], // sub-01
+    [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"], // subject_01
+    [/^\d{3,}$/, false, 1, null, "numeric_only"], // 001
+    [/^([A-Za-z]+\d+)$/, false, 1, null, "alphanum_id"], // PD01, Control01, HC03
   ];
 
   const subjectRecords: SubjectRecord[] = [];
@@ -374,11 +413,14 @@ const extractFromDirectoryStructure = (
 
   for (const filepath of allFiles) {
     const parts = filepath.split("/");
-    // Only check the first 2 path parts (directory levels), not the filename
-    // mirrors: for part in parts[:2]
-    const dirsOnly = parts.slice(0, Math.min(2, parts.length - 1)); // exclude filename
+    // Check ALL directory levels (not just first 2)
+    const dirsOnly = parts.slice(0, parts.length - 1);
+    // const dirsOnly = parts.slice(0, Math.min(2, parts.length - 1)); // only first 2 levels
 
     for (const part of dirsOnly) {
+      // NOTE: skipping known non-subject directory names (SKIP_DIRS) is currently disabled:
+      // if (SKIP_DIRS.has(part.toLowerCase())) continue;
+
       for (const [
         regex,
         hasSite,
@@ -393,7 +435,7 @@ const extractFromDirectoryStructure = (
           seenIds.add(originalId);
           subjectRecords.push({
             original_id: originalId,
-            numeric_id: match[idGroup],
+            numeric_id: match[idGroup] || match[0],
             site: hasSite && siteGroup ? match[siteGroup] : null,
             pattern_name: patternName,
             file_count: 0,
@@ -407,11 +449,41 @@ const extractFromDirectoryStructure = (
   if (subjectRecords.length === 0) return null;
 
   subjectRecords.sort((a, b) => {
+    // const na = parseInt(a.numeric_id) || 0;
+    // const nb = parseInt(b.numeric_id) || 0;
+    // return na - nb;
+    const aMatch = a.original_id.match(/^([A-Za-z]+)(\d+)$/);
+    const bMatch = b.original_id.match(/^([A-Za-z]+)(\d+)$/);
+
+    if (aMatch && bMatch) {
+      const prefixCompare = aMatch[1].localeCompare(bMatch[1]);
+      if (prefixCompare !== 0) return prefixCompare;
+      return parseInt(aMatch[2]) - parseInt(bMatch[2]);
+    }
+
     const na = parseInt(a.numeric_id) || 0;
     const nb = parseInt(b.numeric_id) || 0;
     return na - nb;
   });
 
+  // (Currently disabled) Build group map: subject originalId → parent directory name
+  // const groupMap: Record<string, string> = {};
+  // for (const filepath of allFiles) {
+  //   const parts = filepath.split("/");
+  //   for (let i = 1; i < parts.length - 1; i++) {
+  //     if (seenIds.has(parts[i]) && !SKIP_DIRS.has(parts[i - 1].toLowerCase())) {
+  //       groupMap[parts[i]] = parts[i - 1];
+  //     }
+  //   }
+  // }
+
+  // // Attach group to each record
+  // for (const rec of subjectRecords) {
+  //   if (groupMap[rec.original_id]) {
+  //     rec.group = groupMap[rec.original_id];
+  //   }
+  // }
+
   return {
     success: true,
     method: "directory_structure",
@@ -551,6 +623,34 @@ export const extractSubjectAnalysis = (
       python_generated_filename_rules: [],
     };
   }
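+  // Bug fix for subject mapping.
+  // Previously the ID mapping was always built from the detected subjects:
+  //   const idMapping = generateIdMapping(subjectInfo);
+  //   return { ...subjectInfo, id_mapping: idMapping };
+  // That path is still taken below when no count mismatch is detected.
+  // Updated behavior: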
+  // CRITICAL: n_subjects is authoritative (mirrors planner.py PROMPT_BIDS_PLAN)
+  // If analysis count doesn't match user input, fall back to sequential numbering
+  const expectedCount = userNSubjects;
+  if (expectedCount && subjectInfo.subject_count !== expectedCount) {
+    const idMap: Record<string, string> = {};
+    const reverseMap: Record<string, string> = {};
+    for (let i = 1; i <= expectedCount; i++) {
+      const bidsId = String(i).padStart(2, "0");
+      idMap[`sub-${bidsId}`] = bidsId;
+      reverseMap[bidsId] = `sub-${bidsId}`;
+    }
+    return {
+      ...subjectInfo,
+      subject_count: expectedCount,
+      id_mapping: {
+        id_mapping: idMap,
+        reverse_mapping: reverseMap,
+        strategy_used: "numeric_fallback",
+        metadata_columns: [],
+      },
+    };
+  }
 
   const idMapping = generateIdMapping(subjectInfo);
   return { ...subjectInfo, id_mapping: idMapping };
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts
index 6d6a796..be03f83 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts
@@ -9,9 +9,13 @@ export const getDatasetDescriptionPrompt = (
   evidenceBundle?: any
 ): string => {
   const documentsContext =
+    // evidenceBundle?.documents
+    //   ?.map((d: any) => `[${d.filename}]:\n${d.content}`)
+    //   .join("\n\n") || "";
     evidenceBundle?.documents
-      ?.map((d: any) => `[${d.filename}]:\n${d.content}`)
+      ?.map((d: any) => `[${d.filename}]:\n${(d.content || "").slice(0, 500)}`)
       .join("\n\n") || "";
+
   return `You are a BIDS dataset_description.json generator.
   
   CRITICAL: Use the following user-provided content to extract dataset information!

From 843f0a21d95bc273bb2374f57647a6b6bc69cd95 Mon Sep 17 00:00:00 2001
From: elainefan331 <elainefan331@gmail.com>
Date: Wed, 1 Apr 2026 15:05:56 -0400
Subject: [PATCH 2/5] fix: resolve leaks in previewModal 3D viewer

---
 src/components/PreviewModal.tsx |  36 +-
 src/utils/preview.js            | 600 +++++++++++++++++++++++---------
 2 files changed, 454 insertions(+), 182 deletions(-)

diff --git a/src/components/PreviewModal.tsx b/src/components/PreviewModal.tsx
index 4a3ce82..bf81318 100644
--- a/src/components/PreviewModal.tsx
+++ b/src/components/PreviewModal.tsx
@@ -28,30 +28,14 @@ const PreviewModal: React.FC<{
   // fix end---------------------
 
   useEffect(() => {
-    if (!isOpen) return;
-    //add spinner
-    // if (!isOpen || isLoading) return;
-
-    // fix start-----------: Get the container element from the ref.
-    // const container = canvasContainerRef.current;
-    // if (!container) {
-    //   // This can happen briefly on the first render, so we just wait for the next render.
-    //   return;
-    // }
-    // // 3. Check for the required legacy functions on the window object.
-    // if (
-    //   typeof window.previewdata !== "function" ||
-    //   typeof window.initcanvas_with_container !== "function"
-    // ) {
-    //   console.error(
-    //     "❌ Legacy preview script functions are not available on the window object."
-    //   );
-    //   return;
-    // }
-
-    // window.previewdata(dataKey, previewIndex, isInternal, false);
-    // fix end---------------------------------
-    // clear old canvas
+    // if (!isOpen) return;
+    if (!isOpen) {
+      // Modal just closed — clean up Three.js immediately
+      if (typeof window.destroyPreview === "function") {
+        window.destroyPreview();
+      }
+      return;
+    }
     const canvasDiv = document.getElementById("canvas");
     if (canvasDiv)
       while (canvasDiv.firstChild) canvasDiv.removeChild(canvasDiv.firstChild);
@@ -69,6 +53,10 @@ const PreviewModal: React.FC<{
 
     return () => {
       clearInterval(interval);
+      // Effect cleanup (runs on dependency change and on unmount) — clean up Three.js
+      if (typeof window.destroyPreview === "function") {
+        window.destroyPreview();
+      }
     };
   }, [isOpen, dataKey, previewIndex, isInternal]);
 
diff --git a/src/utils/preview.js b/src/utils/preview.js
index 912c8f8..496e855 100644
--- a/src/utils/preview.js
+++ b/src/utils/preview.js
@@ -74,18 +74,55 @@ var typedfun = {
   BigUint64Array: null,
 };
 
+// function destroyPreview() {
+//   if (window.scene) {
+//     while (window.scene.children.length > 0) {
+//       const obj = window.scene.children[0];
+//       window.scene.remove(obj);
+//       if (obj.geometry) obj.geometry.dispose();
+//       if (obj.material) obj.material.dispose();
+//     }
+//   }
+
+//   if (window.renderer && window.renderer.domElement) {
+//     window.renderer.domElement.remove();
+//     window.renderer.dispose();
+//   }
+
+//   window.scene = undefined;
+//   window.camera = undefined;
+//   window.renderer = undefined;
+//   window.controls = undefined;
+//   window.reqid = undefined;
+// }
 function destroyPreview() {
+  // Cancel animation loop first
+  if (reqid !== undefined) {
+    cancelAnimationFrame(reqid);
+    reqid = undefined;
+    window.reqid = undefined;
+  }
+
   if (window.scene) {
     while (window.scene.children.length > 0) {
       const obj = window.scene.children[0];
       window.scene.remove(obj);
       if (obj.geometry) obj.geometry.dispose();
-      if (obj.material) obj.material.dispose();
+      if (obj.material) {
+        // Dispose any textures stored in shader uniforms
+        if (obj.material.uniforms) {
+          Object.values(obj.material.uniforms).forEach((u) => {
+            if (u.value && u.value.isTexture) u.value.dispose();
+          });
+        }
+        obj.material.dispose();
+      }
     }
   }
 
   if (window.renderer && window.renderer.domElement) {
     window.renderer.domElement.remove();
+    window.renderer.forceContextLoss();
     window.renderer.dispose();
   }
 
@@ -93,7 +130,9 @@ function destroyPreview() {
   window.camera = undefined;
   window.renderer = undefined;
   window.controls = undefined;
-  window.reqid = undefined;
+  lastvolume = null;
+  lastvolumedata = null;
+  texture = undefined;
 }
 
 function drawpreview(cfg) {
@@ -967,183 +1006,428 @@ function initcanvas() {
     panel.appendChild(stats.domElement);
   }
 
-  $("#camera-near").on("input", function () {
-    camera.near = parseFloat($(this).val());
-    renderer.render(scene, camera);
-    controls.update();
-    renderer.updateComplete = false;
-  });
+  // $("#camera-near").on("input", function () {
+  //   camera.near = parseFloat($(this).val());
+  //   renderer.render(scene, camera);
+  //   controls.update();
+  //   renderer.updateComplete = false;
+  // });
 
-  $("#camera-far").on("input", function () {
-    camera.far = parseFloat($(this).val());
-    renderer.render(scene, camera);
-    controls.update();
-    renderer.updateComplete = false;
-  });
+  // $("#camera-far").on("input", function () {
+  //   camera.far = parseFloat($(this).val());
+  //   renderer.render(scene, camera);
+  //   controls.update();
+  //   renderer.updateComplete = false;
+  // });
 
-  $("#clim-low").on("input", function () {
-    $(this).prop(
-      "title",
-      "" +
-        $(this).val() +
-        " [" +
-        $(this).prop("min") +
-        "," +
-        $(this).prop("max") +
-        "]"
-    );
-    if (lastvolume !== null) {
-      let val = lastvolume.material.uniforms["u_clim"].value;
-      lastvolume.material.uniforms["u_clim"].value.set(
-        parseFloat($(this).val()),
-        val.y
-      );
-      renderer.updateComplete = false;
-    }
-  });
+  // $("#clim-low").on("input", function () {
+  //   $(this).prop(
+  //     "title",
+  //     "" +
+  //       $(this).val() +
+  //       " [" +
+  //       $(this).prop("min") +
+  //       "," +
+  //       $(this).prop("max") +
+  //       "]"
+  //   );
+  //   if (lastvolume !== null) {
+  //     let val = lastvolume.material.uniforms["u_clim"].value;
+  //     lastvolume.material.uniforms["u_clim"].value.set(
+  //       parseFloat($(this).val()),
+  //       val.y
+  //     );
+  //     renderer.updateComplete = false;
+  //   }
+  // });
 
-  $("#clim-hi").on("input", function () {
-    $(this).prop(
-      "title",
-      "" +
-        $(this).val() +
-        " [" +
-        $(this).prop("min") +
-        "," +
-        $(this).prop("max") +
-        "]"
-    );
-    if (lastvolume !== null) {
-      let val = lastvolume.material.uniforms["u_clim"].value;
-      lastvolume.material.uniforms["u_clim"].value.set(
-        val.x,
-        parseFloat($(this).val())
-      );
-      renderer.updateComplete = false;
-    }
-  });
+  // $("#clim-hi").on("input", function () {
+  //   $(this).prop(
+  //     "title",
+  //     "" +
+  //       $(this).val() +
+  //       " [" +
+  //       $(this).prop("min") +
+  //       "," +
+  //       $(this).prop("max") +
+  //       "]"
+  //   );
+  //   if (lastvolume !== null) {
+  //     let val = lastvolume.material.uniforms["u_clim"].value;
+  //     lastvolume.material.uniforms["u_clim"].value.set(
+  //       val.x,
+  //       parseFloat($(this).val())
+  //     );
+  //     renderer.updateComplete = false;
+  //   }
+  // });
 
-  $("#isothreshold").on("input", function () {
-    $(this).prop(
-      "title",
-      "" +
-        $(this).val() +
-        " [" +
-        $(this).prop("min") +
-        "," +
-        $(this).prop("max") +
-        "]"
-    );
-    if (lastvolume !== null) {
-      let val = lastvolume.material.uniforms["u_renderthreshold"].value;
-      lastvolume.material.uniforms["u_renderthreshold"].value = parseFloat(
-        $(this).val()
-      );
-      renderer.updateComplete = false;
-    }
-  });
+  // $("#isothreshold").on("input", function () {
+  //   $(this).prop(
+  //     "title",
+  //     "" +
+  //       $(this).val() +
+  //       " [" +
+  //       $(this).prop("min") +
+  //       "," +
+  //       $(this).prop("max") +
+  //       "]"
+  //   );
+  //   if (lastvolume !== null) {
+  //     let val = lastvolume.material.uniforms["u_renderthreshold"].value;
+  //     lastvolume.material.uniforms["u_renderthreshold"].value = parseFloat(
+  //       $(this).val()
+  //     );
+  //     renderer.updateComplete = false;
+  //   }
+  // });
 
-  $("#mip-radio-button").on("change", function () {
-    if (lastvolume !== null) {
-      const unfs = lastvolume.material.uniforms;
-      lastvolume.material = new THREE.ShaderMaterial({
-        uniforms: THREE.UniformsUtils.clone(MipRenderShader.uniforms),
-        vertexShader: MipRenderShader.vertexShader,
-        fragmentShader: MipRenderShader.fragmentShader,
-        side: THREE.BackSide,
-      });
-      lastvolume.material.uniforms = unfs;
-      renderer.updateComplete = false;
-    }
-  });
+  // $("#mip-radio-button").on("change", function () {
+  //   if (lastvolume !== null) {
+  //     const unfs = lastvolume.material.uniforms;
+  //     lastvolume.material = new THREE.ShaderMaterial({
+  //       uniforms: THREE.UniformsUtils.clone(MipRenderShader.uniforms),
+  //       vertexShader: MipRenderShader.vertexShader,
+  //       fragmentShader: MipRenderShader.fragmentShader,
+  //       side: THREE.BackSide,
+  //     });
+  //     lastvolume.material.uniforms = unfs;
+  //     renderer.updateComplete = false;
+  //   }
+  // });
 
-  $("#iso-radio-button").on("change", function () {
-    if (lastvolume !== null) {
-      const unfs = lastvolume.material.uniforms;
-      lastvolume.material = new THREE.ShaderMaterial({
-        uniforms: THREE.UniformsUtils.clone(IsoRenderShader.uniforms),
-        vertexShader: IsoRenderShader.vertexShader,
-        fragmentShader: IsoRenderShader.fragmentShader,
-        side: THREE.BackSide,
-      });
-      lastvolume.material.uniforms = unfs;
-      renderer.updateComplete = false;
-    }
-  });
+  // $("#iso-radio-button").on("change", function () {
+  //   if (lastvolume !== null) {
+  //     const unfs = lastvolume.material.uniforms;
+  //     lastvolume.material = new THREE.ShaderMaterial({
+  //       uniforms: THREE.UniformsUtils.clone(IsoRenderShader.uniforms),
+  //       vertexShader: IsoRenderShader.vertexShader,
+  //       fragmentShader: IsoRenderShader.fragmentShader,
+  //       side: THREE.BackSide,
+  //     });
+  //     lastvolume.material.uniforms = unfs;
+  //     renderer.updateComplete = false;
+  //   }
+  // });
 
-  $("#interp-radio-button").on("change", function () {
-    if (lastvolume !== null) {
-      const unfs = lastvolume.material.uniforms;
-      lastvolume.material = new THREE.RawShaderMaterial(InterpRenderShader());
-      lastvolume.material.uniforms = unfs;
-      lastvolume.material.uniforms.cameraPos.value.copy(camera.position);
-      renderer.updateComplete = false;
-    }
-  });
+  // $("#interp-radio-button").on("change", function () {
+  //   if (lastvolume !== null) {
+  //     const unfs = lastvolume.material.uniforms;
+  //     lastvolume.material = new THREE.RawShaderMaterial(InterpRenderShader());
+  //     lastvolume.material.uniforms = unfs;
+  //     lastvolume.material.uniforms.cameraPos.value.copy(camera.position);
+  //     renderer.updateComplete = false;
+  //   }
+  // });
 
-  $("#cross-x-low").on("input", function () {
-    setcrosssectionsizes(this);
-  });
+  // $("#cross-x-low").on("input", function () {
+  //   setcrosssectionsizes(this);
+  // });
 
-  $("#cross-y-low").on("input", function () {
-    setcrosssectionsizes(this);
-  });
+  // $("#cross-y-low").on("input", function () {
+  //   setcrosssectionsizes(this);
+  // });
 
-  $("#cross-z-low").on("input", function () {
-    setcrosssectionsizes(this);
-  });
+  // $("#cross-z-low").on("input", function () {
+  //   setcrosssectionsizes(this);
+  // });
 
-  $("#cross-x-hi").on("input", function () {
-    setcrosssectionsizes(this);
-  });
+  // $("#cross-x-hi").on("input", function () {
+  //   setcrosssectionsizes(this);
+  // });
 
-  $("#cross-y-hi").on("input", function () {
-    setcrosssectionsizes(this);
-  });
+  // $("#cross-y-hi").on("input", function () {
+  //   setcrosssectionsizes(this);
+  // });
 
-  $("#cross-z-hi").on("input", function () {
-    setcrosssectionsizes(this);
-  });
+  // $("#cross-z-hi").on("input", function () {
+  //   setcrosssectionsizes(this);
+  // });
 
-  $("#x_thickness, #y_thickness, #z_thickness").on("input", function () {
-    let eid = $(this).attr("id");
-    let linkedeid1 = eid.replace(/_thickness/, "-low").replace(/^/, "cross-");
-    let linkedeid2 = eid.replace(/_thickness/, "-hi").replace(/^/, "cross-");
-    if ($(this).val() == 0) {
-      $("#" + linkedeid1).val(0);
-      $("#" + linkedeid2).val(1);
-    } else {
-      $("#" + linkedeid1).val(
-        ($("#" + linkedeid1).val() + $("#" + linkedeid2).val()) * 0.5
+  // $("#x_thickness, #y_thickness, #z_thickness").on("input", function () {
+  //   let eid = $(this).attr("id");
+  //   let linkedeid1 = eid.replace(/_thickness/, "-low").replace(/^/, "cross-");
+  //   let linkedeid2 = eid.replace(/_thickness/, "-hi").replace(/^/, "cross-");
+  //   if ($(this).val() == 0) {
+  //     $("#" + linkedeid1).val(0);
+  //     $("#" + linkedeid2).val(1);
+  //   } else {
+  //     $("#" + linkedeid1).val(
+  //       ($("#" + linkedeid1).val() + $("#" + linkedeid2).val()) * 0.5
+  //     );
+  //   }
+  //   setcrosssectionsizes($("#" + linkedeid1));
+  // });
+
+  // $("#pos-x-view").on("click", function () {
+  //   setControlAngles((Math.PI * 90) / 180, (Math.PI * 90) / 180);
+  //   renderer.updateComplete = false;
+  // });
+
+  // $("#neg-x-view").on("click", function () {
+  //   setControlAngles((Math.PI * 90) / 180, (Math.PI * 270) / 180);
+  // });
+
+  // $("#pos-y-view").on("click", function () {
+  //   setControlAngles((Math.PI * 90) / 180, (Math.PI * 180) / 180);
+  // });
+
+  // $("#neg-y-view").on("click", function () {
+  //   setControlAngles((Math.PI * 90) / 180, (Math.PI * 0) / 180);
+  // });
+
+  // $("#pos-z-view").on("click", function () {
+  //   setControlAngles(0, 0);
+  // });
+
+  // $("#neg-z-view").on("click", function () {
+  //   setControlAngles((Math.PI * 180) / 180, 0);
+  // });
+
+  $("#camera-near")
+    .off("input")
+    .on("input", function () {
+      camera.near = parseFloat($(this).val());
+      renderer.render(scene, camera);
+      controls.update();
+      renderer.updateComplete = false;
+    });
+
+  $("#camera-far")
+    .off("input")
+    .on("input", function () {
+      camera.far = parseFloat($(this).val());
+      renderer.render(scene, camera);
+      controls.update();
+      renderer.updateComplete = false;
+    });
+
+  $("#clim-low")
+    .off("input")
+    .on("input", function () {
+      $(this).prop(
+        "title",
+        "" +
+          $(this).val() +
+          " [" +
+          $(this).prop("min") +
+          "," +
+          $(this).prop("max") +
+          "]"
       );
-    }
-    setcrosssectionsizes($("#" + linkedeid1));
-  });
+      if (lastvolume !== null) {
+        let val = lastvolume.material.uniforms["u_clim"].value;
+        lastvolume.material.uniforms["u_clim"].value.set(
+          parseFloat($(this).val()),
+          val.y
+        );
+        renderer.updateComplete = false;
+      }
+    });
 
-  $("#pos-x-view").on("click", function () {
-    setControlAngles((Math.PI * 90) / 180, (Math.PI * 90) / 180);
-    renderer.updateComplete = false;
-  });
+  $("#clim-hi")
+    .off("input")
+    .on("input", function () {
+      $(this).prop(
+        "title",
+        "" +
+          $(this).val() +
+          " [" +
+          $(this).prop("min") +
+          "," +
+          $(this).prop("max") +
+          "]"
+      );
+      if (lastvolume !== null) {
+        let val = lastvolume.material.uniforms["u_clim"].value;
+        lastvolume.material.uniforms["u_clim"].value.set(
+          val.x,
+          parseFloat($(this).val())
+        );
+        renderer.updateComplete = false;
+      }
+    });
 
-  $("#neg-x-view").on("click", function () {
-    setControlAngles((Math.PI * 90) / 180, (Math.PI * 270) / 180);
-  });
+  $("#isothreshold")
+    .off("input")
+    .on("input", function () {
+      $(this).prop(
+        "title",
+        "" +
+          $(this).val() +
+          " [" +
+          $(this).prop("min") +
+          "," +
+          $(this).prop("max") +
+          "]"
+      );
+      if (lastvolume !== null) {
+        lastvolume.material.uniforms["u_renderthreshold"].value = parseFloat(
+          $(this).val()
+        );
+        renderer.updateComplete = false;
+      }
+    });
 
-  $("#pos-y-view").on("click", function () {
-    setControlAngles((Math.PI * 90) / 180, (Math.PI * 180) / 180);
-  });
+  $("#mip-radio-button")
+    .off("change")
+    .on("change", function () {
+      if (lastvolume !== null) {
+        const unfs = lastvolume.material.uniforms;
+        lastvolume.material = new THREE.ShaderMaterial({
+          uniforms: THREE.UniformsUtils.clone(MipRenderShader.uniforms),
+          vertexShader: MipRenderShader.vertexShader,
+          fragmentShader: MipRenderShader.fragmentShader,
+          side: THREE.BackSide,
+        });
+        lastvolume.material.uniforms = unfs;
+        renderer.updateComplete = false;
+      }
+    });
 
-  $("#neg-y-view").on("click", function () {
-    setControlAngles((Math.PI * 90) / 180, (Math.PI * 0) / 180);
-  });
+  $("#iso-radio-button")
+    .off("change")
+    .on("change", function () {
+      if (lastvolume !== null) {
+        const unfs = lastvolume.material.uniforms;
+        lastvolume.material = new THREE.ShaderMaterial({
+          uniforms: THREE.UniformsUtils.clone(IsoRenderShader.uniforms),
+          vertexShader: IsoRenderShader.vertexShader,
+          fragmentShader: IsoRenderShader.fragmentShader,
+          side: THREE.BackSide,
+        });
+        lastvolume.material.uniforms = unfs;
+        renderer.updateComplete = false;
+      }
+    });
 
-  $("#pos-z-view").on("click", function () {
-    setControlAngles(0, 0);
-  });
+  $("#interp-radio-button")
+    .off("change")
+    .on("change", function () {
+      if (lastvolume !== null) {
+        const unfs = lastvolume.material.uniforms;
+        lastvolume.material = new THREE.RawShaderMaterial(InterpRenderShader());
+        lastvolume.material.uniforms = unfs;
+        lastvolume.material.uniforms.cameraPos.value.copy(camera.position);
+        renderer.updateComplete = false;
+      }
+    });
 
-  $("#neg-z-view").on("click", function () {
-    setControlAngles((Math.PI * 180) / 180, 0);
-  });
+  $("#cross-x-low")
+    .off("input")
+    .on("input", function () {
+      setcrosssectionsizes(this);
+    });
+  $("#cross-y-low")
+    .off("input")
+    .on("input", function () {
+      setcrosssectionsizes(this);
+    });
+  $("#cross-z-low")
+    .off("input")
+    .on("input", function () {
+      setcrosssectionsizes(this);
+    });
+  $("#cross-x-hi")
+    .off("input")
+    .on("input", function () {
+      setcrosssectionsizes(this);
+    });
+  $("#cross-y-hi")
+    .off("input")
+    .on("input", function () {
+      setcrosssectionsizes(this);
+    });
+  $("#cross-z-hi")
+    .off("input")
+    .on("input", function () {
+      setcrosssectionsizes(this);
+    });
+
+  // Thickness sliders update the linked cross-*-low/-hi sliders; .val() strings are parsed before adding.
+  $("#x_thickness, #y_thickness, #z_thickness")
+    .off("input")
+    .on("input", function () {
+      let eid = $(this).attr("id");
+      let linkedeid1 = eid.replace(/_thickness/, "-low").replace(/^/, "cross-");
+      let linkedeid2 = eid.replace(/_thickness/, "-hi").replace(/^/, "cross-");
+      if ($(this).val() == 0) {
+        $("#" + linkedeid1).val(0);
+        $("#" + linkedeid2).val(1);
+      } else {
+        const lo = parseFloat($("#" + linkedeid1).val());
+        $("#" + linkedeid1).val((lo + parseFloat($("#" + linkedeid2).val())) * 0.5);
+      }
+      setcrosssectionsizes($("#" + linkedeid1));
+    });
+
+  $("#pos-x-view")
+    .off("click")
+    .on("click", function () {
+      setControlAngles((Math.PI * 90) / 180, (Math.PI * 90) / 180);
+      renderer.updateComplete = false;
+    });
+  $("#neg-x-view")
+    .off("click")
+    .on("click", function () {
+      setControlAngles((Math.PI * 90) / 180, (Math.PI * 270) / 180);
+    });
+  $("#pos-y-view")
+    .off("click")
+    .on("click", function () {
+      setControlAngles((Math.PI * 90) / 180, (Math.PI * 180) / 180);
+    });
+  $("#neg-y-view")
+    .off("click")
+    .on("click", function () {
+      setControlAngles((Math.PI * 90) / 180, (Math.PI * 0) / 180);
+    });
+  $("#pos-z-view")
+    .off("click")
+    .on("click", function () {
+      setControlAngles(0, 0);
+    });
+  $("#neg-z-view")
+    .off("click")
+    .on("click", function () {
+      setControlAngles((Math.PI * 180) / 180, 0);
+    });
+
+  $("#cross-t")
+    .off("mouseup")
+    .on("mouseup", function () {
+      $(this).prop(
+        "title",
+        "" +
+          $(this).val() +
+          " [" +
+          $(this).prop("min") +
+          "," +
+          $(this).prop("max") +
+          "]"
+      );
+      if (lastvolume !== null && lastvolumedata !== undefined) {
+        let dim = lastvolumedim;
+        let offset =
+          Math.min($(this).val(), dim[3] - 2) * dim[0] * dim[1] * dim[2];
+        let texture = new THREE.Data3DTexture(
+          lastvolumedata.selection.data.slice(
+            offset - 1,
+            offset + dim[0] * dim[1] * dim[2] - 1
+          ),
+          dim[0],
+          dim[1],
+          dim[2]
+        );
+        texture.format = THREE.RedFormat;
+        texture.type = texture_dtype[lastvolumedata.dtype];
+        texture.minFilter = texture.magFilter = THREE.LinearFilter;
+        texture.unpackAlignment = 1;
+        texture.needsUpdate = true;
+        lastvolume.material.uniforms["u_data"].value = texture;
+        renderer.updateComplete = false;
+      }
+    });
 
   $("#cross-t").on("mouseup", function () {
     $(this).prop(

From e54ea6f058496d17201552a454f8c83d3e9458fe Mon Sep 17 00:00:00 2001
From: elainefan331 <elainefan331@gmail.com>
Date: Thu, 2 Apr 2026 09:32:15 -0400
Subject: [PATCH 3/5] fix: resolve memory leaks in 2D plot viewer

---
 src/utils/preview.js | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/utils/preview.js b/src/utils/preview.js
index 496e855..02f3908 100644
--- a/src/utils/preview.js
+++ b/src/utils/preview.js
@@ -33,6 +33,7 @@ var lastvolume = null;
 var lastvolumedata = null;
 var lastvolumedim = [];
 var lastclim = 0;
+var uplotInstance = null;
 var reqid = undefined;
 
 var canvas = null;
@@ -133,6 +134,12 @@ function destroyPreview() {
   lastvolume = null;
   lastvolumedata = null;
   texture = undefined;
+
+  if (uplotInstance !== null) {
+    uplotInstance.destroy();
+    uplotInstance = null;
+  }
+  $("#chartpanel").hide().html("");
 }
 
 function drawpreview(cfg) {
@@ -428,22 +435,32 @@ function dopreview(key, idx, isinternal, hastime) {
             ? "y" + i
             : hastime[i];
       }
-      let u = new uPlot(opts, plotdata, document.getElementById("plotchart"));
+      // let u = new uPlot(opts, plotdata, document.getElementById("plotchart"));
+      if (uplotInstance !== null) {
+        uplotInstance.destroy();
+        uplotInstance = null;
+      }
+      uplotInstance = new uPlot(
+        opts,
+        plotdata,
+        document.getElementById("plotchart")
+      );
     } else {
-      let u = new uPlot(
+      // let u = new uPlot(
+      //   opts,
+      //   [[...Array(dataroot.length).keys()], dataroot],
+      //   document.getElementById("plotchart")
+      // );
+      if (uplotInstance !== null) {
+        uplotInstance.destroy();
+        uplotInstance = null;
+      }
+      uplotInstance = new uPlot(
         opts,
         [[...Array(dataroot.length).keys()], dataroot],
         document.getElementById("plotchart")
       );
     }
-    // add spinner
-    // --- NEW LOGIC for 2D plot ---
-    // Signal that the 2D plot has just been created and is now visible.
-    // if (typeof window.__onPreviewReady === "function") {
-    //   window.__onPreviewReady();
-    //   window.__onPreviewReady = null; // Clean up to prevent accidental re-firing
-    // }
-    // --- END NEW LOGIC ---
 
     // for spinner
     // --- Signal React that 2D preview is ready ---

From 66f6e435ead30e55958fba297e4255b34a2283e9 Mon Sep 17 00:00:00 2001
From: elainefan331 <elainefan331@gmail.com>
Date: Thu, 2 Apr 2026 13:51:51 -0400
Subject: [PATCH 4/5] feat: add executorHelpers and plannerHelpers

---
 .../DatasetOrganizer/utils/executorHelpers.ts | 522 ++++++++++++++++++
 .../DatasetOrganizer/utils/fileAnalyzers.ts   | 332 ++---------
 .../utils/filenameTokenizer.ts                | 505 +++++++++++++++--
 .../DatasetOrganizer/utils/plannerHelpers.ts  |   0
 4 files changed, 1030 insertions(+), 329 deletions(-)
 create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts
 create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts

diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts
new file mode 100644
index 0000000..782dee5
--- /dev/null
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/executorHelpers.ts
@@ -0,0 +1,522 @@
+// src/components/DatasetOrganizer/utils/executorHelpers.ts
+//
+// Portable helper functions from autobidsify/converters/executor.py
+//
+// What is NOT here (intentionally — requires server-side CLI):
+//   execute_bids_plan()       — file copy/conversion operations
+//   convert_mat_to_snirf()    — binary .mat read + .snirf write
+//   run_dcm2niix_batch()      — dcm2niix subprocess
+//   convert_jnifti_to_nifti() — nibabel NIfTI write
+//
+// What IS here (useful client-side for plan validation + preview):
+//   sanitizeBidsLabel()       mirrors _sanitize_bids_label()
+//   normalizeFilename()       mirrors _normalize_filename()
+//   extractAcqLabel()         mirrors _extract_acq_label()
+//   selectPreferredFile()     mirrors _select_preferred_file()
+//   matchGlobPattern()        mirrors _match_glob_pattern()
+//   inferScanType()           mirrors infer_scan_type_from_filepath()
+//   inferSubdirectory()       mirrors infer_subdirectory_from_suffix()
+//   categorizeScanType()      mirrors categorize_scan_type()
+//   analyzeFilepathUniversal() mirrors analyze_filepath_universal()
+//   validatePlanCoverage()    NEW — uses matchGlobPattern to check LLM patterns
+
+// ============================================================================
+// sanitizeBidsLabel()
+// Mirrors _sanitize_bids_label() in executor.py
+// Removes all non-alphanumeric characters from a BIDS entity value
+// e.g. "mental_arithmetic" → "mentalarithmetic"
+// ============================================================================
+
+export const sanitizeBidsLabel = (label: string): string =>
+  label.split("").filter((ch) => /[a-zA-Z0-9]/.test(ch)).join(""); // keep ASCII letters/digits only
+
+// ============================================================================
+// normalizeFilename()
+// Mirrors _normalize_filename() in executor.py
+//
+// Strips extensions and trailing sequence numbers.
+// Used to identify DICOM series and detect format duplicates.
+//
+// Examples:
+//   "VHFCT1mm-Hip (134).dcm"       → "vhfct1mm-hip"
+//   "scan_mprage_anonymized.nii.gz" → "scan_mprage_anonymized"
+//   "scan_001.dcm"                  → "scan"
+// ============================================================================
+
+export const normalizeFilename = (filepath: string): string => {
+  const segments = filepath.split("/");
+  let base = segments[segments.length - 1];
+  // Peel short (<= 6 char) dot-suffixes one at a time: ".gz", then ".nii", ...
+  for (
+    let dot = base.lastIndexOf(".");
+    dot !== -1 && base.length - dot - 1 <= 6;
+    dot = base.lastIndexOf(".")
+  ) {
+    base = base.slice(0, dot);
+  }
+  // Drop a trailing " (N)" counter first, then a trailing _NNN / -NNN index.
+  const stem = base.replace(/\s*\(\d+\)\s*$/, "").replace(/[_\-]\d+$/, "");
+  return stem.trim().toLowerCase();
+};
+
+// ============================================================================
+// extractAcqLabel()
+// Mirrors _extract_acq_label() in executor.py
+//
+// Derives a short, clean acq- label from a normalized DICOM filename.
+// Keeps the last meaningful alphabetic token (body part or scan descriptor).
+//
+// Examples:
+//   "vhfct1mm-ankle" → "ankle"
+//   "vhfct1mm-head"  → "head"
+//   "vhmct1mm-hip"   → "hip"
+//   "scan_mprage"    → "mprage"
+// ============================================================================
+
+export const extractAcqLabel = (normalizedFname: string): string => {
+  // Scanner/modality boilerplate words that are never used as the label.
+  const ignored = new Set(["vhf", "vhm", "ct", "mr", "mri", "mm", "scan", "the"]);
+  const words = normalizedFname.match(/[a-z]+/g) || [];
+  // Walk backwards: the last lowercase run of 3+ letters that is not ignored wins.
+  for (let i = words.length - 1; i >= 0; i--) {
+    if (words[i].length > 2 && !ignored.has(words[i])) return words[i];
+  }
+  return normalizedFname.slice(0, 20); // nothing usable: first 20 chars of the input
+};
+
+// ============================================================================
+// selectPreferredFile()
+// Mirrors _select_preferred_file() in executor.py
+//
+// Priority: NIfTI dir > non-BRIK > shortest path > alphabetical
+// ============================================================================
+
+export const selectPreferredFile = (files: string[]): string | null => {
+  if (files.length === 0) return null;
+  if (files.length === 1) return files[0];
+
+  // Rank tuple, compared left to right; the smaller value wins at each position:
+  //   [no segment contains "nifti", some segment contains "brik", segment count]
+  const rank = (path: string): number[] => {
+    const segs = path.toLowerCase().split("/");
+    const hasNifti = segs.some((s) => s.includes("nifti"));
+    const hasBrik = segs.some((s) => s.includes("brik"));
+    return [hasNifti ? 0 : 1, hasBrik ? 1 : 0, segs.length];
+  };
+  // Negative when `a` is preferred over `b`; the path string breaks numeric ties.
+  const compare = (a: string, b: string): number => {
+    const ra = rank(a);
+    const rb = rank(b);
+    const i = ra.findIndex((v, k) => v !== rb[k]);
+    return i === -1 ? a.localeCompare(b) : ra[i] - rb[i];
+  };
+
+  // Keep the earliest best candidate, i.e. the element a stable sort would put first.
+  return files.reduce((best, f) => (compare(f, best) < 0 ? f : best));
+};
+
+// ============================================================================
+// matchGlobPattern()
+// Mirrors _match_glob_pattern() in executor.py
+//
+// Supported patterns:
+//   "**/*.nii.gz"  → any .nii.gz at any depth
+//   "**/BRIK/**"   → any file inside a BRIK directory
+//   "*token*"      → filepath contains token
+//   "*.ext"        → filename ends with extension
+//   "token*"       → filename starts with token
+//   "plain"        → substring anywhere in path (fallback)
+// ============================================================================
+
+export const matchGlobPattern = (
+  filepath: string,
+  pattern: string
+): boolean => {
+  // Matching is case-insensitive: both sides are lowercased up front.
+  const path = filepath.toLowerCase();
+  const glob = pattern.toLowerCase();
+  const segments = path.split("/");
+  const basename = segments[segments.length - 1];
+  const anyDepth = glob.startsWith("**/");
+  const leadingStar = glob.startsWith("*");
+  const trailingStar = glob.endsWith("*");
+
+  if (anyDepth && glob.endsWith("/**")) {
+    // "**/dir/**": `dir` must equal one directory segment (the basename is excluded).
+    const dir = glob.slice(3, -3);
+    return segments.slice(0, -1).includes(dir);
+  }
+
+  if (anyDepth) {
+    // "**/*.ext": full path ends with ".ext"; any other "**/rest": path contains "rest".
+    const rest = glob.slice(3);
+    return rest.startsWith("*.") ? path.endsWith(rest.slice(1)) : path.includes(rest);
+  }
+
+  if (leadingStar && trailingStar) {
+    // "*token*": token may appear anywhere in the full path.
+    return path.includes(glob.slice(1, -1));
+  }
+  if (glob.startsWith("*.")) {
+    // "*.ext": the basename must end with ".ext".
+    return basename.endsWith(glob.slice(1));
+  }
+  if (trailingStar) {
+    // "token*": the basename must start with token.
+    return basename.startsWith(glob.slice(0, -1));
+  }
+  // No wildcard shape recognised: plain substring test on the full path.
+  return path.includes(glob);
+};
+
+// ============================================================================
+// inferScanType()
+// Mirrors infer_scan_type_from_filepath() in executor.py
+// Returns { suffix, subdirectory, category } for one source path. Priority:
+//   1. LLM filename_rules from BIDSPlan (regex match_pattern vs. filename;
+//      rules with an empty match_pattern or an invalid regex are skipped)
+//   2. BIDS entities already in filename (ses-, task-, acq-, run-)
+//   3. Keyword detection in path
+//   4. Extension fallback, else suffix "unknown"
+// ============================================================================
+
+interface ScanTypeResult {
+  suffix: string;
+  subdirectory: string;
+  category: string;
+}
+
+export const inferScanType = (
+  filepath: string,
+  filenameRules: any[] = []
+): ScanTypeResult => {
+  const pathLower = filepath.toLowerCase();
+  const filename = filepath.split("/").pop()!;
+  const fnameLow = filename.toLowerCase();
+
+  // ── Priority 1: LLM filename_rules ──────────────────────────────────
+  for (const rule of filenameRules) {
+    try {
+      const mp = (rule.match_pattern || "").replace(/\\\\/g, "\\");
+      // An empty pattern would match every filename, so treat it as "no rule".
+      if (!mp || !new RegExp(mp, "i").test(filename)) continue;
+      const template: string = rule.bids_template || "";
+      const m = template.match(/sub-[^_]+_(.*?)\.(nii\.gz|snirf|nii)/);
+      if (!m) continue;
+
+      let raw = m[1];
+      // Remove placeholder entities
+      raw = raw
+        .replace(/ses-X_?/g, "")
+        .replace(/task-X_?/g, "")
+        .replace(/^_|_$/g, "");
+
+      // Remove spurious ses- if no ses- dir in path
+      if (
+        /ses-[A-Za-z0-9]+/.test(raw) &&
+        !/\/ses-[A-Za-z0-9]+\//.test(filepath)
+      ) {
+        raw = raw.replace(/ses-[A-Za-z0-9]+_?/g, "").replace(/^_|_$/g, "");
+      }
+
+      if (raw) {
+        // Sanitize entity values — mirrors _sanitize_suffix() in executor.py
+        // "task-mental_arithmetic_nirs" → "task-mentalarithmetic_nirs"
+        raw = raw.replace(
+          /([a-zA-Z]+-)(.+?)(?=_[a-zA-Z]+-|_[a-zA-Z]+$|$)/g,
+          (_match, key, val) => key + sanitizeBidsLabel(val)
+        );
+        const subdir = inferSubdirectory(raw);
+        return {
+          suffix: raw,
+          subdirectory: subdir,
+          category: categorizeScanType(raw),
+        };
+      }
+    } catch {
+      continue;
+    }
+  }
+
+  // ── Priority 2: BIDS entities already in filename ────────────────────
+  const entities: Record<string, string> = {};
+  for (const [key, pattern] of [
+    ["ses", /ses-([A-Za-z0-9]+)/],
+    ["task", /task-([A-Za-z0-9]+)/],
+    ["acq", /acq-([A-Za-z0-9]+)/],
+    ["run", /run-([A-Za-z0-9]+)/],
+  ] as [string, RegExp][]) {
+    const match = filename.match(pattern);
+    if (match) entities[key] = match[1];
+  }
+
+  // Infer task from filename keywords when no task- entity present
+  if (!entities.task) {
+    const nameNoExt = fnameLow.replace(/\.[^.]+$/, "");
+    if (/rest|resting/.test(nameNoExt)) entities.task = "rest";
+    else if (/finger|tapping|fingertap/.test(nameNoExt))
+      entities.task = "fingertapping";
+    else if (/walking|walk/.test(nameNoExt)) entities.task = "walking";
+    else if (/motor|tap/.test(nameNoExt)) entities.task = "motor";
+  }
+
+  let modalityLabel: string | null = null;
+  let subdir = "anat";
+
+  if (fnameLow.endsWith(".snirf") || fnameLow.includes("nirs")) {
+    modalityLabel = "nirs";
+    subdir = "nirs";
+  } else if (/t1w|t1/.test(fnameLow)) {
+    modalityLabel = "T1w";
+    subdir = "anat";
+  } else if (/t2w|t2/.test(fnameLow)) {
+    modalityLabel = "T2w";
+    subdir = "anat";
+  } else if (/bold|func/.test(fnameLow)) {
+    modalityLabel = "bold";
+    subdir = "func";
+  } else if (/dwi/.test(fnameLow)) {
+    modalityLabel = "dwi";
+    subdir = "dwi";
+  }
+
+  // BIDS rule: task-* scans go in func/ (unless nirs)
+  if (subdir !== "nirs" && (entities.task || pathLower.includes("func/"))) {
+    subdir = "func";
+    if (!modalityLabel) modalityLabel = "bold";
+  }
+
+  if (Object.keys(entities).length > 0 || modalityLabel) {
+    const parts: string[] = [];
+    for (const key of ["ses", "task", "acq", "run"]) {
+      if (entities[key])
+        parts.push(`${key}-${sanitizeBidsLabel(entities[key])}`);
+    }
+    if (modalityLabel) parts.push(modalityLabel);
+    if (parts.length > 0) {
+      const suffix = parts.join("_");
+      return {
+        suffix,
+        subdirectory: subdir,
+        category: categorizeScanType(suffix),
+      };
+    }
+  }
+
+  // ── Priority 3: Heuristic path keywords ─────────────────────────────
+  if (/anat|mprage|t1w/.test(pathLower))
+    return { suffix: "T1w", subdirectory: "anat", category: "anatomical" };
+  if (/func|bold/.test(pathLower)) {
+    const m = pathLower.match(/task[_-]([a-z0-9]+)/);
+    const suffix = m ? `task-${m[1]}_bold` : "task-rest_bold";
+    return { suffix, subdirectory: "func", category: "functional" };
+  }
+  if (pathLower.includes("rest"))
+    return {
+      suffix: "task-rest_bold",
+      subdirectory: "func",
+      category: "functional",
+    };
+  if (/nirs|fnirs|\.snirf/.test(pathLower))
+    return { suffix: "nirs", subdirectory: "nirs", category: "functional" };
+  if (pathLower.includes("dwi"))
+    return { suffix: "dwi", subdirectory: "dwi", category: "diffusion" };
+
+  // ── Priority 4: Extension fallback ──────────────────────────────────
+  if (fnameLow.endsWith(".snirf"))
+    return { suffix: "nirs", subdirectory: "nirs", category: "functional" };
+  if (fnameLow.endsWith(".nii") || fnameLow.endsWith(".nii.gz"))
+    return { suffix: "T1w", subdirectory: "anat", category: "anatomical" };
+
+  return { suffix: "unknown", subdirectory: "anat", category: "unknown" };
+};
+
+// ============================================================================
+// inferSubdirectory()
+// Mirrors infer_subdirectory_from_suffix() in executor.py
+// ============================================================================
+
+export const inferSubdirectory = (suffix: string): string => {
+  const lowered = suffix.toLowerCase();
+  // Ordered keyword → folder table; the first keyword found in the suffix wins.
+  const table = [["t1w", "anat"], ["t2w", "anat"], ["bold", "func"], ["nirs", "nirs"], ["dwi", "dwi"]];
+  const hit = table.find(([keyword]) => lowered.includes(keyword));
+  // Suffixes with no known keyword fall back to "anat".
+  return hit ? hit[1] : "anat";
+};
+
+// ============================================================================
+// categorizeScanType()
+// Mirrors categorize_scan_type() in executor.py
+// ============================================================================
+
+export const categorizeScanType = (suffix: string): string => {
+  const lowered = suffix.toLowerCase();
+  const has = (...keys: string[]): boolean => keys.some((k) => lowered.includes(k));
+  // Checked in order: t1w/t2w, then bold/nirs, then dwi; anything else is "unknown".
+  if (has("t1w", "t2w")) return "anatomical";
+  return has("bold", "nirs") ? "functional" : has("dwi") ? "diffusion" : "unknown";
+};
+
+// ============================================================================
+// analyzeFilepathUniversal()
+// Mirrors analyze_filepath_universal() in executor.py
+// Determines BIDS subject ID and output filename for one source file (plan
+// preview). Subject lookup order: rule.match globs, rule.original substring,
+// rule.prefix, then a "sub-XX" path segment; "unknown" when nothing matches.
+// ============================================================================
+
+export interface FilepathAnalysis {
+  subject_id: string;
+  scan_type_suffix: string;
+  bids_filename: string;
+  subdirectory: string;
+  scan_category: string;
+  original_filepath: string;
+  modality: string;
+}
+
+export const analyzeFilepathUniversal = (
+  filepath: string,
+  assignmentRules: any[],
+  filenameRules: any[],
+  modality: string = "mri"
+): FilepathAnalysis => {
+  const filename = filepath.split("/").pop()!;
+  const pathParts = filepath.split("/");
+  let subjectId: string | null = null;
+
+  // Priority 1: match glob patterns
+  for (const rule of assignmentRules) {
+    for (const pat of rule.match || []) {
+      if (matchGlobPattern(filepath, pat)) {
+        subjectId = rule.subject;
+        break;
+      }
+    }
+    if (subjectId) break;
+  }
+
+  // Priority 2: original substring match
+  if (!subjectId) {
+    for (const rule of assignmentRules) {
+      const orig: string = rule.original || "";
+      if (orig && filepath.toLowerCase().includes(orig.toLowerCase())) {
+        subjectId = rule.subject;
+        break;
+      }
+    }
+  }
+
+  // Priority 3: prefix match
+  if (!subjectId) {
+    for (const rule of assignmentRules) {
+      const pfx: string = rule.prefix || "";
+      if (pfx && filename.toLowerCase().startsWith(pfx.toLowerCase())) {
+        subjectId = rule.subject;
+        break;
+      }
+    }
+  }
+
+  // Priority 4: sub-XX already in path; the label is alphanumeric, so it ends at "_"
+  if (!subjectId) {
+    for (const part of pathParts) {
+      const m = part.match(/sub[_-]?([A-Za-z0-9]+)/i);
+      if (m) {
+        subjectId = m[1];
+        break;
+      }
+    }
+  }
+
+  // Fallback
+  if (!subjectId) subjectId = "unknown";
+
+  // Strip accidental sub- prefix
+  if (subjectId.startsWith("sub-")) subjectId = subjectId.slice(4);
+
+  const scanInfo = inferScanType(filepath, filenameRules);
+  const ext = modality === "nirs" ? ".snirf" : ".nii.gz";
+  const bidsFilename = `sub-${subjectId}_${scanInfo.suffix}${ext}`;
+
+  return {
+    subject_id: subjectId,
+    scan_type_suffix: scanInfo.suffix,
+    bids_filename: bidsFilename,
+    subdirectory: scanInfo.subdirectory,
+    scan_category: scanInfo.category,
+    original_filepath: filepath,
+    modality,
+  };
+};
+
+// ============================================================================
+// validatePlanCoverage()
+// NEW — not in Python (Python validates at runtime, we validate at plan-time)
+//
+// Checks that the LLM's match patterns in BIDSPlan actually cover the
+// sample files from the evidence bundle. Warns about uncovered files.
+//
+// Used in plannerHelpers.ts after buildBidsPlan() to surface issues
+// before the user downloads the ZIP.
+// ============================================================================
+
+export interface PlanCoverageResult {
+  // Sample paths matched by at least one mapping.
+  covered: string[];
+  // Sample paths no mapping matched.
+  uncovered: string[];
+  // covered / total as a rounded percentage; 100 when there are no samples.
+  coveragePercent: number;
+  // Human-readable summary lines; empty when everything is covered.
+  warnings: string[];
+}
+
+// Splits sampleFiles into covered / uncovered according to each mapping's
+// `match` and `exclude` glob lists (evaluated with matchGlobPattern) and
+// builds warning text for whatever is left uncovered.
+export const validatePlanCoverage = (
+  sampleFiles: string[],
+  mappings: any[]
+): PlanCoverageResult => {
+  // How many uncovered paths get spelled out before the rest are summarised.
+  const maxListed = 5;
+
+  // True when at least one of the globs matches the path.
+  const hitsAny = (path: string, globs: string[]): boolean =>
+    globs.some((glob) => matchGlobPattern(path, glob));
+
+  // A mapping covers a path when none of its exclude globs match and at
+  // least one of its match globs does; excludes are consulted first.
+  const covers = (mapping: any, path: string): boolean =>
+    !hitsAny(path, mapping.exclude || []) && hitsAny(path, mapping.match || []);
+
+  // Partition the samples, preserving input order in both lists.
+  const covered: string[] = [];
+  const uncovered: string[] = [];
+  for (const path of sampleFiles) {
+    const bucket = mappings.some((m) => covers(m, path)) ? covered : uncovered;
+    bucket.push(path);
+  }
+
+  // One headline, up to `maxListed` examples, then an overflow count.
+  const warnings: string[] = [];
+  if (uncovered.length > 0) {
+    warnings.push(
+      `${uncovered.length} sample file(s) not covered by any mapping pattern.`
+    );
+    for (const path of uncovered.slice(0, maxListed)) {
+      warnings.push(`  Uncovered: ${path}`);
+    }
+    const hidden = uncovered.length - maxListed;
+    if (hidden > 0) warnings.push(`  ... and ${hidden} more`);
+  }
+
+  // An empty sample set is reported as fully covered.
+  const total = sampleFiles.length;
+  const coveragePercent =
+    total > 0 ? Math.round((covered.length / total) * 100) : 100;
+
+  return { covered, uncovered, coveragePercent, warnings };
+};
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts
index 76142c0..c3c89b0 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts
@@ -1,54 +1,60 @@
 // src/components/DatasetOrganizer/utils/fileAnalyzers.ts
+// VFS adapter layer — NeuroJSON.io works with FileItem[] objects, not files on disk.
+// No single Python mirror. Functions map to:
+//   categorizeFile()       → executor.py (infer_subdirectory_from_suffix, categorize_scan_type)
+//   detectModality()       → evidence.py (detect_kind) + constants.py (MODALITY_*)
+//   getCountsByExtension() → evidence.py (by_ext dict construction)
+//   getUserContextText()   → evidence.py (_extract_document_content + documents[] assembly)
 import { FileItem } from "redux/projects/types/projects.interface";
 
-/**
- * Categorize a file based on its name and type
- * Returns detailed scan category (anatomical-T1w, functional-bold, etc.)
- */
+// ============================================================================
+// categorizeFile()
+// UI display function — determines file label/color in FileTree.
+//
+// NOT the same as inferScanType() in executorHelpers.ts:
+//   categorizeFile()  → "what to show in the UI"
+//   inferScanType()   → "what BIDS filename to generate"
+//
+// Partial mirror of executor.py → infer_subdirectory_from_suffix()
+//                               + categorize_scan_type()
+// ============================================================================
 export const categorizeFile = (file: FileItem): string => {
   const name = file.name.toLowerCase();
 
   // Functional scans (task-based)
-  if (name.includes("task-") && name.includes("bold")) {
-    return "functional-bold";
-  }
-  if (name.endsWith(".snirf")) {
-    return "functional-nirs";
-  }
-
+  if (name.includes("task-") && name.includes("bold")) return "functional-bold";
+  if (name.endsWith(".snirf")) return "functional-nirs";
   if (name.endsWith(".nirs")) return "functional-nirs";
   if (name.endsWith(".mat")) return "functional-nirs";
 
   // Anatomical scans
-  if (name.includes("t1w")) {
-    return "anatomical-T1w";
-  }
-  if (name.includes("t2w") || name.includes("inplanet2")) {
+  if (name.includes("t1w")) return "anatomical-T1w";
+  if (name.includes("t2w") || name.includes("inplanet2"))
     return "anatomical-T2w";
-  }
-  if (name.includes("flair")) {
-    return "anatomical-FLAIR";
-  }
-
+  if (name.includes("flair")) return "anatomical-FLAIR";
   if (name.endsWith(".dcm")) return "anatomical-dicom";
 
+  // JNIfTI — mirrors JNIFTI_EXT in constants.py: {'.jnii', '.bnii'}
+  if (name.endsWith(".jnii") || name.endsWith(".bnii"))
+    return "anatomical-jnifti";
+
   // Diffusion
-  if (name.includes("dwi") || name.includes("diffusion")) {
-    return "diffusion";
-  }
+  if (name.includes("dwi") || name.includes("diffusion")) return "diffusion";
 
   // Field maps
-  if (name.includes("fieldmap") || name.includes("fmap")) {
-    return "fieldmap";
-  }
+  if (name.includes("fieldmap") || name.includes("fmap")) return "fieldmap";
 
-  // Fall back to file type
+  // Array/HDF5 (non-SNIRF)
+  if (name.endsWith(".h5") || name.endsWith(".hdf5")) return "array";
+
+  // Fall back to fileType from fileProcessors.ts
   return file.fileType || "unknown";
 };
 
-/**
- * Detect modality from file collection
- */
+// ============================================================================
+// Detect modality from file collection
+// Rough equivalent of evidence.py → detect_kind() + constants.py MODALITY_*
+// ============================================================================
 export const detectModality = (files: FileItem[]): string => {
   const counts: Record<string, number> = {};
   files.forEach((f) => {
@@ -57,31 +63,29 @@ export const detectModality = (files: FileItem[]): string => {
   });
 
   if (counts.nifti > 0 || counts.dicom > 0) return "mri";
+  // FIX: fileProcessors.ts returns "nirs" for .nirs files, not "homer3"
   if (
     counts.hdf5 > 0 ||
     counts.matlab > 0 ||
-    counts.homer3 > 0 ||
+    counts.nirs > 0 ||
     files.some((f) => f.name.endsWith(".snirf"))
   )
     return "nirs";
   return "mixed";
 };
 
-/**
- * Get file extension counts
- */
+// ============================================================================
+// Get file extension counts
+// Mirrors evidence.py → by_ext dict construction.
+// Uses ".nii.gz" as a single key — mirrors Python: p.name.lower().endswith(".nii.gz")
+// ============================================================================
 export const getCountsByExtension = (
   files: FileItem[]
 ): Record<string, number> => {
   const counts: Record<string, number> = {};
-  // files.forEach((f) => {
-  //   const ext = f.fileType || "unknown";
-  //   counts[ext] = (counts[ext] || 0) + 1;
-  // });
   files
     .filter((f) => f.source === "user" && f.type === "file")
     .forEach((f) => {
-      // Mirror Python: use ".nii.gz" as a single key for .nii.gz files
       const name = f.name.toLowerCase();
       const ext = name.endsWith(".nii.gz")
         ? ".nii.gz"
@@ -91,9 +95,13 @@ export const getCountsByExtension = (
   return counts;
 };
 
-/**
- * Extract user context from metadata files
- */
+// ============================================================================
+// Extract user context from metadata files
+// Partial mirror of evidence.py → _extract_document_content() +
+// the documents[] assembly in _build_evidence_bundle_internal().
+//
+// Python reads files from disk; this reads from VFS FileItem.content.
+// ============================================================================
 export const getUserContextText = (files: FileItem[]): string => {
   const readme = files.find((f) => f.name.toLowerCase().includes("readme"));
   const instructions = files.find(
@@ -130,243 +138,3 @@ export const getUserContextText = (files: FileItem[]): string => {
   });
   return parts.join("\n\n");
 };
-
-/** (not using yet)
- * Analyze filename patterns to detect subjects
- * (Simplified version inspired by auto-bidsify's filename_tokenizer)
- */
-export const analyzeFilenamePatterns = (
-  files: FileItem[]
-): {
-  subjectCount: number;
-  subjectIds: string[];
-  hasRunNumbers: boolean;
-  hasTaskNames: boolean;
-} => {
-  const dataFiles = files.filter((f) => f.type === "file" && !f.isUserMeta);
-  const subjectIds = new Set<string>();
-  let hasRunNumbers = false;
-  let hasTaskNames = false;
-
-  dataFiles.forEach((f) => {
-    const name = f.name;
-
-    // Extract subject ID (sub-01, sub-02, etc.)
-    const subMatch = name.match(/sub-(\d+)/i);
-    if (subMatch) {
-      subjectIds.add(subMatch[1]);
-    }
-
-    // Check for run numbers
-    if (name.includes("_run-")) {
-      hasRunNumbers = true;
-    }
-
-    // Check for task names
-    if (name.includes("task-")) {
-      hasTaskNames = true;
-    }
-  });
-
-  return {
-    subjectCount: subjectIds.size,
-    subjectIds: Array.from(subjectIds).sort(),
-    hasRunNumbers,
-    hasTaskNames,
-  };
-};
-
-// add to fileAnalyzers.ts
-
-// export interface SubjectRecord {
-//   original_id: string;
-//   numeric_id: string;
-//   site: string | null;
-//   pattern_name: string;
-//   file_count: number;
-// }
-
-// export interface SubjectAnalysis {
-//   success: boolean;
-//   method: string;
-//   subject_records: SubjectRecord[];
-//   subject_count: number;
-//   has_site_info: boolean;
-//   variants_by_subject: Record<string, any>;
-//   python_generated_filename_rules: any[];
-//   id_mapping: {
-//     id_mapping: Record<string, string>;
-//     reverse_mapping: Record<string, string>;
-//     strategy_used: string;
-//     metadata_columns: string[];
-//   };
-// }
-
-// // mirrors _extract_subjects_from_directory_structure
-// const extractFromDirectoryStructure = (
-//   allFiles: string[]
-// ): Omit<SubjectAnalysis, "id_mapping"> | null => {
-//   const patterns: Array<[RegExp, boolean, number, number | null, string]> = [
-//     [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"],
-//     [/^sub-(\d+)$/i, false, 1, null, "standard_bids"],
-//     [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"],
-//     [/^(\d{3,})$/, false, 1, null, "numeric_only"],
-//   ];
-
-//   const subjectRecords: SubjectRecord[] = [];
-//   const seenIds = new Set<string>();
-
-//   for (const filepath of allFiles) {
-//     const parts = filepath.split("/");
-//     for (const part of parts.slice(0, 2)) {
-//       for (const [
-//         regex,
-//         hasSite,
-//         idGroup,
-//         siteGroup,
-//         patternName,
-//       ] of patterns) {
-//         const match = part.match(regex);
-//         if (match) {
-//           const originalId = match[0];
-//           if (seenIds.has(originalId)) break;
-//           seenIds.add(originalId);
-//           subjectRecords.push({
-//             original_id: originalId,
-//             numeric_id: match[idGroup],
-//             site: hasSite && siteGroup ? match[siteGroup] : null,
-//             pattern_name: patternName,
-//             file_count: 0,
-//           });
-//           break;
-//         }
-//       }
-//     }
-//   }
-
-//   if (subjectRecords.length === 0) return null;
-
-//   subjectRecords.sort((a, b) => {
-//     const na = parseInt(a.numeric_id) || 0;
-//     const nb = parseInt(b.numeric_id) || 0;
-//     return na - nb;
-//   });
-
-//   return {
-//     success: true,
-//     method: "directory_structure",
-//     subject_records: subjectRecords,
-//     subject_count: subjectRecords.length,
-//     has_site_info: subjectRecords.some((r) => r.site !== null),
-//     variants_by_subject: {},
-//     python_generated_filename_rules: [],
-//   };
-// };
-
-// // mirrors _extract_subjects_from_flat_filenames
-// const extractFromFlatFilenames = (
-//   allFiles: string[]
-// ): Omit<SubjectAnalysis, "id_mapping"> | null => {
-//   const identifierToFiles: Record<string, string[]> = {};
-
-//   for (const filepath of allFiles) {
-//     const filename = filepath.split("/").pop() || "";
-//     const nameNoExt = filename
-//       .replace(/\.[^/.]+$/, "")
-//       .replace(/\.nii\.gz$/, "");
-//     const match = nameNoExt.match(/^([A-Za-z0-9\-]+)/);
-//     if (match) {
-//       const identifier = match[1];
-//       if (!identifierToFiles[identifier]) identifierToFiles[identifier] = [];
-//       identifierToFiles[identifier].push(filepath);
-//     }
-//   }
-
-//   if (Object.keys(identifierToFiles).length === 0) return null;
-
-//   const extractNumeric = (id: string): number => {
-//     const nums = id.match(/\d+/g);
-//     return nums ? parseInt(nums[nums.length - 1]) : 999999;
-//   };
-
-//   const sortedIdentifiers = Object.keys(identifierToFiles).sort(
-//     (a, b) => extractNumeric(a) - extractNumeric(b)
-//   );
-
-//   const subjectRecords: SubjectRecord[] = sortedIdentifiers.map((id, i) => ({
-//     original_id: id,
-//     numeric_id: String(i + 1),
-//     site: null,
-//     pattern_name: "dominant_prefix",
-//     file_count: identifierToFiles[id].length,
-//   }));
-
-//   return {
-//     success: true,
-//     method: "dominant_prefix_fallback",
-//     subject_records: subjectRecords,
-//     subject_count: subjectRecords.length,
-//     has_site_info: false,
-//     variants_by_subject: {},
-//     python_generated_filename_rules: [],
-//   };
-// };
-
-// // mirrors _generate_subject_id_mapping
-// const generateIdMapping = (
-//   subjectInfo: Omit<SubjectAnalysis, "id_mapping">
-// ): SubjectAnalysis["id_mapping"] => {
-//   const records = subjectInfo.subject_records;
-//   const idMapping: Record<string, string> = {};
-//   const reverseMapping: Record<string, string> = {};
-
-//   // detect already-BIDS format (sub-01, sub-02...)
-//   const allAlreadyBids = records.every((r) => /^sub-\w+$/i.test(r.original_id));
-
-//   if (allAlreadyBids) {
-//     for (const rec of records) {
-//       const bidsId = rec.original_id.replace(/^sub-/i, "");
-//       idMapping[rec.original_id] = bidsId;
-//       reverseMapping[bidsId] = rec.original_id;
-//     }
-//     return {
-//       id_mapping: idMapping,
-//       reverse_mapping: reverseMapping,
-//       strategy_used: "already_bids",
-//       metadata_columns: [],
-//     };
-//   }
-
-//   // numeric strategy
-//   for (let i = 0; i < records.length; i++) {
-//     const orig = records[i].original_id;
-//     const bidsId = String(i + 1);
-//     idMapping[orig] = bidsId;
-//     reverseMapping[bidsId] = orig;
-//   }
-
-//   return {
-//     id_mapping: idMapping,
-//     reverse_mapping: reverseMapping,
-//     strategy_used: "numeric",
-//     metadata_columns: ["original_id"],
-//   };
-// };
-
-// // main export — call this from llmHelpers
-// export const extractSubjectAnalysis = (allFiles: string[]): SubjectAnalysis => {
-//   const fromDir = extractFromDirectoryStructure(allFiles);
-//   const base = fromDir ??
-//     extractFromFlatFilenames(allFiles) ?? {
-//       success: false,
-//       method: "none",
-//       subject_records: [],
-//       subject_count: 0,
-//       has_site_info: false,
-//       variants_by_subject: {},
-//       python_generated_filename_rules: [],
-//     };
-
-//   const idMapping = generateIdMapping(base);
-//   return { ...base, id_mapping: idMapping };
-// };
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
index 4708c13..24aa930 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
@@ -1,6 +1,6 @@
 // src/components/DatasetOrganizer/utils/filenameTokenizer.ts
-// Port of autobidsify's filename_tokenizer.py
-// Philosophy: Python stats → dominant prefixes → subject IDs (no LLM needed for this part)
+// Mirrors filename_tokenizer.py
+
 export interface SubjectRecord {
   original_id: string;
   numeric_id: string;
@@ -52,40 +52,21 @@ const COMMON_WORDS = new Set([
   "experiment",
 ]);
 
+const DATA_EXTENSIONS =
+  /\.(snirf|nii|nii\.gz|dcm|mat|nirs|jnii|bnii|h5|hdf5|edf|bdf)$/i;
+
 // ============================================================================
-// FilenameTokenizer — mirrors FilenameTokenizer class in filename_tokenizer.py
+// FilenameTokenizer — mirrors FilenameTokenizer class in filename_tokenizer.py
 // ============================================================================
-
-/**
- * Advanced split: CamelCase + number boundaries
- * "VHMCT" → ["VHM", "CT"]
- * "CT1mm" → ["CT", "1", "mm"]
- * "sub82352" → ["sub", "82352"]
- */
-const splitAdvanced = (text: string): string[] => {
-  if (!text) return [];
-
-  // Keep known neuroimaging terms together
-  if (NEUROIMAGING_TERMS.has(text)) return [text];
-
-  // Split on type boundaries:
-  // - Uppercase sequence before uppercase+lowercase: "VHM" before "CT"
-  // - CamelCase: uppercase followed by lowercase
-  // - Letter/digit boundaries
-  const pattern = /([A-Z]+(?=[A-Z][a-z]|\b|[0-9])|[A-Z][a-z]+|[a-z]+|[0-9]+)/g;
-  const tokens = text.match(pattern) || [];
-  return tokens.filter((t) => t.length > 0);
-};
-
-/**
+/*
  * Tokenize a filename into meaningful tokens.
- * Mirrors FilenameTokenizer.tokenize() in filename_tokenizer.py
  *
  * Examples:
  *   "VHMCT1mm-Hip (134).dcm" → ["VHM", "CT", "1", "mm", "Hip", "134"]
  *   "Beijing_sub82352"       → ["Beijing", "sub", "82352"]
  *   "scan_001_T1w.nii"       → ["scan", "001", "T1w"]
  */
+
 export const tokenizeFilename = (filename: string): string[] => {
   // Step 1: Remove all extensions (up to 6 chars)
   let name = filename;
@@ -111,28 +92,60 @@ export const tokenizeFilename = (filename: string): string[] => {
   return tokens.filter((t) => t.trim().length >= 1);
 };
 
+/*
+ * Advanced split: CamelCase + number boundaries
+ * "VHMCT" → ["VHM", "CT"]
+ * "CT1mm" → ["CT", "1", "mm"]
+ * "sub82352" → ["sub", "82352"]
+ */
+const splitAdvanced = (text: string): string[] => {
+  if (!text) return [];
+
+  // Keep known neuroimaging terms together
+  if (NEUROIMAGING_TERMS.has(text)) return [text];
+
+  // Split on type boundaries:
+  // - Uppercase sequence before uppercase+lowercase: "VHM" before "CT"
+  // - CamelCase: uppercase followed by lowercase
+  // - Letter/digit boundaries
+  const pattern = /([A-Z]+(?=[A-Z][a-z]|\b|[0-9])|[A-Z][a-z]+|[a-z]+|[0-9]+)/g;
+  const tokens = text.match(pattern) || [];
+  return tokens.filter((t) => t.length > 0);
+};
+
 // ============================================================================
 // FilenamePatternAnalyzer — mirrors FilenamePatternAnalyzer class
 // ============================================================================
 
+interface TokenStatistics {
+  totalFiles: number;
+  tokenFrequency: Record<string, number>;
+  prefixFrequency: Record<string, number>;
+  dominantPrefixes: DominantPrefix[];
+  tokenPositions: Record<number, Record<string, number>>; // NEW — mirrors token_positions
+  insights: string[]; // NEW — mirrors _generate_insights()
+  uniqueTokenCount: number; // NEW
+  uniquePrefixCount: number; // NEW
+}
+
 interface DominantPrefix {
   prefix: string;
   count: number;
   percentage: number;
 }
 
-interface TokenStatistics {
-  totalFiles: number;
-  tokenFrequency: Record<string, number>;
-  prefixFrequency: Record<string, number>;
-  dominantPrefixes: DominantPrefix[];
+interface LLMPayload {
+  task: string;
+  statistics: TokenStatistics;
+  filenameSamples: string[];
+  userHints: Record<string, any>;
+  instructions: string;
 }
 
-/**
- * Find dominant prefixes — tokens appearing in >5% of files
- * that are not common words.
- * Mirrors FilenamePatternAnalyzer._find_dominant_prefixes()
- */
+// ─────────────────────────────────────────────────────────────────────────────
+// Mirrors FilenamePatternAnalyzer._find_dominant_prefixes()
+// ─────────────────────────────────────────────────────────────────────────────
+
 const findDominantPrefixes = (
   prefixCounter: Record<string, number>,
   totalFiles: number
@@ -154,18 +167,109 @@ const findDominantPrefixes = (
     }));
 };
 
-/**
- * Analyze token statistics across all filenames.
- * Mirrors FilenamePatternAnalyzer.analyze_token_statistics()
- */
+// ─────────────────────────────────────────────────────────────────────────────
+// _generate_insights()
+// Mirrors FilenamePatternAnalyzer._generate_insights()
+// ─────────────────────────────────────────────────────────────────────────────
+
+const generateInsights = (
+  allTokens: Record<string, number>,
+  prefixTokens: Record<string, number>,
+  dominantPrefixes: DominantPrefix[]
+): string[] => {
+  const insights: string[] = [];
+  const uniqueTokenCount = Object.keys(allTokens).length;
+
+  // Insight 1: token diversity
+  if (uniqueTokenCount < 20) {
+    insights.push(
+      `Low token diversity: only ${uniqueTokenCount} unique tokens across all files`
+    );
+  } else if (uniqueTokenCount > 100) {
+    insights.push(
+      `High token diversity: ${uniqueTokenCount} unique tokens detected`
+    );
+  }
+
+  // Insight 2: prefix distribution
+  if (dominantPrefixes.length === 0) {
+    insights.push("No dominant filename prefixes detected");
+  } else if (dominantPrefixes.length === 1) {
+    const p = dominantPrefixes[0];
+    insights.push(
+      `Single dominant prefix '${p.prefix}' in ${p.percentage}% of files`
+    );
+  } else if (dominantPrefixes.length === 2) {
+    const [p1, p2] = dominantPrefixes;
+    insights.push(
+      `Two major prefixes detected: '${p1.prefix}' (${p1.percentage}%) and '${p2.prefix}' (${p2.percentage}%)`
+    );
+  } else {
+    insights.push(
+      `${dominantPrefixes.length} dominant prefixes detected, suggesting possible subject groupings`
+    );
+  }
+
+  // Insight 3: most common tokens
+  const topTokens = Object.entries(allTokens)
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, 3);
+  if (topTokens.length > 0) {
+    const commonList = topTokens.map(([t, c]) => `'${t}' (${c})`).join(", ");
+    insights.push(`Most frequent tokens: ${commonList}`);
+  }
+
+  return insights;
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+// sampleDiverseFilenames() — mirrors FilenamePatternAnalyzer._sample_diverse_filenames()
+// Returns at most maxSamples filenames, taking an even share from each first-token group.
+// ─────────────────────────────────────────────────────────────────────────────
+// Inputs no longer than maxSamples are returned whole as a sorted copy (input not mutated).
+const sampleDiverseFilenames = (
+  filenames: string[],
+  maxSamples: number = 30
+): string[] => {
+  if (filenames.length <= maxSamples) return [...filenames].sort();
+  // Null-prototype map: a first token such as "constructor" must not resolve to Object.prototype.
+  // Group by first token (mirrors Python: prefix_groups[prefix].append(filename))
+  const prefixGroups: Record<string, string[]> = Object.create(null);
+  for (const filename of filenames) {
+    const tokens = tokenizeFilename(filename);
+    const prefix = tokens.length > 0 ? tokens[0] : "none";
+    if (!prefixGroups[prefix]) prefixGroups[prefix] = [];
+    prefixGroups[prefix].push(filename);
+  }
+  // Even share per group, but never fewer than one sample from a visited group.
+  const groupCount = Object.keys(prefixGroups).length;
+  const samplesPerGroup = Math.max(1, Math.floor(maxSamples / groupCount));
+  // Visit groups in sorted-prefix order so the sample is deterministic.
+  const samples: string[] = [];
+  for (const prefix of Object.keys(prefixGroups).sort()) {
+    const groupFiles = prefixGroups[prefix];
+    const n = Math.min(groupFiles.length, samplesPerGroup);
+    samples.push(...[...groupFiles].sort().slice(0, n));
+    if (samples.length >= maxSamples) break;
+  }
+  // Groups smaller than their share can leave the result shorter than maxSamples.
+  return samples.slice(0, maxSamples);
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+// analyze_token_statistics()
+// Mirrors FilenamePatternAnalyzer.analyze_token_statistics()
+// ─────────────────────────────────────────────────────────────────────────────
+
 export const analyzeTokenStatistics = (
   filenames: string[]
 ): TokenStatistics => {
   const allTokens: Record<string, number> = {};
-  const prefixTokens: Record<string, number> = {}; // first token only
+  const prefixTokens: Record<string, number> = {};
+  const positionTokens: Record<number, Record<string, number>> = {};
 
   for (const filename of filenames) {
-    // Extract just filename from path
+    // Mirror Python __init__: strip to just filename if path provided
     const fname = filename.includes("/")
       ? filename.split("/").pop()!
       : filename;
@@ -177,23 +281,331 @@ export const analyzeTokenStatistics = (
       allTokens[token] = (allTokens[token] || 0) + 1;
     }
 
-    // CRITICAL: use first TOKEN as prefix (not regex match)
+    // CRITICAL: use first TOKEN as prefix (not regex)
     if (tokens.length > 0) {
       const firstToken = tokens[0];
       prefixTokens[firstToken] = (prefixTokens[firstToken] || 0) + 1;
     }
+
+    // NEW: count tokens by position — mirrors position_tokens[i][token] += 1
+    tokens.forEach((token, i) => {
+      if (!positionTokens[i]) positionTokens[i] = {};
+      positionTokens[i][token] = (positionTokens[i][token] || 0) + 1;
+    });
+  }
+
+  // Cap frequencies — mirrors .most_common(50) / .most_common(20)
+  const tokenFrequency = Object.fromEntries(
+    Object.entries(allTokens)
+      .sort((a, b) => b[1] - a[1])
+      .slice(0, 50)
+  );
+  const prefixFrequency = Object.fromEntries(
+    Object.entries(prefixTokens)
+      .sort((a, b) => b[1] - a[1])
+      .slice(0, 20)
+  );
+
+  // Cap each position bucket at top 10 — mirrors .most_common(10)
+  const tokenPositions: Record<number, Record<string, number>> = {};
+  for (const [pos, counter] of Object.entries(positionTokens)) {
+    tokenPositions[Number(pos)] = Object.fromEntries(
+      Object.entries(counter)
+        .sort((a, b) => b[1] - a[1])
+        .slice(0, 10)
+    );
   }
 
   const dominantPrefixes = findDominantPrefixes(prefixTokens, filenames.length);
+  const insights = generateInsights(allTokens, prefixTokens, dominantPrefixes);
 
   return {
     totalFiles: filenames.length,
-    tokenFrequency: allTokens,
-    prefixFrequency: prefixTokens,
+    tokenFrequency,
+    prefixFrequency,
     dominantPrefixes,
+    tokenPositions, // NEW
+    insights, // NEW
+    uniqueTokenCount: Object.keys(allTokens).length, // NEW
+    uniquePrefixCount: Object.keys(prefixTokens).length, // NEW
+  };
+};
+
+// ─────────────────────────────────────────────────────────────────────────────
+// build_llm_payload()
+// Mirrors FilenamePatternAnalyzer.build_llm_payload()
+// ─────────────────────────────────────────────────────────────────────────────
+
+export const buildLLMPayload = (
+  filenames: string[],
+  userHints: Record<string, any>,
+  maxSamples: number = 30
+): LLMPayload => {
+  const stats = analyzeTokenStatistics(filenames);
+  const filenameSamples = sampleDiverseFilenames(filenames, maxSamples);
+
+  return {
+    task: "subject_identification",
+    statistics: stats,
+    filenameSamples,
+    userHints,
+    instructions:
+      "Analyze the filename token statistics and samples. " +
+      "Determine how to group files by subject. " +
+      "The 'dominant_prefixes' may indicate subject identifiers. " +
+      "The 'insights' provide observations. " +
+      "User hint 'n_subjects' can help validate your hypothesis.",
+  };
+};
+
+/**
+ * Analyze token statistics across all filenames.
+ * Mirrors FilenamePatternAnalyzer.analyze_token_statistics()
+ */
+// export const analyzeTokenStatistics = (
+//   filenames: string[]
+// ): TokenStatistics => {
+//   const allTokens: Record<string, number> = {};
+//   const prefixTokens: Record<string, number> = {}; // first token only
+
+//   for (const filename of filenames) {
+//     // Extract just filename from path
+//     const fname = filename.includes("/")
+//       ? filename.split("/").pop()!
+//       : filename;
+
+//     const tokens = tokenizeFilename(fname);
+
+//     // Count all tokens
+//     for (const token of tokens) {
+//       allTokens[token] = (allTokens[token] || 0) + 1;
+//     }
+
+//     // CRITICAL: use first TOKEN as prefix (not regex match)
+//     if (tokens.length > 0) {
+//       const firstToken = tokens[0];
+//       prefixTokens[firstToken] = (prefixTokens[firstToken] || 0) + 1;
+//     }
+//   }
+
+//   const dominantPrefixes = findDominantPrefixes(prefixTokens, filenames.length);
+
+//   return {
+//     totalFiles: filenames.length,
+//     tokenFrequency: allTokens,
+//     prefixFrequency: prefixTokens,
+//     dominantPrefixes,
+//   };
+// };
+
+/*
+ * Find dominant prefixes — tokens appearing in >5% of files
+ * that are not common words.
+ * Mirrors FilenamePatternAnalyzer._find_dominant_prefixes()
+ */
+// const findDominantPrefixes = (
+//   prefixCounter: Record<string, number>,
+//   totalFiles: number
+// ): DominantPrefix[] => {
+//   const threshold = totalFiles * 0.05; // 5% threshold
+
+//   return Object.entries(prefixCounter)
+//     .filter(([prefix, count]) => {
+//       if (count < threshold) return false;
+//       if (COMMON_WORDS.has(prefix.toLowerCase())) return false;
+//       return true;
+//     })
+//     .sort((a, b) => b[1] - a[1])
+//     .slice(0, 20)
+//     .map(([prefix, count]) => ({
+//       prefix,
+//       count,
+//       percentage: Math.round((count / totalFiles) * 1000) / 10,
+//     }));
+// };
+
+// ============================================================================
+// Integration Functions
+// Mirrors analyze_filenames_for_subjects() + _generate_recommendation() in filename_tokenizer.py
+// ============================================================================
+
+export interface FilenameAnalysisResult {
+  python_statistics: TokenStatistics;
+  llm_payload: LLMPayload;
+  confidence: "high" | "medium" | "low" | "none";
+  recommendation: string;
+}
+
+/**
+ * Main entry point: analyze filenames to detect subject groupings.
+ * Mirrors analyze_filenames_for_subjects() in filename_tokenizer.py
+ *
+ * Called from buildEvidenceBundle() in llmHelpers.ts — replaces the
+ * manual filenameAnalysis block that was built inline there.
+ */
+export const analyzeFilenamesForSubjects = (
+  allFiles: string[],
+  userHints: Record<string, any>
+): FilenameAnalysisResult => {
+  // Mirror Python: extract just filenames, not full paths
+  const filenames = allFiles.map((f) =>
+    f.includes("/") ? f.split("/").pop()! : f
+  );
+
+  const stats = analyzeTokenStatistics(filenames);
+  const llmPayload = buildLLMPayload(filenames, userHints, 30);
+
+  // Assess confidence — mirrors Python confidence logic exactly
+  const dominantCount = stats.dominantPrefixes.length;
+  const userNSubjects: number | null = userHints?.n_subjects ?? null;
+
+  let confidence: "high" | "medium" | "low" | "none" = "none";
+  if (dominantCount > 0) {
+    if (userNSubjects && dominantCount === userNSubjects) {
+      confidence = "high";
+    } else if (dominantCount >= 2 && dominantCount <= 10) {
+      confidence = "medium";
+    } else {
+      confidence = "low";
+    }
+  }
+
+  const recommendation = generateRecommendation(stats, userHints);
+
+  return {
+    python_statistics: stats,
+    llm_payload: llmPayload,
+    confidence,
+    recommendation,
   };
 };
 
+/**
+ * Mirrors _generate_recommendation() in filename_tokenizer.py
+ */
+const generateRecommendation = (
+  stats: TokenStatistics,
+  userHints: Record<string, any>
+): string => {
+  const dominantPrefixes = stats.dominantPrefixes;
+  const userNSubjects: number | null = userHints?.n_subjects ?? null;
+
+  if (dominantPrefixes.length === 0) {
+    return (
+      "No clear filename patterns detected. " +
+      "Recommend using --describe to explain subject identification."
+    );
+  }
+
+  if (userNSubjects && dominantPrefixes.length === userNSubjects) {
+    const prefixesStr = dominantPrefixes.map((p) => p.prefix).join(", ");
+    return (
+      `HIGH CONFIDENCE: Detected ${dominantPrefixes.length} dominant prefixes ` +
+      `(${prefixesStr}) matching user hint of ${userNSubjects} subjects.`
+    );
+  }
+
+  if (dominantPrefixes.length >= 2 && dominantPrefixes.length <= 5) {
+    return (
+      `MEDIUM CONFIDENCE: Detected ${dominantPrefixes.length} potential subject groups. ` +
+      `Will send to LLM for validation.`
+    );
+  }
+
+  return (
+    `LOW CONFIDENCE: Found ${dominantPrefixes.length} prefix patterns, ` +
+    `which may or may not represent subjects. LLM will analyze.`
+  );
+};
+
+// ============================================================================
+// SubjectGroupingDecision
+// Mirrors SubjectGroupingDecision class in filename_tokenizer.py
+// Not used in runtime flow — used as typed helpers when parsing LLM responses
+// ============================================================================
+
+export interface PrefixMappingDecision {
+  method: "prefix_based";
+  description: string;
+  rules: Array<{
+    prefix: string;
+    maps_to_subject: string;
+    match_pattern: string;
+  }>;
+  participant_metadata: Record<string, Record<string, any>>;
+}
+
+export interface SequentialAssignmentDecision {
+  method: "sequential";
+  n_subjects: number;
+  note: string;
+}
+
+export interface BlockingQuestionDecision {
+  method: "blocked";
+  reason: string;
+  question: {
+    type: string;
+    severity: string;
+    message: string;
+    options: string[];
+  };
+}
+
+export type SubjectGroupingDecision =
+  | PrefixMappingDecision
+  | SequentialAssignmentDecision
+  | BlockingQuestionDecision;
+
+/**
+ * Mirrors SubjectGroupingDecision.create_prefix_mapping()
+ */
+export const createPrefixMapping = (
+  prefixToSubject: Record<string, string>,
+  metadata?: Record<string, Record<string, any>>
+): PrefixMappingDecision => ({
+  method: "prefix_based",
+  description: `Files grouped by ${
+    Object.keys(prefixToSubject).length
+  } filename prefixes`,
+  rules: Object.entries(prefixToSubject).map(([prefix, subjId]) => ({
+    prefix,
+    maps_to_subject: subjId,
+    match_pattern: `${prefix}*`,
+  })),
+  participant_metadata: metadata ?? {},
+});
+
+/**
+ * Mirrors SubjectGroupingDecision.create_sequential_assignment()
+ */
+export const createSequentialAssignment = (
+  nSubjects: number
+): SequentialAssignmentDecision => ({
+  method: "sequential",
+  n_subjects: nSubjects,
+  note:
+    "No clear subject grouping pattern detected in filenames. " +
+    "Assigning sequential IDs based on file order or user hint.",
+});
+
+/**
+ * Mirrors SubjectGroupingDecision.create_blocking_question()
+ */
+export const createBlockingQuestion = (
+  reason: string,
+  options: string[]
+): BlockingQuestionDecision => ({
+  method: "blocked",
+  reason,
+  question: {
+    type: "subject_grouping",
+    severity: "block",
+    message: reason,
+    options,
+  },
+});
+
 // ============================================================================
 // extractSubjectAnalysis — mirrors build_bids_plan()'s subject extraction
 // ============================================================================
@@ -495,7 +907,6 @@ const extractFromDirectoryStructure = (
   };
 };
 
-const DATA_EXTENSIONS = /\.(snirf|nii|nii\.gz|dcm|mat|nirs|h5|hdf5|edf|bdf)$/i;
 const TRIO_FILENAMES = new Set([
   "dataset_description.json",
   "participants.tsv",
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts
new file mode 100644
index 0000000..e69de29

From edb2a8f9b4291eb30e91a79dbefad237e123378b Mon Sep 17 00:00:00 2001
From: elainefan331 <elainefan331@gmail.com>
Date: Fri, 3 Apr 2026 17:40:39 -0400
Subject: [PATCH 5/5] feat: reorganize the code for integrated autobidsify

---
 package.json                                  |    1 +
 .../Dashboard/DatasetOrganizer/LLMPanel.tsx   | 1384 ++++++++---------
 .../DatasetOrganizer/utils/fileAnalyzers.ts   |  205 ++-
 .../DatasetOrganizer/utils/fileProcessors.ts  |   68 +-
 .../utils/filenameTokenizer.ts                |  588 -------
 .../Dashboard/DatasetOrganizer/utils/llm.ts   |  963 ++++++++++++
 .../DatasetOrganizer/utils/llmHelpers.ts      |  983 +++++++-----
 .../DatasetOrganizer/utils/llmPrompts.ts      |  619 +-------
 .../DatasetOrganizer/utils/plannerHelpers.ts  |  870 +++++++++++
 .../DatasetOrganizer/utils/trioHelpers.ts     |  762 +++++++++
 src/services/ollama.service.ts                |   17 +-
 yarn.lock                                     |    5 +
 12 files changed, 4221 insertions(+), 2244 deletions(-)
 create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/llm.ts
 create mode 100644 src/components/User/Dashboard/DatasetOrganizer/utils/trioHelpers.ts

diff --git a/package.json b/package.json
index 1f8144e..6f414ef 100644
--- a/package.json
+++ b/package.json
@@ -63,6 +63,7 @@
   "devDependencies": {
     "@babel/plugin-proposal-private-property-in-object": "^7.21.11",
     "@trivago/prettier-plugin-sort-imports": "^4.2.0",
+    "@types/js-yaml": "^4.0.9",
     "@types/node": "^20.5.7",
     "@types/pako": "^2.0.3",
     "@typescript-eslint/eslint-plugin": "^5.31.0",
diff --git a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx
index a8fd052..9658ec6 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx
+++ b/src/components/User/Dashboard/DatasetOrganizer/LLMPanel.tsx
@@ -1,23 +1,12 @@
 import { generateId } from "./utils/fileProcessors";
-import { extractSubjectAnalysis } from "./utils/filenameTokenizer";
-//add
+import { LLMConfig } from "./utils/llm";
 import {
-  buildFileSummary,
-  analyzeFilePatterns,
-  getUserContext,
-  getFileAnnotations,
-  downloadJSON,
   buildEvidenceBundle,
-  extractSubjectsFromFiles,
   buildIngestInfo,
+  downloadJSON,
 } from "./utils/llmHelpers";
-import {
-  getDatasetDescriptionPrompt,
-  getReadmePrompt,
-  getParticipantsPrompt,
-  getConversionScriptPrompt,
-  getBIDSPlanPrompt,
-} from "./utils/llmPrompts";
+import { buildBidsPlan } from "./utils/plannerHelpers";
+import { generateTrioFiles } from "./utils/trioHelpers";
 import {
   Close,
   ContentCopy,
@@ -40,10 +29,12 @@ import {
   Alert,
 } from "@mui/material";
 import { Colors } from "design/theme";
+import { dump as yamlDump } from "js-yaml";
 import JSZip from "jszip";
 import React, { useState, useEffect } from "react";
 import { FileItem } from "redux/projects/types/projects.interface";
-import { OllamaService } from "services/ollama.service";
+
+// import { OllamaService } from "services/ollama.service";
 
 interface LLMPanelProps {
   files: FileItem[];
@@ -158,6 +149,16 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
   const [panelHeight, setPanelHeight] = useState<number>(450);
   const [isResizing, setIsResizing] = useState(false);
 
+  // Build the LLMConfig passed to helper calls from the in-scope provider/model/apiKey and currentProvider's settings — mirrors autobidsify CLI arg assembly
+  const buildLLMConfig = (): LLMConfig => ({
+    provider,
+    model,
+    apiKey,
+    baseUrl: currentProvider.baseUrl, // the three fields below are read from currentProvider, not from separate state
+    isAnthropic: currentProvider.isAnthropic,
+    noApiKey: currentProvider.noApiKey,
+  });
+
   // ========================================================================
   // BUTTON 1: GENERATE EVIDENCE BUNDLE
   // ========================================================================
@@ -202,406 +203,27 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
       setError("Please generate evidence bundle first");
       return;
     }
-
     if (!currentProvider.noApiKey && !apiKey.trim()) {
       setError("Please enter an API key");
       return;
     }
 
-    // Create abort controller
     const controller = new AbortController();
     setAbortController(controller);
-
     setGeneratingTrio(true);
     setError(null);
     setStatus("Generating BIDS trio files...");
 
     try {
-      const userText = evidenceBundle.user_hints.user_text || "";
-
-      // ==========================================
-      // Call 1: Generate dataset_description.json
-      // ==========================================
-      let datasetDesc: any;
-      if (evidenceBundle.trio_found?.["dataset_description.json"]) {
-        setStatus("1/3 dataset_description.json already exists, skipping...");
-        const existing = files.find(
-          (f) => f.source === "user" && f.name === "dataset_description.json"
-        );
-        datasetDesc = existing?.content ? JSON.parse(existing.content) : {};
-      } else {
-        setStatus("1/3 Generating dataset_description.json...");
-        const ddPrompt = getDatasetDescriptionPrompt(userText, evidenceBundle);
-
-        let ddResponse;
-        if (currentProvider.isAnthropic) {
-          ddResponse = await fetch(currentProvider.baseUrl, {
-            method: "POST",
-            signal: controller.signal,
-            headers: {
-              "Content-Type": "application/json",
-              "x-api-key": apiKey,
-              "anthropic-version": "2023-06-01",
-            },
-            body: JSON.stringify({
-              model,
-              max_tokens: 2048,
-              messages: [{ role: "user", content: ddPrompt }],
-            }),
-          });
-        } else if (provider === "ollama") {
-          // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434";
-          // ddResponse = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, {
-          //   method: "POST",
-          //   signal: controller.signal,
-          //   headers: { "Content-Type": "application/json" },
-          //   body: JSON.stringify({
-          //     model,
-          //     messages: [{ role: "user", content: ddPrompt }],
-          //     stream: false,
-          //   }),
-          // });
-          ddResponse = await OllamaService.chat(model, [
-            { role: "user", content: ddPrompt },
-          ]);
-        } else {
-          ddResponse = await fetch(currentProvider.baseUrl, {
-            method: "POST",
-            signal: controller.signal,
-            headers: {
-              "Content-Type": "application/json",
-              Authorization: `Bearer ${apiKey}`,
-            },
-            body: JSON.stringify({
-              model,
-              messages: [{ role: "user", content: ddPrompt }],
-              max_tokens: 2048,
-            }),
-          });
-        }
-
-        // const ddData = await ddResponse.json();
-        const ddData =
-          provider === "ollama" ? ddResponse : await ddResponse.json();
-        let ddText = currentProvider.isAnthropic
-          ? ddData.content[0].text
-          : ddData.choices[0].message.content;
-
-        // Clean up markdown fences
-        ddText = ddText
-          .replace(/^```json\n?/g, "")
-          .replace(/\n?```$/g, "")
-          .trim();
-        datasetDesc = JSON.parse(ddText);
-      }
-
-      // ==========================================
-      // Call 2: Generate README.md
-      // ==========================================
-      let readmeContent: string;
-      if (evidenceBundle.trio_found?.["README.md"]) {
-        setStatus("2/3 README.md already exists, skipping...");
-        const existing = files.find(
-          (f) =>
-            f.source === "user" &&
-            ["README.md", "README.txt", "README.rst", "readme.md"].includes(
-              f.name
-            )
-        );
-        readmeContent = existing?.content || "";
-      } else {
-        setStatus("2/3 Generating README.md...");
-        const readmePrompt = getReadmePrompt(userText);
-
-        let readmeResponse;
-        if (currentProvider.isAnthropic) {
-          readmeResponse = await fetch(currentProvider.baseUrl, {
-            method: "POST",
-            signal: controller.signal,
-            headers: {
-              "Content-Type": "application/json",
-              "x-api-key": apiKey,
-              "anthropic-version": "2023-06-01",
-            },
-            body: JSON.stringify({
-              model,
-              max_tokens: 2048,
-              messages: [{ role: "user", content: readmePrompt }],
-            }),
-          });
-        } else if (provider === "ollama") {
-          // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434";
-          // readmeResponse = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, {
-          //   method: "POST",
-          //   signal: controller.signal,
-          //   headers: { "Content-Type": "application/json" },
-          //   body: JSON.stringify({
-          //     model,
-          //     messages: [{ role: "user", content: readmePrompt }],
-          //     stream: false,
-          //   }),
-          // });
-          readmeResponse = await OllamaService.chat(model, [
-            { role: "user", content: readmePrompt },
-          ]);
-        } else {
-          readmeResponse = await fetch(currentProvider.baseUrl, {
-            method: "POST",
-            signal: controller.signal,
-            headers: {
-              "Content-Type": "application/json",
-              Authorization: `Bearer ${apiKey}`,
-            },
-            body: JSON.stringify({
-              model,
-              messages: [{ role: "user", content: readmePrompt }],
-              max_tokens: 2048,
-            }),
-          });
-        }
-
-        // const readmeData = await readmeResponse.json();
-        const readmeData =
-          provider === "ollama" ? readmeResponse : await readmeResponse.json();
-        readmeContent = currentProvider.isAnthropic
-          ? readmeData.content[0].text
-          : readmeData.choices[0].message.content;
-      }
-      // ==========================================
-      // Call 3: Generate participants.tsv
-      // ==========================================
-      let participantsContent: string;
-      if (evidenceBundle.trio_found?.["participants.tsv"]) {
-        setStatus("3/3 participants.tsv already exists, skipping...");
-        const existing = files.find(
-          (f) => f.source === "user" && f.name === "participants.tsv"
-        );
-        participantsContent = existing?.content || "";
-      } else {
-        setStatus("3/3 Generating participants.tsv...");
-        const partsPrompt = getParticipantsPrompt(userText);
-
-        // ← ADD HERE: compute subject analysis before try block so it's in scope
-        const currentSubjectAnalysis = extractSubjectAnalysis(
-          evidenceBundle?.all_files || [],
-          evidenceBundle?.user_hints?.n_subjects,
-          evidenceBundle?.filename_analysis?.python_statistics
-            ?.dominant_prefixes
-        );
+      const { datasetDesc, readmeContent, participantsTsv, skipped } =
+        await generateTrioFiles({
+          evidenceBundle,
+          files,
+          llmConfig: buildLLMConfig(),
+          signal: controller.signal,
+          onStatus: setStatus,
+        });
 
-        console.log("=== PARTICIPANTS DEBUG ===");
-        console.log("method:", currentSubjectAnalysis?.method);
-        console.log("subject_count:", currentSubjectAnalysis?.subject_count);
-        console.log(
-          "id_mapping:",
-          currentSubjectAnalysis?.id_mapping?.id_mapping
-        );
-        console.log(
-          "reverse_mapping:",
-          currentSubjectAnalysis?.id_mapping?.reverse_mapping
-        );
-        console.log(
-          "subject_records sample:",
-          currentSubjectAnalysis?.subject_records?.slice(0, 3)
-        );
-        const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
-        const expectedCount = evidenceBundle?.user_hints?.n_subjects;
-        const subjectLabels: string[] =
-          idMap &&
-          Object.keys(idMap).length > 0 &&
-          (!expectedCount || Object.keys(idMap).length === expectedCount)
-            ? Object.values(idMap).map((id: string) => `sub-${id}`)
-            : Array.from(
-                {
-                  length: expectedCount || Object.keys(idMap || {}).length || 1,
-                },
-                (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
-              );
-
-        let partsResponse;
-        if (currentProvider.isAnthropic) {
-          partsResponse = await fetch(currentProvider.baseUrl, {
-            method: "POST",
-            signal: controller.signal,
-            headers: {
-              "Content-Type": "application/json",
-              "x-api-key": apiKey,
-              "anthropic-version": "2023-06-01",
-            },
-            body: JSON.stringify({
-              model,
-              max_tokens: 1024,
-              messages: [{ role: "user", content: partsPrompt }],
-            }),
-          });
-        } else if (provider === "ollama") {
-          // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434";
-          // partsResponse = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, {
-          //   method: "POST",
-          //   signal: controller.signal,
-          //   headers: { "Content-Type": "application/json" },
-          //   body: JSON.stringify({
-          //     model,
-          //     messages: [{ role: "user", content: partsPrompt }],
-          //     stream: false,
-          //   }),
-          // });
-          partsResponse = await OllamaService.chat(model, [
-            { role: "user", content: partsPrompt },
-          ]);
-        } else {
-          partsResponse = await fetch(currentProvider.baseUrl, {
-            method: "POST",
-            signal: controller.signal,
-            headers: {
-              "Content-Type": "application/json",
-              Authorization: `Bearer ${apiKey}`,
-            },
-            body: JSON.stringify({
-              model,
-              messages: [{ role: "user", content: partsPrompt }],
-              max_tokens: 1024,
-            }),
-          });
-        }
-
-        // const partsData = await partsResponse.json();
-        const partsData =
-          provider === "ollama" ? partsResponse : await partsResponse.json();
-        const participantsRaw = currentProvider.isAnthropic
-          ? partsData.content[0].text
-          : partsData.choices[0].message.content;
-
-        // Build TSV from schema
-        // try {
-        //   const schemaText = participantsRaw
-        //     .replace(/^```json\n?/g, "")
-        //     .replace(/\n?```$/g, "")
-        //     .trim();
-        //   const schema = JSON.parse(schemaText);
-        //   const columns: string[] = schema.columns.map((c: any) => c.name);
-
-        //   // Get subject IDs from evidence bundle (extracted by Python-style analysis)
-        //   // const idMapping =
-        //   //   evidenceBundle?.subject_analysis?.id_mapping?.id_mapping;
-        //   // const subjectLabels: string[] = idMapping
-        //   //   ? Object.values(idMapping).map((id) => `sub-${id}`)
-        //   //   : ["sub-01"]; // fallback if no subject analysis
-        //   // Get subject IDs from subjectAnalysis state (computed at plan stage)
-        //   // Fall back to computing fresh if plan hasn't been run yet
-        //   const currentSubjectAnalysis =
-        //     subjectAnalysis ||
-        //     extractSubjectAnalysis(
-        //       evidenceBundle?.all_files || [],
-        //       evidenceBundle?.user_hints?.n_subjects,
-        //       evidenceBundle?.filename_analysis?.python_statistics
-        //         ?.dominant_prefixes
-        //     );
-        //   const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
-        //   const subjectLabels: string[] =
-        //     idMap && Object.keys(idMap).length > 0
-        //       ? Object.values(idMap).map((id) => `sub-${id}`)
-        //       : Array.from(
-        //           { length: evidenceBundle?.user_hints?.n_subjects || 1 },
-        //           (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
-        //         );
-
-        //   const header = columns.join("\t");
-        //   // ====origin====
-        //   // const rows = subjectLabels.map((subId) =>
-        //   //   columns
-        //   //     .map((col: string) => (col === "participant_id" ? subId : "n/a"))
-        //   //     .join("\t")
-        //   // );
-        //   //====== end ======
-        //   // =====update start=====
-        //   const reverseMap =
-        //     currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
-        //   const subjectRecords = currentSubjectAnalysis?.subject_records || [];
-
-        //   const rows = subjectLabels.map((subId) => {
-        //     const bareId = subId.replace(/^sub-/, "");
-        //     const originalId = reverseMap[bareId];
-        //     const record = subjectRecords.find(
-        //       (r: any) => r.original_id === originalId
-        //     );
-        //     return columns
-        //       .map((col: string) => {
-        //         if (col === "participant_id") return subId;
-        //         if (col === "original_id") return originalId || "n/a";
-        //         if (col === "group") return (record as any)?.group || "n/a";
-        //         return "n/a";
-        //       })
-        //       .join("\t");
-        //   });
-        //   //====update end======
-        //   participantsContent = [header, ...rows].join("\n");
-        // } catch (e) {
-        //   // Fallback: LLM didn't return valid JSON schema, use raw content
-        //   participantsContent = participantsRaw
-        //     .replace(/^```\n?/g, "")
-        //     .replace(/\n?```$/g, "")
-        //     .trim();
-        // }
-        // Build TSV from schema + subject analysis
-        // Mirrors _generate_participants_tsv_from_python() in planner.py
-        try {
-          const schemaText = participantsRaw
-            .replace(/^```json\n?/g, "")
-            .replace(/\n?```$/g, "")
-            .trim();
-          const schema = JSON.parse(schemaText);
-
-          // LLM decides extra demographic columns (sex, age, group etc.)
-          // but we always add participant_id and original_id ourselves
-          const extraColumns: string[] = schema.columns
-            .map((c: any) => c.name)
-            .filter(
-              (name: string) =>
-                name !== "participant_id" && name !== "original_id"
-            );
-
-          // Always start with participant_id and original_id
-          const columns = ["participant_id", "original_id", ...extraColumns];
-
-          const reverseMap =
-            currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
-          const subjectRecords = currentSubjectAnalysis?.subject_records || [];
-
-          const header = columns.join("\t");
-          const rows = subjectLabels.map((subId) => {
-            const bareId = subId.replace(/^sub-/, "");
-            const originalId = reverseMap[bareId] || "n/a";
-            const record = subjectRecords.find(
-              (r: any) => r.original_id === originalId
-            );
-            return columns
-              .map((col: string) => {
-                if (col === "participant_id") return subId;
-                if (col === "original_id") return originalId;
-                if (col === "group") return (record as any)?.group || "n/a";
-                return "n/a";
-              })
-              .join("\t");
-          });
-
-          participantsContent = [header, ...rows].join("\n");
-        } catch (e) {
-          // Fallback: generate minimal TSV directly from subject analysis
-          const reverseMap =
-            currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
-          const header = "participant_id\toriginal_id";
-          const rows = subjectLabels.map((subId) => {
-            const bareId = subId.replace(/^sub-/, "");
-            const originalId = reverseMap[bareId] || "n/a";
-            return `${subId}\t${originalId}`;
-          });
-          participantsContent = [header, ...rows].join("\n");
-        }
-      }
-      // ==========================================
-      // Add trio files to Virtual File System
-      // ==========================================
       const timestamp = new Date().toLocaleString();
       const trioFiles: FileItem[] = [
         {
@@ -621,10 +243,7 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
           name: "README.md",
           type: "file",
           fileType: "meta",
-          content: readmeContent
-            .replace(/^```markdown\n?/g, "")
-            .replace(/\n?```$/g, "")
-            .trim(),
+          content: readmeContent,
           contentType: "text",
           isUserMeta: true,
           parentId: null,
@@ -636,10 +255,7 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
           name: "participants.tsv",
           type: "file",
           fileType: "meta",
-          content: participantsContent
-            .replace(/^```\n?/g, "")
-            .replace(/\n?```$/g, "")
-            .trim(),
+          content: participantsTsv,
           contentType: "text",
           isUserMeta: true,
           parentId: null,
@@ -647,32 +263,27 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
           generatedAt: timestamp,
         },
       ];
-      // replace existing trio files, add if not exist
+
       updateFiles((prev) => {
         const trioNames = [
           "dataset_description.json",
           "README.md",
           "participants.tsv",
         ];
-
-        // Remove old AI generated trio files
         const withoutOldTrio = prev.filter(
           (f) => !(f.source === "ai" && trioNames.includes(f.name))
         );
-
-        // Add new trio files
-        // return [...withoutOldTrio, ...trioFiles];
-
-        // Only add AI-generated files for ones that weren't user-uploaded
-        const newTrioFiles = trioFiles.filter(
-          (tf) =>
-            !evidenceBundle.trio_found?.[
-              tf.name as keyof typeof evidenceBundle.trio_found
-            ]
-        );
-
+        // Only add AI files for ones that weren't user-uploaded (skipped=true means user-uploaded)
+        const newTrioFiles = trioFiles.filter((tf) => {
+          if (tf.name === "dataset_description.json")
+            return !skipped.datasetDesc;
+          if (tf.name === "README.md") return !skipped.readme;
+          if (tf.name === "participants.tsv") return !skipped.participants;
+          return true;
+        });
         return [...withoutOldTrio, ...newTrioFiles];
       });
+
       setTrioGenerated(true);
       setStatus(
         "✓ BIDS trio files generated and added to Virtual File System!"
@@ -686,9 +297,469 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
       }
     } finally {
       setGeneratingTrio(false);
-      setAbortController(null); // Clear controller
+      setAbortController(null);
     }
   };
+  // const handleGenerateTrio = async () => {
+  //   if (!evidenceBundle) {
+  //     setError("Please generate evidence bundle first");
+  //     return;
+  //   }
+
+  //   if (!currentProvider.noApiKey && !apiKey.trim()) {
+  //     setError("Please enter an API key");
+  //     return;
+  //   }
+
+  //   // Create abort controller
+  //   const controller = new AbortController();
+  //   setAbortController(controller);
+
+  //   setGeneratingTrio(true);
+  //   setError(null);
+  //   setStatus("Generating BIDS trio files...");
+
+  //   try {
+  //     const userText = evidenceBundle.user_hints.user_text || "";
+
+  //     // ==========================================
+  //     // Call 1: Generate dataset_description.json
+  //     // ==========================================
+  //     let datasetDesc: any;
+  //     if (evidenceBundle.trio_found?.["dataset_description.json"]) {
+  //       setStatus("1/3 dataset_description.json already exists, skipping...");
+  //       const existing = files.find(
+  //         (f) => f.source === "user" && f.name === "dataset_description.json"
+  //       );
+  //       datasetDesc = existing?.content ? JSON.parse(existing.content) : {};
+  //     } else {
+  //       setStatus("1/3 Generating dataset_description.json...");
+  //       const ddPrompt = getDatasetDescriptionPrompt(userText, evidenceBundle);
+
+  //       let ddResponse;
+  //       if (currentProvider.isAnthropic) {
+  //         ddResponse = await fetch(currentProvider.baseUrl, {
+  //           method: "POST",
+  //           signal: controller.signal,
+  //           headers: {
+  //             "Content-Type": "application/json",
+  //             "x-api-key": apiKey,
+  //             "anthropic-version": "2023-06-01",
+  //           },
+  //           body: JSON.stringify({
+  //             model,
+  //             max_tokens: 2048,
+  //             messages: [{ role: "user", content: ddPrompt }],
+  //           }),
+  //         });
+  //       } else if (provider === "ollama") {
+
+  //         ddResponse = await OllamaService.chat(model, [
+  //           { role: "user", content: ddPrompt },
+  //         ]);
+  //       } else {
+  //         ddResponse = await fetch(currentProvider.baseUrl, {
+  //           method: "POST",
+  //           signal: controller.signal,
+  //           headers: {
+  //             "Content-Type": "application/json",
+  //             Authorization: `Bearer ${apiKey}`,
+  //           },
+  //           body: JSON.stringify({
+  //             model,
+  //             messages: [{ role: "user", content: ddPrompt }],
+  //             max_tokens: 2048,
+  //           }),
+  //         });
+  //       }
+
+  //       // const ddData = await ddResponse.json();
+  //       const ddData =
+  //         provider === "ollama" ? ddResponse : await ddResponse.json();
+  //       let ddText = currentProvider.isAnthropic
+  //         ? ddData.content[0].text
+  //         : ddData.choices[0].message.content;
+
+  //       // Clean up markdown fences
+  //       ddText = ddText
+  //         .replace(/^```json\n?/g, "")
+  //         .replace(/\n?```$/g, "")
+  //         .trim();
+  //       datasetDesc = JSON.parse(ddText);
+  //     }
+
+  //     // ==========================================
+  //     // Call 2: Generate README.md
+  //     // ==========================================
+  //     let readmeContent: string;
+  //     if (evidenceBundle.trio_found?.["README.md"]) {
+  //       setStatus("2/3 README.md already exists, skipping...");
+  //       const existing = files.find(
+  //         (f) =>
+  //           f.source === "user" &&
+  //           ["README.md", "README.txt", "README.rst", "readme.md"].includes(
+  //             f.name
+  //           )
+  //       );
+  //       readmeContent = existing?.content || "";
+  //     } else {
+  //       setStatus("2/3 Generating README.md...");
+  //       const readmePrompt = getReadmePrompt(userText);
+
+  //       let readmeResponse;
+  //       if (currentProvider.isAnthropic) {
+  //         readmeResponse = await fetch(currentProvider.baseUrl, {
+  //           method: "POST",
+  //           signal: controller.signal,
+  //           headers: {
+  //             "Content-Type": "application/json",
+  //             "x-api-key": apiKey,
+  //             "anthropic-version": "2023-06-01",
+  //           },
+  //           body: JSON.stringify({
+  //             model,
+  //             max_tokens: 2048,
+  //             messages: [{ role: "user", content: readmePrompt }],
+  //           }),
+  //         });
+  //       } else if (provider === "ollama") {
+
+  //         readmeResponse = await OllamaService.chat(model, [
+  //           { role: "user", content: readmePrompt },
+  //         ]);
+  //       } else {
+  //         readmeResponse = await fetch(currentProvider.baseUrl, {
+  //           method: "POST",
+  //           signal: controller.signal,
+  //           headers: {
+  //             "Content-Type": "application/json",
+  //             Authorization: `Bearer ${apiKey}`,
+  //           },
+  //           body: JSON.stringify({
+  //             model,
+  //             messages: [{ role: "user", content: readmePrompt }],
+  //             max_tokens: 2048,
+  //           }),
+  //         });
+  //       }
+
+  //       const readmeData =
+  //         provider === "ollama" ? readmeResponse : await readmeResponse.json();
+  //       readmeContent = currentProvider.isAnthropic
+  //         ? readmeData.content[0].text
+  //         : readmeData.choices[0].message.content;
+  //     }
+  //     // ==========================================
+  //     // Call 3: Generate participants.tsv
+  //     // ==========================================
+  //     let participantsContent: string;
+  //     if (evidenceBundle.trio_found?.["participants.tsv"]) {
+  //       setStatus("3/3 participants.tsv already exists, skipping...");
+  //       const existing = files.find(
+  //         (f) => f.source === "user" && f.name === "participants.tsv"
+  //       );
+  //       participantsContent = existing?.content || "";
+  //     } else {
+  //       setStatus("3/3 Generating participants.tsv...");
+  //       const partsPrompt = getParticipantsPrompt(userText);
+
+  //       const currentSubjectAnalysis = extractSubjectAnalysis(
+  //         evidenceBundle?.all_files || [],
+  //         evidenceBundle?.user_hints?.n_subjects,
+  //         evidenceBundle?.filename_analysis?.python_statistics
+  //           ?.dominant_prefixes
+  //       );
+
+  //       console.log("=== PARTICIPANTS DEBUG ===");
+  //       console.log("method:", currentSubjectAnalysis?.method);
+  //       console.log("subject_count:", currentSubjectAnalysis?.subject_count);
+  //       console.log(
+  //         "id_mapping:",
+  //         currentSubjectAnalysis?.id_mapping?.id_mapping
+  //       );
+  //       console.log(
+  //         "reverse_mapping:",
+  //         currentSubjectAnalysis?.id_mapping?.reverse_mapping
+  //       );
+  //       console.log(
+  //         "subject_records sample:",
+  //         currentSubjectAnalysis?.subject_records?.slice(0, 3)
+  //       );
+  //       const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
+  //       const expectedCount = evidenceBundle?.user_hints?.n_subjects;
+  //       const subjectLabels: string[] =
+  //         idMap &&
+  //         Object.keys(idMap).length > 0 &&
+  //         (!expectedCount || Object.keys(idMap).length === expectedCount)
+  //           ? Object.values(idMap).map((id: string) => `sub-${id}`)
+  //           : Array.from(
+  //               {
+  //                 length: expectedCount || Object.keys(idMap || {}).length || 1,
+  //               },
+  //               (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
+  //             );
+
+  //       let partsResponse;
+  //       if (currentProvider.isAnthropic) {
+  //         partsResponse = await fetch(currentProvider.baseUrl, {
+  //           method: "POST",
+  //           signal: controller.signal,
+  //           headers: {
+  //             "Content-Type": "application/json",
+  //             "x-api-key": apiKey,
+  //             "anthropic-version": "2023-06-01",
+  //           },
+  //           body: JSON.stringify({
+  //             model,
+  //             max_tokens: 1024,
+  //             messages: [{ role: "user", content: partsPrompt }],
+  //           }),
+  //         });
+  //       } else if (provider === "ollama") {
+
+  //         partsResponse = await OllamaService.chat(model, [
+  //           { role: "user", content: partsPrompt },
+  //         ]);
+  //       } else {
+  //         partsResponse = await fetch(currentProvider.baseUrl, {
+  //           method: "POST",
+  //           signal: controller.signal,
+  //           headers: {
+  //             "Content-Type": "application/json",
+  //             Authorization: `Bearer ${apiKey}`,
+  //           },
+  //           body: JSON.stringify({
+  //             model,
+  //             messages: [{ role: "user", content: partsPrompt }],
+  //             max_tokens: 1024,
+  //           }),
+  //         });
+  //       }
+
+  //       // const partsData = await partsResponse.json();
+  //       const partsData =
+  //         provider === "ollama" ? partsResponse : await partsResponse.json();
+  //       const participantsRaw = currentProvider.isAnthropic
+  //         ? partsData.content[0].text
+  //         : partsData.choices[0].message.content;
+
+  //       // Build TSV from schema
+  //       // try {
+  //       //   const schemaText = participantsRaw
+  //       //     .replace(/^```json\n?/g, "")
+  //       //     .replace(/\n?```$/g, "")
+  //       //     .trim();
+  //       //   const schema = JSON.parse(schemaText);
+  //       //   const columns: string[] = schema.columns.map((c: any) => c.name);
+
+  //       //   // Get subject IDs from evidence bundle (extracted by Python-style analysis)
+  //       //   // const idMapping =
+  //       //   //   evidenceBundle?.subject_analysis?.id_mapping?.id_mapping;
+  //       //   // const subjectLabels: string[] = idMapping
+  //       //   //   ? Object.values(idMapping).map((id) => `sub-${id}`)
+  //       //   //   : ["sub-01"]; // fallback if no subject analysis
+  //       //   // Get subject IDs from subjectAnalysis state (computed at plan stage)
+  //       //   // Fall back to computing fresh if plan hasn't been run yet
+  //       //   const currentSubjectAnalysis =
+  //       //     subjectAnalysis ||
+  //       //     extractSubjectAnalysis(
+  //       //       evidenceBundle?.all_files || [],
+  //       //       evidenceBundle?.user_hints?.n_subjects,
+  //       //       evidenceBundle?.filename_analysis?.python_statistics
+  //       //         ?.dominant_prefixes
+  //       //     );
+  //       //   const idMap = currentSubjectAnalysis?.id_mapping?.id_mapping;
+  //       //   const subjectLabels: string[] =
+  //       //     idMap && Object.keys(idMap).length > 0
+  //       //       ? Object.values(idMap).map((id) => `sub-${id}`)
+  //       //       : Array.from(
+  //       //           { length: evidenceBundle?.user_hints?.n_subjects || 1 },
+  //       //           (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
+  //       //         );
+
+  //       //   const header = columns.join("\t");
+  //       //   // ====origin====
+  //       //   // const rows = subjectLabels.map((subId) =>
+  //       //   //   columns
+  //       //   //     .map((col: string) => (col === "participant_id" ? subId : "n/a"))
+  //       //   //     .join("\t")
+  //       //   // );
+  //       //   //====== end ======
+  //       //   // =====update start=====
+  //       //   const reverseMap =
+  //       //     currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
+  //       //   const subjectRecords = currentSubjectAnalysis?.subject_records || [];
+
+  //       //   const rows = subjectLabels.map((subId) => {
+  //       //     const bareId = subId.replace(/^sub-/, "");
+  //       //     const originalId = reverseMap[bareId];
+  //       //     const record = subjectRecords.find(
+  //       //       (r: any) => r.original_id === originalId
+  //       //     );
+  //       //     return columns
+  //       //       .map((col: string) => {
+  //       //         if (col === "participant_id") return subId;
+  //       //         if (col === "original_id") return originalId || "n/a";
+  //       //         if (col === "group") return (record as any)?.group || "n/a";
+  //       //         return "n/a";
+  //       //       })
+  //       //       .join("\t");
+  //       //   });
+  //       //   //====update end======
+  //       //   participantsContent = [header, ...rows].join("\n");
+  //       // } catch (e) {
+  //       //   // Fallback: LLM didn't return valid JSON schema, use raw content
+  //       //   participantsContent = participantsRaw
+  //       //     .replace(/^```\n?/g, "")
+  //       //     .replace(/\n?```$/g, "")
+  //       //     .trim();
+  //       // }
+  //       // Build TSV from schema + subject analysis
+  //       // Mirrors _generate_participants_tsv_from_python() in planner.py
+  //       try {
+  //         const schemaText = participantsRaw
+  //           .replace(/^```json\n?/g, "")
+  //           .replace(/\n?```$/g, "")
+  //           .trim();
+  //         const schema = JSON.parse(schemaText);
+
+  //         // LLM decides extra demographic columns (sex, age, group etc.)
+  //         // but we always add participant_id and original_id ourselves
+  //         const extraColumns: string[] = schema.columns
+  //           .map((c: any) => c.name)
+  //           .filter(
+  //             (name: string) =>
+  //               name !== "participant_id" && name !== "original_id"
+  //           );
+
+  //         // Always start with participant_id and original_id
+  //         const columns = ["participant_id", "original_id", ...extraColumns];
+
+  //         const reverseMap =
+  //           currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
+  //         const subjectRecords = currentSubjectAnalysis?.subject_records || [];
+
+  //         const header = columns.join("\t");
+  //         const rows = subjectLabels.map((subId) => {
+  //           const bareId = subId.replace(/^sub-/, "");
+  //           const originalId = reverseMap[bareId] || "n/a";
+  //           const record = subjectRecords.find(
+  //             (r: any) => r.original_id === originalId
+  //           );
+  //           return columns
+  //             .map((col: string) => {
+  //               if (col === "participant_id") return subId;
+  //               if (col === "original_id") return originalId;
+  //               if (col === "group") return (record as any)?.group || "n/a";
+  //               return "n/a";
+  //             })
+  //             .join("\t");
+  //         });
+
+  //         participantsContent = [header, ...rows].join("\n");
+  //       } catch (e) {
+  //         // Fallback: generate minimal TSV directly from subject analysis
+  //         const reverseMap =
+  //           currentSubjectAnalysis?.id_mapping?.reverse_mapping || {};
+  //         const header = "participant_id\toriginal_id";
+  //         const rows = subjectLabels.map((subId) => {
+  //           const bareId = subId.replace(/^sub-/, "");
+  //           const originalId = reverseMap[bareId] || "n/a";
+  //           return `${subId}\t${originalId}`;
+  //         });
+  //         participantsContent = [header, ...rows].join("\n");
+  //       }
+  //     }
+  //     // ==========================================
+  //     // Add trio files to Virtual File System
+  //     // ==========================================
+  //     const timestamp = new Date().toLocaleString();
+  //     const trioFiles: FileItem[] = [
+  //       {
+  //         id: generateId(),
+  //         name: "dataset_description.json",
+  //         type: "file",
+  //         fileType: "meta",
+  //         content: JSON.stringify(datasetDesc, null, 2),
+  //         contentType: "text",
+  //         isUserMeta: true,
+  //         parentId: null,
+  //         source: "ai",
+  //         generatedAt: timestamp,
+  //       },
+  //       {
+  //         id: generateId(),
+  //         name: "README.md",
+  //         type: "file",
+  //         fileType: "meta",
+  //         content: readmeContent
+  //           .replace(/^```markdown\n?/g, "")
+  //           .replace(/\n?```$/g, "")
+  //           .trim(),
+  //         contentType: "text",
+  //         isUserMeta: true,
+  //         parentId: null,
+  //         source: "ai",
+  //         generatedAt: timestamp,
+  //       },
+  //       {
+  //         id: generateId(),
+  //         name: "participants.tsv",
+  //         type: "file",
+  //         fileType: "meta",
+  //         content: participantsContent
+  //           .replace(/^```\n?/g, "")
+  //           .replace(/\n?```$/g, "")
+  //           .trim(),
+  //         contentType: "text",
+  //         isUserMeta: true,
+  //         parentId: null,
+  //         source: "ai",
+  //         generatedAt: timestamp,
+  //       },
+  //     ];
+  //     // replace existing trio files, add if not exist
+  //     updateFiles((prev) => {
+  //       const trioNames = [
+  //         "dataset_description.json",
+  //         "README.md",
+  //         "participants.tsv",
+  //       ];
+
+  //       // Remove old AI generated trio files
+  //       const withoutOldTrio = prev.filter(
+  //         (f) => !(f.source === "ai" && trioNames.includes(f.name))
+  //       );
+
+  //       // Add new trio files
+  //       // return [...withoutOldTrio, ...trioFiles];
+
+  //       // Only add AI-generated files for ones that weren't user-uploaded
+  //       const newTrioFiles = trioFiles.filter(
+  //         (tf) =>
+  //           !evidenceBundle.trio_found?.[
+  //             tf.name as keyof typeof evidenceBundle.trio_found
+  //           ]
+  //       );
+
+  //       return [...withoutOldTrio, ...newTrioFiles];
+  //     });
+  //     setTrioGenerated(true);
+  //     setStatus(
+  //       "✓ BIDS trio files generated and added to Virtual File System!"
+  //     );
+  //   } catch (err: any) {
+  //     if (err.name === "AbortError") {
+  //       setStatus("❌ Generation cancelled");
+  //     } else {
+  //       setError(err.message || "Failed to generate trio files");
+  //       setStatus("❌ Error generating trio files");
+  //     }
+  //   } finally {
+  //     setGeneratingTrio(false);
+  //     setAbortController(null); // Clear controller
+  //   }
+  // };
 
   const handleMouseDown = (e: React.MouseEvent) => {
     setIsResizing(true);
@@ -725,312 +796,222 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
 
   const currentProvider = llmProviders[provider];
 
-  const handleGenerate = async () => {
+  const handleGeneratePlan = async () => {
     if (!currentProvider.noApiKey && !apiKey.trim()) {
       setError("Please enter an API key");
       return;
     }
-
     if (!baseDirectoryPath.trim()) {
       setError("Please enter a base directory path");
       return;
     }
 
-    // Create abort controller
     const controller = new AbortController();
     setAbortController(controller);
-
     setLoading(true);
     setError(null);
-    setStatus(`Generating script using ${currentProvider.name}...`);
-
-    const fileSummary = buildFileSummary(files);
-    const filePatterns = analyzeFilePatterns(files);
-    const userContext = getUserContext(files);
-    const annotations = getFileAnnotations(files);
-
-    // UPDATED: Improved prompt that uses trio files
-    const prompt = getConversionScriptPrompt(
-      baseDirectoryPath,
-      fileSummary,
-      filePatterns,
-      userContext,
-      annotations
-    );
+    setStatus(`Generating BIDSPlan.yaml using ${currentProvider.name}...`);
 
     try {
-      let response;
-
-      if (provider === "ollama") {
-        // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434";
-        // response = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, {
-        //   method: "POST",
-        //   signal: controller.signal,
-        //   headers: {
-        //     "Content-Type": "application/json",
-        //   },
-        //   body: JSON.stringify({
-        //     model,
-        //     messages: [
-        //       {
-        //         role: "system",
-        //         content:
-        //           "You are a neuroimaging data expert specializing in BIDS format conversion. Output only Python code without markdown fences or explanations.",
-        //       },
-        //       { role: "user", content: prompt },
-        //     ],
-        //     stream: false,
-        //   }),
-        // });
-        response = await OllamaService.chat(model, [
-          {
-            role: "system",
-            content:
-              "You are a neuroimaging data expert specializing in BIDS format conversion. Output only Python code without markdown fences or explanations.",
-          },
-          { role: "user", content: prompt },
-        ]);
-      } else if (currentProvider.isAnthropic) {
-        response = await fetch(currentProvider.baseUrl, {
-          method: "POST",
-          signal: controller.signal,
-          headers: {
-            "Content-Type": "application/json",
-            "x-api-key": apiKey,
-            "anthropic-version": "2023-06-01",
-          },
-          body: JSON.stringify({
-            model,
-            max_tokens: 4096,
-            messages: [{ role: "user", content: prompt }],
-          }),
-        });
-      } else {
-        const headers: Record<string, string> = {
-          "Content-Type": "application/json",
-        };
-
-        if (!currentProvider.noApiKey) {
-          headers["Authorization"] = `Bearer ${apiKey}`;
-        }
+      const {
+        planYaml,
+        subjectAnalysis: sa,
+        participantsTsv,
+        coverageWarnings,
+      } = await buildBidsPlan({
+        evidenceBundle,
+        llmConfig: buildLLMConfig(),
+        signal: controller.signal,
+        onStatus: setStatus,
+      });
 
-        response = await fetch(currentProvider.baseUrl, {
-          method: "POST",
-          signal: controller.signal,
-          headers,
-          body: JSON.stringify({
-            model,
-            messages: [
-              {
-                role: "system",
-                content:
-                  "You are a neuroimaging data expert specializing in BIDS format conversion. Output only Python code without markdown fences or explanations.",
-              },
-              { role: "user", content: prompt },
-            ],
-            max_tokens: 4096,
-            temperature: 0.7,
-          }),
+      // Store subject analysis for ZIP packaging
+      setSubjectAnalysis(sa);
+
+      // Store the final YAML plan in state (planYaml is the raw string from the LLM, already cleaned)
+      setBidsPlan(planYaml);
+
+      // Replace any AI-generated participants.tsv in the VFS with the full version from the plan stage
+      if (participantsTsv) {
+        const timestamp = new Date().toLocaleString();
+        updateFiles((prev) => {
+          const withoutOld = prev.filter(
+            (f) => !(f.source === "ai" && f.name === "participants.tsv")
+          );
+          return [
+            ...withoutOld,
+            {
+              id: generateId(),
+              name: "participants.tsv",
+              type: "file" as const,
+              fileType: "meta",
+              content: participantsTsv,
+              contentType: "text",
+              isUserMeta: true,
+              parentId: null,
+              source: "ai" as const,
+              generatedAt: timestamp,
+            },
+          ];
         });
       }
 
-      // const data = await response.json();
-      const data = provider === "ollama" ? response : await response.json();
-
-      // if (!response.ok) {
-      //   throw new Error(data.error?.message || "Failed to generate script");
-      // }
-      if (!response.ok && provider !== "ollama") {
-        throw new Error(data.error?.message || "Failed to generate script");
+      if (coverageWarnings.length > 0) {
+        setStatus(
+          `✓ BIDSPlan.yaml generated (${coverageWarnings.length} coverage warning(s) — check console)`
+        );
+      } else {
+        setStatus(`✓ BIDSPlan.yaml generated using ${currentProvider.name}`);
       }
-
-      // let script = "";
-      // if (currentProvider.isAnthropic) {
-      //   script = data.content[0].text;
-      // } else {
-      //   script = data.choices[0].message.content;
-      // }
-      let script = currentProvider.isAnthropic
-        ? data.content[0].text
-        : data.choices[0].message.content;
-
-      // Clean up markdown fences if AI included them anyway
-      script = script.replace(/^```python\n?/g, "").replace(/\n?```$/g, "");
-
-      setGeneratedScript(script);
-      setStatus(`✓ Script generated using ${currentProvider.name}`);
     } catch (err: any) {
       if (err.name === "AbortError") {
         setStatus("❌ Generation cancelled");
       } else {
-        setError(err.message || "Failed to generate script");
-        setStatus("❌ Error generating script");
+        setError(err.message || "Failed to generate BIDSPlan");
+        setStatus("❌ Error generating BIDSPlan");
       }
     } finally {
       setLoading(false);
-      setAbortController(null); // Clear controller
+      setAbortController(null);
     }
   };
+  // const handleGeneratePlan = async () => {
+  //   if (!currentProvider.noApiKey && !apiKey.trim()) {
+  //     setError("Please enter an API key");
+  //     return;
+  //   }
+  //   if (!baseDirectoryPath.trim()) {
+  //     setError("Please enter a base directory path");
+  //     return;
+  //   }
 
-  const handleGeneratePlan = async () => {
-    if (!currentProvider.noApiKey && !apiKey.trim()) {
-      setError("Please enter an API key");
-      return;
-    }
-    if (!baseDirectoryPath.trim()) {
-      setError("Please enter a base directory path");
-      return;
-    }
-
-    const controller = new AbortController();
-    setAbortController(controller);
-    setLoading(true);
-    setError(null);
-    setStatus(`Generating BIDSPlan.yaml using ${currentProvider.name}...`);
-
-    // ── Compute subject analysis (mirrors planner.py Step 1)
-    const allFiles = evidenceBundle?.all_files || [];
-    const userNSubjects = evidenceBundle?.user_hints?.n_subjects;
-    const dominantPrefixes =
-      evidenceBundle?.filename_analysis?.python_statistics?.dominant_prefixes;
-
-    const computedSubjectAnalysis = extractSubjectAnalysis(
-      allFiles,
-      userNSubjects,
-      dominantPrefixes
-    );
-
-    setSubjectAnalysis(computedSubjectAnalysis);
-
-    const fileSummary = buildFileSummary(files);
-    const filePatterns = analyzeFilePatterns(files);
-    const userContext = getUserContext(files);
-    // const subjectInfo = extractSubjectsFromFiles(files);
-    const subjectInfo = computedSubjectAnalysis;
-    const sampleFiles =
-      evidenceBundle?.samples
-        ?.slice(0, 10)
-        .map((s: any) => `  - ${s.relpath}`)
-        .join("\n") || "";
-
-    const prompt = getBIDSPlanPrompt(
-      fileSummary,
-      filePatterns,
-      userContext,
-      {
-        subjects: Object.entries(
-          computedSubjectAnalysis.id_mapping.id_mapping
-        ).map(([originalId, bidsId]) => ({ originalId, bidsId })),
-        strategy: computedSubjectAnalysis.id_mapping.strategy_used,
-      },
-      evidenceBundle?.counts_by_ext || {},
-      sampleFiles,
-      evidenceBundle
-    );
-
-    try {
-      let response;
-
-      if (provider === "ollama") {
-        // const ollamaBaseUrl = ollamaUrl || "http://localhost:11434";
-        // response = await fetch(`${ollamaBaseUrl}/v1/chat/completions`, {
-        //   method: "POST",
-        //   signal: controller.signal,
-        //   headers: { "Content-Type": "application/json" },
-        //   body: JSON.stringify({
-        //     model,
-        //     messages: [
-        //       {
-        //         role: "system",
-        //         content:
-        //           "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.",
-        //       },
-        //       { role: "user", content: prompt },
-        //     ],
-        //     stream: false,
-        //   }),
-        // });
-        response = await OllamaService.chat(model, [
-          {
-            role: "system",
-            content:
-              "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.",
-          },
-          { role: "user", content: prompt },
-        ]);
-      } else if (currentProvider.isAnthropic) {
-        response = await fetch(currentProvider.baseUrl, {
-          method: "POST",
-          signal: controller.signal,
-          headers: {
-            "Content-Type": "application/json",
-            "x-api-key": apiKey,
-            "anthropic-version": "2023-06-01",
-          },
-          body: JSON.stringify({
-            model,
-            max_tokens: 2048,
-            messages: [{ role: "user", content: prompt }],
-          }),
-        });
-      } else {
-        response = await fetch(currentProvider.baseUrl, {
-          method: "POST",
-          signal: controller.signal,
-          headers: {
-            "Content-Type": "application/json",
-            Authorization: `Bearer ${apiKey}`,
-          },
-          body: JSON.stringify({
-            model,
-            messages: [
-              {
-                role: "system",
-                content:
-                  "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.",
-              },
-              { role: "user", content: prompt },
-            ],
-            max_tokens: 2048,
-            temperature: 0.15,
-          }),
-        });
-      }
-
-      // const data = await response.json();
+  //   const controller = new AbortController();
+  //   setAbortController(controller);
+  //   setLoading(true);
+  //   setError(null);
+  //   setStatus(`Generating BIDSPlan.yaml using ${currentProvider.name}...`);
+
+  //   // ── Compute subject analysis (mirrors planner.py Step 1)
+  //   const allFiles = evidenceBundle?.all_files || [];
+  //   const userNSubjects = evidenceBundle?.user_hints?.n_subjects;
+  //   const dominantPrefixes =
+  //     evidenceBundle?.filename_analysis?.python_statistics?.dominant_prefixes;
+
+  //   const computedSubjectAnalysis = extractSubjectAnalysis(
+  //     allFiles,
+  //     userNSubjects,
+  //     dominantPrefixes
+  //   );
 
-      // if (!response.ok) {
-      //   throw new Error(data.error?.message || "Failed to generate BIDSPlan");
-      // }
-      const data = provider === "ollama" ? response : await response.json();
-      if (!response.ok && provider !== "ollama") {
-        throw new Error(data.error?.message || "Failed to generate BIDSPlan");
-      }
+  //   setSubjectAnalysis(computedSubjectAnalysis);
+
+  //   const fileSummary = buildFileSummary(files);
+  //   const filePatterns = analyzeFilePatterns(files);
+  //   const userContext = getUserContext(files);
+  //   // const subjectInfo = extractSubjectsFromFiles(files);
+  //   const subjectInfo = computedSubjectAnalysis;
+  //   const sampleFiles =
+  //     evidenceBundle?.samples
+  //       ?.slice(0, 10)
+  //       .map((s: any) => `  - ${s.relpath}`)
+  //       .join("\n") || "";
+
+  //   const prompt = getBIDSPlanPrompt(
+  //     fileSummary,
+  //     filePatterns,
+  //     userContext,
+  //     {
+  //       subjects: Object.entries(
+  //         computedSubjectAnalysis.id_mapping.id_mapping
+  //       ).map(([originalId, bidsId]) => ({ originalId, bidsId })),
+  //       strategy: computedSubjectAnalysis.id_mapping.strategy_used,
+  //     },
+  //     evidenceBundle?.counts_by_ext || {},
+  //     sampleFiles,
+  //     evidenceBundle
+  //   );
 
-      let plan = currentProvider.isAnthropic
-        ? data.content[0].text
-        : data.choices[0].message.content;
+  //   try {
+  //     let response;
+
+  //     if (provider === "ollama") {
+
+  //       response = await OllamaService.chat(model, [
+  //         {
+  //           role: "system",
+  //           content:
+  //             "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.",
+  //         },
+  //         { role: "user", content: prompt },
+  //       ]);
+  //     } else if (currentProvider.isAnthropic) {
+  //       response = await fetch(currentProvider.baseUrl, {
+  //         method: "POST",
+  //         signal: controller.signal,
+  //         headers: {
+  //           "Content-Type": "application/json",
+  //           "x-api-key": apiKey,
+  //           "anthropic-version": "2023-06-01",
+  //         },
+  //         body: JSON.stringify({
+  //           model,
+  //           max_tokens: 2048,
+  //           messages: [{ role: "user", content: prompt }],
+  //         }),
+  //       });
+  //     } else {
+  //       response = await fetch(currentProvider.baseUrl, {
+  //         method: "POST",
+  //         signal: controller.signal,
+  //         headers: {
+  //           "Content-Type": "application/json",
+  //           Authorization: `Bearer ${apiKey}`,
+  //         },
+  //         body: JSON.stringify({
+  //           model,
+  //           messages: [
+  //             {
+  //               role: "system",
+  //               content:
+  //                 "You are a BIDS dataset architect. Output only valid YAML without markdown fences or explanations.",
+  //             },
+  //             { role: "user", content: prompt },
+  //           ],
+  //           max_tokens: 2048,
+  //           temperature: 0.15,
+  //         }),
+  //       });
+  //     }
 
-      // Clean up markdown fences if present
-      plan = plan
-        .replace(/^```yaml\n?/g, "")
-        .replace(/\n?```$/g, "")
-        .trim();
+  //     const data = provider === "ollama" ? response : await response.json();
+  //     if (!response.ok && provider !== "ollama") {
+  //       throw new Error(data.error?.message || "Failed to generate BIDSPlan");
+  //     }
 
-      setBidsPlan(plan);
-      setStatus(`✓ BIDSPlan.yaml generated using ${currentProvider.name}`);
-    } catch (err: any) {
-      if (err.name === "AbortError") {
-        setStatus("❌ Generation cancelled");
-      } else {
-        setError(err.message || "Failed to generate BIDSPlan");
-        setStatus("❌ Error generating BIDSPlan");
-      }
-    } finally {
-      setLoading(false);
-      setAbortController(null);
-    }
-  };
+  //     let plan = currentProvider.isAnthropic
+  //       ? data.content[0].text
+  //       : data.choices[0].message.content;
+
+  //     // Clean up markdown fences if present
+  //     plan = plan
+  //       .replace(/^```yaml\n?/g, "")
+  //       .replace(/\n?```$/g, "")
+  //       .trim();
+
+  //     setBidsPlan(plan);
+  //     setStatus(`✓ BIDSPlan.yaml generated using ${currentProvider.name}`);
+  //   } catch (err: any) {
+  //     if (err.name === "AbortError") {
+  //       setStatus("❌ Generation cancelled");
+  //     } else {
+  //       setError(err.message || "Failed to generate BIDSPlan");
+  //       setStatus("❌ Error generating BIDSPlan");
+  //     }
+  //   } finally {
+  //     setLoading(false);
+  //     setAbortController(null);
+  //   }
+  // };
 
   const handleDownloadPlan = () => {
     const blob = new Blob([bidsPlan], { type: "text/yaml" });
@@ -1518,7 +1499,7 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
                 )}
               </FormControl>
 
-              {/* <TextField
+              <TextField
                 label="Describe your dataset (optional)"
                 placeholder='e.g. "DICOM files from 2 subjects, one male one female"'
                 value={describeText}
@@ -1526,7 +1507,8 @@ const LLMPanel: React.FC<LLMPanelProps> = ({
                 size="small"
                 multiline
                 rows={2}
-              /> */}
+                sx={{ mb: 1 }}
+              />
               <Button
                 fullWidth
                 size="small"
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts
index c3c89b0..8318a87 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/fileAnalyzers.ts
@@ -18,37 +18,106 @@ import { FileItem } from "redux/projects/types/projects.interface";
 // Partial mirror of executor.py → infer_subdirectory_from_suffix()
 //                               + categorize_scan_type()
 // ============================================================================
+// export const categorizeFile = (file: FileItem): string => {
+//   const name = file.name.toLowerCase();
+
+//   // Functional scans (task-based)
+//   if (name.includes("task-") && name.includes("bold")) return "functional-bold";
+//   if (name.endsWith(".snirf")) return "functional-nirs";
+//   if (name.endsWith(".nirs")) return "functional-nirs";
+//   if (name.endsWith(".mat")) return "functional-nirs";
+
+//   // Anatomical scans
+//   if (name.includes("t1w")) return "anatomical-T1w";
+//   if (name.includes("t2w") || name.includes("inplanet2"))
+//     return "anatomical-T2w";
+//   if (name.includes("flair")) return "anatomical-FLAIR";
+//   if (name.endsWith(".dcm")) return "anatomical-dicom";
+
+//   // JNIfTI — mirrors JNIFTI_EXT in constants.py: {'.jnii', '.bnii'}
+//   if (name.endsWith(".jnii") || name.endsWith(".bnii"))
+//     return "anatomical-jnifti";
+
+//   // Diffusion
+//   if (name.includes("dwi") || name.includes("diffusion")) return "diffusion";
+
+//   // Field maps
+//   if (name.includes("fieldmap") || name.includes("fmap")) return "fieldmap";
+
+//   // Array/HDF5 (non-SNIRF)
+//   if (name.endsWith(".h5") || name.endsWith(".hdf5")) return "array";
+
+//   // Fall back to fileType from fileProcessors.ts
+//   return file.fileType || "unknown";
+// };
+
 export const categorizeFile = (file: FileItem): string => {
   const name = file.name.toLowerCase();
 
-  // Functional scans (task-based)
-  if (name.includes("task-") && name.includes("bold")) return "functional-bold";
-  if (name.endsWith(".snirf")) return "functional-nirs";
-  if (name.endsWith(".nirs")) return "functional-nirs";
-  if (name.endsWith(".mat")) return "functional-nirs";
+  // Mirror detect_kind() priority order exactly:
+  // user_trio → jnifti → nirs → mri → table → array → text_doc → document → archive → other
+
+  // user_trio
+  if (
+    [
+      "dataset_description.json",
+      "participants.tsv",
+      "readme.md",
+      "readme.txt",
+      "readme.rst",
+      "readme",
+    ].includes(name)
+  )
+    return "user_trio";
+
+  // jnifti
+  if (name.endsWith(".jnii") || name.endsWith(".bnii")) return "jnifti";
+
+  // nirs — mirrors NIRS_EXT = {'.snirf', '.nirs', '.mat'}
+  if (
+    name.endsWith(".snirf") ||
+    name.endsWith(".nirs") ||
+    name.endsWith(".mat")
+  )
+    return "nirs";
+
+  // mri — mirrors MRI_EXT = {'.nii', '.dcm'} + .nii.gz
+  if (
+    name.endsWith(".nii.gz") ||
+    name.endsWith(".nii") ||
+    name.endsWith(".dcm")
+  )
+    return "mri";
 
-  // Anatomical scans
-  if (name.includes("t1w")) return "anatomical-T1w";
-  if (name.includes("t2w") || name.includes("inplanet2"))
-    return "anatomical-T2w";
-  if (name.includes("flair")) return "anatomical-FLAIR";
-  if (name.endsWith(".dcm")) return "anatomical-dicom";
+  // table
+  if ([".csv", ".tsv", ".xlsx", ".xls"].some((e) => name.endsWith(e)))
+    return "table";
 
-  // JNIfTI — mirrors JNIFTI_EXT in constants.py: {'.jnii', '.bnii'}
-  if (name.endsWith(".jnii") || name.endsWith(".bnii"))
-    return "anatomical-jnifti";
+  // array — mirrors ARRAY_EXT = {'.h5', '.hdf5', '.npy', '.npz'}
+  if ([".h5", ".hdf5", ".npy", ".npz"].some((e) => name.endsWith(e)))
+    return "array";
 
-  // Diffusion
-  if (name.includes("dwi") || name.includes("diffusion")) return "diffusion";
+  // text_doc — mirrors TEXT_EXT = {'.txt', '.md', '.rst', '.html', '.htm', '.log'}
+  if (
+    [".txt", ".md", ".rst", ".html", ".htm", ".log"].some((e) =>
+      name.endsWith(e)
+    )
+  )
+    return "text_doc";
 
-  // Field maps
-  if (name.includes("fieldmap") || name.includes("fmap")) return "fieldmap";
+  // document — mirrors DOC_EXT = {'.pdf', '.docx', '.doc', '.pptx', '.ppt', '.odt'}
+  if (
+    [".pdf", ".docx", ".doc", ".pptx", ".ppt", ".odt"].some((e) =>
+      name.endsWith(e)
+    )
+  )
+    return "document";
 
-  // Array/HDF5 (non-SNIRF)
-  if (name.endsWith(".h5") || name.endsWith(".hdf5")) return "array";
+  // archive
+  if ([".zip", ".tar", ".tgz", ".tar.gz"].some((e) => name.endsWith(e)))
+    return "archive";
 
-  // Fall back to fileType from fileProcessors.ts
-  return file.fileType || "unknown";
+  return "other";
 };
 
 // ============================================================================
@@ -102,19 +171,75 @@ export const getCountsByExtension = (
 //
 // Python reads files from disk; this reads from VFS FileItem.content.
 // ============================================================================
+// export const getUserContextText = (files: FileItem[]): string => {
+//   const readme = files.find((f) => f.name.toLowerCase().includes("readme"));
+//   const instructions = files.find(
+//     (f) =>
+//       f.name.toLowerCase().includes("conversion") ||
+//       f.name.toLowerCase().includes("instruction")
+//   );
+//   const participants = files.find((f) =>
+//     f.name.toLowerCase().includes("participant")
+//   );
+
+//   const datasetDescription = files.find(
+//     (f) => f.name.toLowerCase() === "dataset_description.json"
+//   );
+
+//   const pdfsAndDocs = files.filter(
+//     (f) =>
+//       f.source === "user" &&
+//       f.fileType === "office" &&
+//       f.content?.trim() &&
+//       f.name.toLowerCase() !== "participants.tsv" // already handled
+//   );
+//   const textFiles = files.filter(
+//     (f) =>
+//       f.source === "user" &&
+//       f.fileType === "text" &&
+//       f.content?.trim() &&
+//       ![
+//         "readme",
+//         "participants.tsv",
+//         "dataset_description.json",
+//         "readme.md",
+//         "readme.txt",
+//       ].includes(f.name.toLowerCase()) &&
+//       f.isUserMeta === true // only user-added meta files, not data sidecars
+//   );
+//   textFiles.forEach((f) => {
+//     parts.push(`TEXT FILE [${f.name}]:\n${f.content!.slice(0, 3000)}`);
+//   });
+
+//   const parts = [];
+//   if (datasetDescription?.content)
+//     parts.push(`DATASET DESCRIPTION:\n${datasetDescription.content}`);
+//   if (readme?.content) parts.push(`README:\n${readme.content}`);
+//   if (instructions?.content)
+//     parts.push(`INSTRUCTIONS:\n${instructions.content}`);
+//   if (participants?.content)
+//     parts.push(`PARTICIPANTS:\n${participants.content}`);
+//   pdfsAndDocs.forEach((f) => {
+//     parts.push(`DOCUMENT [${f.name}]:\n${f.content!.slice(0, 3000)}`);
+//   });
+//   return parts.join("\n\n");
+// };
 export const getUserContextText = (files: FileItem[]): string => {
-  const readme = files.find((f) => f.name.toLowerCase().includes("readme"));
+  const readme = files.find(
+    (f) => f.source === "user" && f.name.toLowerCase().includes("readme")
+  );
   const instructions = files.find(
     (f) =>
-      f.name.toLowerCase().includes("conversion") ||
-      f.name.toLowerCase().includes("instruction")
+      f.source === "user" &&
+      (f.name.toLowerCase().includes("conversion") ||
+        f.name.toLowerCase().includes("instruction"))
   );
-  const participants = files.find((f) =>
-    f.name.toLowerCase().includes("participant")
+  const participants = files.find(
+    (f) => f.source === "user" && f.name.toLowerCase().includes("participant")
   );
-
   const datasetDescription = files.find(
-    (f) => f.name.toLowerCase() === "dataset_description.json"
+    (f) =>
+      f.source === "user" && f.name.toLowerCase() === "dataset_description.json"
   );
 
   const pdfsAndDocs = files.filter(
@@ -122,10 +247,24 @@ export const getUserContextText = (files: FileItem[]): string => {
       f.source === "user" &&
       f.fileType === "office" &&
       f.content?.trim() &&
-      f.name.toLowerCase() !== "participants.tsv" // already handled
+      f.name.toLowerCase() !== "participants.tsv"
+  );
+  const textFiles = files.filter(
+    (f) =>
+      f.source === "user" &&
+      f.fileType === "text" &&
+      f.content?.trim() &&
+      ![
+        "readme",
+        "participants.tsv",
+        "dataset_description.json",
+        "readme.md",
+        "readme.txt",
+      ].includes(f.name.toLowerCase()) &&
+      f.isUserMeta === true
   );
 
-  const parts = [];
+  const parts: string[] = [];
   if (datasetDescription?.content)
     parts.push(`DATASET DESCRIPTION:\n${datasetDescription.content}`);
   if (readme?.content) parts.push(`README:\n${readme.content}`);
@@ -136,5 +275,9 @@ export const getUserContextText = (files: FileItem[]): string => {
   pdfsAndDocs.forEach((f) => {
     parts.push(`DOCUMENT [${f.name}]:\n${f.content!.slice(0, 3000)}`);
   });
+  textFiles.forEach((f) => {
+    parts.push(`TEXT FILE [${f.name}]:\n${f.content!.slice(0, 3000)}`);
+  });
+
   return parts.join("\n\n");
 };
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/fileProcessors.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/fileProcessors.ts
index b51d150..9897b15 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/fileProcessors.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/fileProcessors.ts
@@ -118,12 +118,32 @@ export const processFile = async (
       }
       entry.contentType = "hdf5";
     } else if (fileType === "neurojsonText") {
-      // Extract NeuroJSON text
       const text = await file.text();
-      // entry.content = text.slice(0, 5000);
       try {
         const json = JSON.parse(text);
-        entry.content = JSON.stringify(json, null, 2).slice(0, 5000);
+        // JNIfTI files — extract NIFTIHeader only, mirrors _extract_jnifti_header()
+        if (
+          file.name.toLowerCase().endsWith(".jnii") ||
+          file.name.toLowerCase().endsWith(".bnii")
+        ) {
+          const hdr = json?.NIFTIHeader ?? {};
+          const result: Record<string, any> = {};
+          for (const field of [
+            "Dim",
+            "VoxelSize",
+            "DataType",
+            "Intent",
+            "QForm",
+            "SForm",
+            "Description",
+            "NIIFormat",
+          ]) {
+            if (hdr[field] !== undefined) result[field] = hdr[field];
+          }
+          entry.content = JSON.stringify(result, null, 2);
+        } else {
+          entry.content = JSON.stringify(json, null, 2).slice(0, 5000);
+        }
       } catch (e) {
         entry.content = text.slice(0, 5000);
       }
@@ -344,9 +364,29 @@ export const processZip = async (
       else if (fileType === "neurojsonText") {
         try {
           const text = await zipEntry.async("text");
-          // entry.content = text.slice(0, 5000);
           const json = JSON.parse(text);
-          entry.content = JSON.stringify(json, null, 2).slice(0, 5000);
+          if (
+            fileName.toLowerCase().endsWith(".jnii") ||
+            fileName.toLowerCase().endsWith(".bnii")
+          ) {
+            const hdr = json?.NIFTIHeader ?? {};
+            const result: Record<string, any> = {};
+            for (const field of [
+              "Dim",
+              "VoxelSize",
+              "DataType",
+              "Intent",
+              "QForm",
+              "SForm",
+              "Description",
+              "NIIFormat",
+            ]) {
+              if (hdr[field] !== undefined) result[field] = hdr[field];
+            }
+            entry.content = JSON.stringify(result, null, 2);
+          } else {
+            entry.content = JSON.stringify(json, null, 2).slice(0, 5000);
+          }
           entry.contentType = "neurojson";
         } catch (e: any) {
           entry.content = `Error: ${e.message}`;
@@ -755,6 +795,14 @@ const parseDicomHeader = (buffer: ArrayBuffer): string => {
     const manufacturer = getString("x00080070");
     const rows = getString("x00280010");
     const cols = getString("x00280011");
+    const repetitionTime = getString("x00180080");
+    const echoTime = getString("x00180081");
+    const flipAngle = getString("x00181314");
+    const sliceThickness = getString("x00180050");
+    const magneticFieldStrength = getString("x00180087");
+    const manufacturerModel = getString("x00081090");
+    const softwareVersions = getString("x00181020");
+    const acquisitionDate = getString("x00080022");
 
     const lines = [`DICOM File`, `─`.repeat(50)];
 
@@ -767,7 +815,15 @@ const parseDicomHeader = (buffer: ArrayBuffer): string => {
     if (patientAge) lines.push(`Age: ${patientAge}`);
     if (manufacturer) lines.push(`Scanner: ${manufacturer}`);
     if (rows && cols) lines.push(`Image Size: ${rows} × ${cols}`);
-
+    if (repetitionTime) lines.push(`RepetitionTime: ${repetitionTime}`);
+    if (echoTime) lines.push(`EchoTime: ${echoTime}`);
+    if (flipAngle) lines.push(`FlipAngle: ${flipAngle}`);
+    if (sliceThickness) lines.push(`SliceThickness: ${sliceThickness}`);
+    if (magneticFieldStrength)
+      lines.push(`MagneticFieldStrength: ${magneticFieldStrength}`);
+    if (manufacturerModel) lines.push(`Model: ${manufacturerModel}`);
+    if (softwareVersions) lines.push(`SoftwareVersions: ${softwareVersions}`);
+    if (acquisitionDate) lines.push(`AcquisitionDate: ${acquisitionDate}`);
     return lines.join("\n");
   } catch (e: any) {
     return `DICOM File\nSize: ${(buffer.byteLength / 1024).toFixed(
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
index 24aa930..cc30bc8 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/filenameTokenizer.ts
@@ -358,72 +358,6 @@ export const buildLLMPayload = (
   };
 };
 
-/**
- * Analyze token statistics across all filenames.
- * Mirrors FilenamePatternAnalyzer.analyze_token_statistics()
- */
-// export const analyzeTokenStatistics = (
-//   filenames: string[]
-// ): TokenStatistics => {
-//   const allTokens: Record<string, number> = {};
-//   const prefixTokens: Record<string, number> = {}; // first token only
-
-//   for (const filename of filenames) {
-//     // Extract just filename from path
-//     const fname = filename.includes("/")
-//       ? filename.split("/").pop()!
-//       : filename;
-
-//     const tokens = tokenizeFilename(fname);
-
-//     // Count all tokens
-//     for (const token of tokens) {
-//       allTokens[token] = (allTokens[token] || 0) + 1;
-//     }
-
-//     // CRITICAL: use first TOKEN as prefix (not regex match)
-//     if (tokens.length > 0) {
-//       const firstToken = tokens[0];
-//       prefixTokens[firstToken] = (prefixTokens[firstToken] || 0) + 1;
-//     }
-//   }
-
-//   const dominantPrefixes = findDominantPrefixes(prefixTokens, filenames.length);
-
-//   return {
-//     totalFiles: filenames.length,
-//     tokenFrequency: allTokens,
-//     prefixFrequency: prefixTokens,
-//     dominantPrefixes,
-//   };
-// };
-
-/*
- * Find dominant prefixes — tokens appearing in >5% of files
- * that are not common words.
- * Mirrors FilenamePatternAnalyzer._find_dominant_prefixes()
- */
-// const findDominantPrefixes = (
-//   prefixCounter: Record<string, number>,
-//   totalFiles: number
-// ): DominantPrefix[] => {
-//   const threshold = totalFiles * 0.05; // 5% threshold
-
-//   return Object.entries(prefixCounter)
-//     .filter(([prefix, count]) => {
-//       if (count < threshold) return false;
-//       if (COMMON_WORDS.has(prefix.toLowerCase())) return false;
-//       return true;
-//     })
-//     .sort((a, b) => b[1] - a[1])
-//     .slice(0, 20)
-//     .map(([prefix, count]) => ({
-//       prefix,
-//       count,
-//       percentage: Math.round((count / totalFiles) * 1000) / 10,
-//     }));
-// };
-
 // ============================================================================
 // Integration Functions
 // Mirrors analyze_filenames_for_subjects() + _generate_recommendation() in filename_tokenizer.py
@@ -605,525 +539,3 @@ export const createBlockingQuestion = (
     options,
   },
 });
-
-// ============================================================================
-// extractSubjectAnalysis — mirrors build_bids_plan()'s subject extraction
-// ============================================================================
-
-/**
- * Full subject extraction mirroring autobidsify's judgment sequence:
- *
- * 1. Try directory structure patterns (sub-01, subject_01, site_sub01, 001)
- * 2. If fails → try filename token statistics (dominant prefix approach)
- * 3. Generate ID mapping (already_bids / numeric / semantic)
- */
-// export const extractSubjectAnalysis = (allFiles: string[], userNSubjects?: number | null, dominantPrefixes?: { prefix: string; count: number; percentage: number }[]): SubjectAnalysis => {
-//   // ── Step 1: Try directory structure (mirrors _extract_subjects_from_directory_structure)
-//   const fromDir = extractFromDirectoryStructure(allFiles);
-//   if (fromDir && fromDir.subject_records.length > 0) {
-//     const idMapping = generateIdMapping(fromDir);
-//     return { ...fromDir, id_mapping: idMapping };
-//   }
-
-//   // ── Step 2: Filename token statistics (mirrors filename_tokenizer approach)
-//   const fromTokens = extractFromTokenStatistics(allFiles);
-//   if (fromTokens && fromTokens.subject_records.length > 0) {
-//     const idMapping = generateIdMapping(fromTokens);
-//     return { ...fromTokens, id_mapping: idMapping };
-//   }
-
-//   // ── Fallback: empty result
-//   return {
-//     success: false,
-//     method: "none",
-//     subject_records: [],
-//     subject_count: 0,
-//     has_site_info: false,
-//     variants_by_subject: {},
-//     python_generated_filename_rules: [],
-//     id_mapping: {
-//       id_mapping: {},
-//       reverse_mapping: {},
-//       strategy_used: "none",
-//       metadata_columns: [],
-//     },
-//   };
-// };
-
-// ── Step 1: Directory structure patterns
-// Mirrors _extract_subjects_from_directory_structure() in planner.py
-// const extractFromDirectoryStructure = (
-//   allFiles: string[]
-// ): Omit<SubjectAnalysis, "id_mapping"> | null => {
-//   const patterns: Array<[RegExp, boolean, number, number | null, string]> = [
-//     [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"],
-//     [/^sub-(\w+)$/i, false, 1, null, "standard_bids"],
-//     [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"],
-//     [/^(\d{3,})$/, false, 1, null, "numeric_only"],
-//   ];
-
-//   const subjectRecords: SubjectRecord[] = [];
-//   const seenIds = new Set<string>();
-
-//   for (const filepath of allFiles) {
-//     const parts = filepath.split("/");
-//     for (const part of parts.slice(0, 2)) {
-//       for (const [
-//         regex,
-//         hasSite,
-//         idGroup,
-//         siteGroup,
-//         patternName,
-//       ] of patterns) {
-//         const match = part.match(regex);
-//         if (match) {
-//           const originalId = match[0];
-//           if (seenIds.has(originalId)) break;
-//           seenIds.add(originalId);
-//           subjectRecords.push({
-//             original_id: originalId,
-//             numeric_id: match[idGroup],
-//             site: hasSite && siteGroup ? match[siteGroup] : null,
-//             pattern_name: patternName,
-//             file_count: 0,
-//           });
-//           break;
-//         }
-//       }
-//     }
-//   }
-
-//   if (subjectRecords.length === 0) return null;
-
-//   subjectRecords.sort((a, b) => {
-//     const na = parseInt(a.numeric_id) || 0;
-//     const nb = parseInt(b.numeric_id) || 0;
-//     return na - nb;
-//   });
-
-//   return {
-//     success: true,
-//     method: "directory_structure",
-//     subject_records: subjectRecords,
-//     subject_count: subjectRecords.length,
-//     has_site_info: subjectRecords.some((r) => r.site !== null),
-//     variants_by_subject: {},
-//     python_generated_filename_rules: [],
-//   };
-// };
-
-// ── Step 2: Token statistics (dominant prefix approach)
-// Mirrors FilenamePatternAnalyzer + analyze_filenames_for_subjects() in filename_tokenizer.py
-// const extractFromTokenStatistics = (
-//   allFiles: string[]
-// ): Omit<SubjectAnalysis, "id_mapping"> | null => {
-//   // Extract just filenames (not full paths) — mirrors filename_tokenizer.py line:
-//   // filenames = [f.split('/')[-1] for f in all_files]
-//   const filenames = allFiles.map((f) =>
-//     f.includes("/") ? f.split("/").pop()! : f
-//   );
-
-//   const stats = analyzeTokenStatistics(filenames);
-
-//   if (stats.dominantPrefixes.length === 0) return null;
-
-//   // Count files per prefix
-//   const prefixFileCounts: Record<string, number> = {};
-//   for (const filename of filenames) {
-//     const tokens = tokenizeFilename(filename);
-//     if (tokens.length > 0) {
-//       const first = tokens[0];
-//       if (stats.dominantPrefixes.some((p) => p.prefix === first)) {
-//         prefixFileCounts[first] = (prefixFileCounts[first] || 0) + 1;
-//       }
-//     }
-//   }
-
-//   const subjectRecords: SubjectRecord[] = stats.dominantPrefixes.map(
-//     (p, i) => ({
-//       original_id: p.prefix,
-//       numeric_id: String(i + 1),
-//       site: null,
-//       pattern_name: "dominant_prefix",
-//       file_count: prefixFileCounts[p.prefix] || p.count,
-//     })
-//   );
-
-//   return {
-//     success: true,
-//     method: "dominant_prefix_fallback",
-//     subject_records: subjectRecords,
-//     subject_count: subjectRecords.length,
-//     has_site_info: false,
-//     variants_by_subject: {},
-//     python_generated_filename_rules: [],
-//   };
-// };
-
-/**
- * Mirrors _extract_numeric_id_from_identifier() in planner.py
- * BZZ003 → "003", sub-01 → "01", patient021 → "021"
- */
-const extractNumericIdFromIdentifier = (identifier: string): string | null => {
-  const numbers = identifier.match(/\d+/g);
-  if (!numbers) return null;
-  return numbers[numbers.length - 1]; // last numeric sequence, preserving leading zeros
-};
-
-// ── Step 1: Directory structure patterns
-// Mirrors _extract_subjects_from_directory_structure() in planner.py
-const SKIP_DIRS = new Set([
-  "anat",
-  "func",
-  "dwi",
-  "fmap",
-  "nirs",
-  "meg",
-  "eeg",
-  "beh",
-  "perf",
-  "derivatives",
-  "sourcedata",
-  "stimuli",
-  "walking",
-  "resting",
-  "resting_state",
-  "run",
-  "ses",
-  "pd",
-  "control",
-  "hc",
-  "task",
-  "sub",
-  "dataset",
-  "data",
-  "raw",
-  "bids",
-  "output",
-  "outputs",
-  "staging",
-  "_staging",
-  "mri",
-  "fnirs",
-  "edf",
-  "dicom",
-]);
-
-const extractFromDirectoryStructure = (
-  allFiles: string[]
-): Omit<SubjectAnalysis, "id_mapping"> | null => {
-  const patterns: Array<[RegExp, boolean, number, number | null, string]> = [
-    [/^([A-Za-z]+)_sub(\d+)$/i, true, 2, 1, "site_prefixed"], // Beijing_sub82352
-    [/^sub-(\w+)$/, false, 1, null, "standard_bids"], // sub-01
-    [/^subject[_-]?(\d+)$/i, false, 1, null, "simple"], // subject_01
-    [/^\d{3,}$/, false, 1, null, "numeric_only"], // 001
-    [/^([A-Za-z]+\d+)$/, false, 1, null, "alphanum_id"], // PD01, Control01, HC03
-  ];
-
-  const subjectRecords: SubjectRecord[] = [];
-  const seenIds = new Set<string>();
-
-  for (const filepath of allFiles) {
-    const parts = filepath.split("/");
-    // Check ALL directory levels (not just first 2)
-    const dirsOnly = parts.slice(0, parts.length - 1);
-    // const dirsOnly = parts.slice(0, Math.min(2, parts.length - 1)); // only first 2 levels
-
-    for (const part of dirsOnly) {
-      // Skip known non-subject directory names
-      // if (SKIP_DIRS.has(part.toLowerCase())) continue;
-
-      for (const [
-        regex,
-        hasSite,
-        idGroup,
-        siteGroup,
-        patternName,
-      ] of patterns) {
-        const match = part.match(regex);
-        if (match) {
-          const originalId = match[0];
-          if (seenIds.has(originalId)) break;
-          seenIds.add(originalId);
-          subjectRecords.push({
-            original_id: originalId,
-            numeric_id: match[idGroup] || match[0],
-            site: hasSite && siteGroup ? match[siteGroup] : null,
-            pattern_name: patternName,
-            file_count: 0,
-          });
-          break;
-        }
-      }
-    }
-  }
-
-  if (subjectRecords.length === 0) return null;
-
-  subjectRecords.sort((a, b) => {
-    // const na = parseInt(a.numeric_id) || 0;
-    // const nb = parseInt(b.numeric_id) || 0;
-    // return na - nb;
-    const aMatch = a.original_id.match(/^([A-Za-z]+)(\d+)$/);
-    const bMatch = b.original_id.match(/^([A-Za-z]+)(\d+)$/);
-
-    if (aMatch && bMatch) {
-      const prefixCompare = aMatch[1].localeCompare(bMatch[1]);
-      if (prefixCompare !== 0) return prefixCompare;
-      return parseInt(aMatch[2]) - parseInt(bMatch[2]);
-    }
-
-    const na = parseInt(a.numeric_id) || 0;
-    const nb = parseInt(b.numeric_id) || 0;
-    return na - nb;
-  });
-
-  // Build group map: subject originalId → parent directory name
-  // const groupMap: Record<string, string> = {};
-  // for (const filepath of allFiles) {
-  //   const parts = filepath.split("/");
-  //   for (let i = 1; i < parts.length - 1; i++) {
-  //     if (seenIds.has(parts[i]) && !SKIP_DIRS.has(parts[i - 1].toLowerCase())) {
-  //       groupMap[parts[i]] = parts[i - 1];
-  //     }
-  //   }
-  // }
-
-  // // Attach group to each record
-  // for (const rec of subjectRecords) {
-  //   if (groupMap[rec.original_id]) {
-  //     rec.group = groupMap[rec.original_id];
-  //   }
-  // }
-
-  return {
-    success: true,
-    method: "directory_structure",
-    subject_records: subjectRecords,
-    subject_count: subjectRecords.length,
-    has_site_info: subjectRecords.some((r) => r.site !== null),
-    variants_by_subject: {},
-    python_generated_filename_rules: [],
-  };
-};
-
-const TRIO_FILENAMES = new Set([
-  "dataset_description.json",
-  "participants.tsv",
-  "readme.md",
-  "readme.txt",
-  "readme.rst",
-  "readme",
-]);
-// ── Step 2: Flat filename identifier extraction
-// Mirrors _extract_subjects_from_flat_filenames() in planner.py
-// KEY DIFFERENCE from old version: uses base identifier (before first _)
-// not tokenizer dominant prefixes
-const extractFromFlatFilenames = (
-  allFiles: string[]
-): Omit<SubjectAnalysis, "id_mapping"> | null => {
-  const identifierToFiles: Record<string, string[]> = {};
-
-  for (const filepath of allFiles) {
-    const filename = filepath.split("/").pop()!;
-
-    // Skip trio files
-    if (TRIO_FILENAMES.has(filename.toLowerCase())) continue;
-    // Skip non-data files (PDFs, docs, JSONs that aren't data)
-    if (!DATA_EXTENSIONS.test(filename)) continue;
-    // Remove extension(s): sub-01_ses-left2s_task-FRESHMOTOR_nirs.snirf → sub-01_ses-left2s_task-FRESHMOTOR_nirs
-    const nameNoExt = filename.replace(/(\.[^.]+)+$/, "");
-
-    // Extract base identifier — alphanumeric before first underscore
-    // sub-01_ses-left2s → sub-01
-    // BZZ003_rest → BZZ003
-    // VHMCT1mm-Hip → VHMCT1mm-Hip (no underscore, take full name)
-    const match = nameNoExt.match(/^([A-Za-z0-9\-]+)/);
-    if (match) {
-      const identifier = match[1];
-      if (!identifierToFiles[identifier]) identifierToFiles[identifier] = [];
-      identifierToFiles[identifier].push(filepath);
-    }
-  }
-
-  if (Object.keys(identifierToFiles).length === 0) return null;
-
-  // Sort by extracted numeric ID if possible (mirrors sort_key in planner.py)
-  const sortedIdentifiers = Object.keys(identifierToFiles).sort((a, b) => {
-    const na = extractNumericIdFromIdentifier(a);
-    const nb = extractNumericIdFromIdentifier(b);
-    if (na && nb) return parseInt(na) - parseInt(nb);
-    return a.localeCompare(b);
-  });
-
-  const subjectRecords: SubjectRecord[] = sortedIdentifiers.map(
-    (identifier, i) => ({
-      original_id: identifier,
-      numeric_id: String(i + 1),
-      site: null,
-      pattern_name: "filename_identifier",
-      file_count: identifierToFiles[identifier].length,
-    })
-  );
-
-  return {
-    success: true,
-    method: "flat_filename_identifiers",
-    subject_records: subjectRecords,
-    subject_count: subjectRecords.length,
-    has_site_info: false,
-    variants_by_subject: {},
-    python_generated_filename_rules: [],
-  };
-};
-
-export const extractSubjectAnalysis = (
-  allFiles: string[],
-  userNSubjects?: number | null,
-  dominantPrefixes?: { prefix: string; count: number; percentage: number }[]
-): SubjectAnalysis => {
-  // Step 1: directory structure
-  let subjectInfo = extractFromDirectoryStructure(allFiles);
-
-  // Step 2: flat filename fallback
-  if (!subjectInfo || subjectInfo.subject_records.length === 0) {
-    subjectInfo = extractFromFlatFilenames(allFiles);
-  }
-
-  if (!subjectInfo || subjectInfo.subject_records.length === 0) {
-    return {
-      success: false,
-      method: "none",
-      subject_records: [],
-      subject_count: 0,
-      has_site_info: false,
-      variants_by_subject: {},
-      python_generated_filename_rules: [],
-      id_mapping: {
-        id_mapping: {},
-        reverse_mapping: {},
-        strategy_used: "none",
-        metadata_columns: [],
-      },
-    };
-  }
-
-  // ── CRITICAL validation: mirrors planner.py lines 190-215
-  // If extracted count doesn't match user hint but dominant prefixes do,
-  // fall back to dominant prefixes (handles VHM/VHF body-part over-extraction)
-  const pythonCount = subjectInfo.subject_count;
-  if (
-    userNSubjects &&
-    pythonCount !== userNSubjects &&
-    dominantPrefixes &&
-    dominantPrefixes.length === userNSubjects
-  ) {
-    subjectInfo = {
-      success: true,
-      method: "dominant_prefix_fallback",
-      subject_records: dominantPrefixes.map((p, i) => ({
-        original_id: p.prefix,
-        numeric_id: String(i + 1),
-        site: null,
-        pattern_name: "dominant_prefix",
-        file_count: p.count,
-      })),
-      subject_count: dominantPrefixes.length,
-      has_site_info: false,
-      variants_by_subject: {},
-      python_generated_filename_rules: [],
-    };
-  }
-  // bug fix for subject mapping
-  // === original
-  // const idMapping = generateIdMapping(subjectInfo);
-  // return { ...subjectInfo, id_mapping: idMapping };
-  // ==== end
-  // ==== updates
-  // CRITICAL: n_subjects is authoritative (mirrors planner.py PROMPT_BIDS_PLAN)
-  // If analysis count doesn't match user input, fall back to sequential numbering
-  const expectedCount = userNSubjects;
-  if (expectedCount && subjectInfo.subject_count !== expectedCount) {
-    const idMap: Record<string, string> = {};
-    const reverseMap: Record<string, string> = {};
-    for (let i = 1; i <= expectedCount; i++) {
-      const bidsId = String(i).padStart(2, "0");
-      idMap[`sub-${bidsId}`] = bidsId;
-      reverseMap[bidsId] = `sub-${bidsId}`;
-    }
-    return {
-      ...subjectInfo,
-      subject_count: expectedCount,
-      id_mapping: {
-        id_mapping: idMap,
-        reverse_mapping: reverseMap,
-        strategy_used: "numeric_fallback",
-        metadata_columns: [],
-      },
-    };
-  }
-
-  const idMapping = generateIdMapping(subjectInfo);
-  return { ...subjectInfo, id_mapping: idMapping };
-};
-
-// ── ID mapping — mirrors _generate_subject_id_mapping() in planner.py
-const generateIdMapping = (
-  subjectInfo: Omit<SubjectAnalysis, "id_mapping">
-): SubjectAnalysis["id_mapping"] => {
-  const records = subjectInfo.subject_records;
-  const idMapping: Record<string, string> = {};
-  const reverseMapping: Record<string, string> = {};
-
-  // Detect already-BIDS format (sub-01, sub-02...)
-  const allAlreadyBids = records.every((r) => /^sub-\w+$/i.test(r.original_id));
-
-  if (allAlreadyBids) {
-    for (const rec of records) {
-      const bidsId = rec.original_id.replace(/^sub-/i, "");
-      idMapping[rec.original_id] = bidsId;
-      reverseMapping[bidsId] = rec.original_id;
-    }
-    return {
-      id_mapping: idMapping,
-      reverse_mapping: reverseMapping,
-      strategy_used: "already_bids",
-      metadata_columns: [],
-    };
-  }
-
-  // Numeric strategy: try to extract trailing numbers first
-  // BZZ003 → "003", patient021 → "021" (mirrors _extract_numeric_id_from_identifier)
-  const extractedNumbers: Record<string, string> = {};
-  for (const rec of records) {
-    const nums = rec.original_id.match(/\d+/g);
-    if (nums) extractedNumbers[rec.original_id] = nums[nums.length - 1];
-  }
-
-  const numericValues = Object.values(extractedNumbers);
-  const allUnique = new Set(numericValues).size === numericValues.length;
-
-  if (Object.keys(extractedNumbers).length === records.length && allUnique) {
-    // Use extracted numeric IDs (preserving leading zeros)
-    for (const rec of records) {
-      const bidsId = extractedNumbers[rec.original_id];
-      idMapping[rec.original_id] = bidsId;
-      reverseMapping[bidsId] = rec.original_id;
-    }
-  } else {
-    // Fall back to sequential numbering
-    for (let i = 0; i < records.length; i++) {
-      const orig = records[i].original_id;
-      const bidsId = String(i + 1);
-      idMapping[orig] = bidsId;
-      reverseMapping[bidsId] = orig;
-    }
-  }
-
-  return {
-    id_mapping: idMapping,
-    reverse_mapping: reverseMapping,
-    strategy_used: "numeric",
-    metadata_columns: ["original_id"],
-  };
-};
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/llm.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/llm.ts
new file mode 100644
index 0000000..4e17c34
--- /dev/null
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/llm.ts
@@ -0,0 +1,963 @@
+// src/components/User/Dashboard/DatasetOrganizer/utils/llm.ts
+//
+// Mirrors autobidsify/llm.py
+// Unified LLM caller supporting OpenAI, Qwen (Ollama), Anthropic, Groq, OpenRouter.
+//
+// Python equivalents:
+//   LLMHardFail                  → LLMHardFail class
+//   isQwenModel()                → is_qwen_model()
+//   isOpenAIModel()              → is_openai_model()
+//   isReasoningModel()           → is_reasoning_model()
+//   inferQwenTemperature()       → _infer_qwen_temperature()
+//   callLLM()                    → _call_llm()
+//   PROMPT_TRIO_DATASET_DESC     → PROMPT_TRIO_DATASET_DESC
+//   PROMPT_TRIO_README           → PROMPT_TRIO_README
+//   (not ported)                 → PROMPT_TRIO_PARTICIPANTS
+//   PROMPT_BIDS_PLAN             → PROMPT_BIDS_PLAN
+//   PROMPT_MAT_SNIRF_MAPPING     → PROMPT_MAT_SNIRF_MAPPING
+//   llmTrioDatasetDescription()  → llm_trio_dataset_description()
+//   llmTrioReadme()              → llm_trio_readme()
+//   llmTrioParticipants()        → llm_trio_participants()  (commented out below; not exported)
+//   llmBidsPlan()                → llm_bids_plan()
+//   llmMapMatToSnirf()           → llm_map_mat_to_snirf()
+//
+// DIFFERENCES FROM llm.py are flagged inline with "TS addition" / "TS-only" comments.
+import { OllamaService } from "services/ollama.service";
+
+// ============================================================================
+// LLMHardFail — the error type callLLM() throws for every failed LLM call.
+// Mirrors LLMHardFail exception class in llm.py
+// ============================================================================
+
+export class LLMHardFail extends Error {
+  step: string; // step label supplied by the caller (e.g. "BIDSPlan")
+  error_type: string; // short failure category (e.g. "EmptyResponse")
+  message: string; // raw detail text, without the "[step] type:" prefix
+
+  constructor(step: string, errorType: string, message: string) {
+    super(`[${step}] ${errorType}: ${message}`);
+    this.step = step;
+    this.error_type = errorType;
+    this.message = message; // replaces the formatted text passed to super()
+  }
+}
+
+// ============================================================================
+// Provider detection
+// Mirrors is_qwen_model(), is_openai_model(), is_reasoning_model()
+// ============================================================================
+
+export const isQwenModel = (model: string): boolean =>
+  model.toLowerCase().startsWith("qwen"); // case-insensitive "qwen" prefix; callLLM() sends these to the Ollama proxy
+
+export const isOpenAIModel = (model: string): boolean =>
+  model.toLowerCase().startsWith("gpt") || // all three prefix tests are case-insensitive
+  model.toLowerCase().startsWith("o1") ||
+  model.toLowerCase().startsWith("o3"); // callLLM() does not call this; the OpenAI-compatible path is its default branch
+
+export const isReasoningModel = (model: string): boolean => {
+  const m = model.toLowerCase(); // prefix checks below are case-insensitive
+  return m.startsWith("o1") || m.startsWith("o3") || m.startsWith("gpt-5"); // callLLM() omits temperature and uses a 32000-token cap for these
+}; // NOTE(review): other reasoning-model names (e.g. "o4*") are not matched — confirm against llm.py
+
+// TS addition (not in llm.py, which has OpenAI + Qwen only; TS adds Anthropic,
+// Groq, OpenRouter): true when the model name starts with "claude", any case.
+export const isAnthropicModel = (model: string): boolean =>
+  model.toLowerCase().startsWith("claude");
+
+// ============================================================================
+// Temperature inference for Qwen: clamps the requested temperature by model-name keyword.
+// Mirrors _infer_qwen_temperature() in llm.py
+// ============================================================================
+
+export const inferQwenTemperature = (
+  model: string,
+  baseTemperature: number | null
+): number | null => {
+  if (baseTemperature === null) return null; // nothing requested → nothing to clamp
+
+  const m = model.toLowerCase(); // keyword matching is case-insensitive
+
+  if (["think", "careful", "compare", "reason"].some((kw) => m.includes(kw)))
+    return Math.min(baseTemperature, 0.15); // cap at 0.15
+
+  if (["next", "fast", "turbo", "lite"].some((kw) => m.includes(kw)))
+    return Math.max(baseTemperature, 0.4); // floor at 0.4
+
+  return Math.max(baseTemperature, 0.3); // any other model name: floor at 0.3
+};
+
+// ============================================================================
+// LLM config type — connection settings consumed by callLLM()
+// TS-only: Python uses env vars + CLI args; TS gets config from UI state
+// ============================================================================
+
+export interface LLMConfig {
+  provider: string; // "ollama" routes callLLM() to the Ollama proxy
+  model: string; // model name; its prefix (qwen*/claude*) also affects routing
+  apiKey: string; // sent as "x-api-key" (Anthropic) or "Authorization: Bearer …" (OpenAI-compatible)
+  baseUrl: string; // full URL callLLM() POSTs to; unused on the Ollama path
+  isAnthropic?: boolean; // selects the Anthropic request format (checked after the Ollama/Qwen route)
+  noApiKey?: boolean; // when true, the Authorization header is omitted on the OpenAI-compatible path
+}
+
+// ============================================================================
+// callLLM() — send one system + user prompt pair, resolve to the trimmed reply text.
+// Mirrors _call_llm() unified entry point in llm.py
+// Every failure (HTTP error, empty reply, thrown exception) surfaces as LLMHardFail.
+// Python routing:
+//   qwen*        → _call_qwen()    (Ollama / REST API / DashScope)
+//   gpt* o1* o3* → _call_openai() (OpenAI API)
+// TS routing, first match wins: provider "ollama" or qwen* → Ollama proxy;
+// isAnthropic or claude* → Anthropic; anything else → OpenAI-compatible POST.
+// TS adds Anthropic, Groq, OpenRouter because NeuroJSON.io supports more providers than the CLI does.
+// ============================================================================
+
+export const callLLM = async (
+  systemPrompt: string,
+  userPayload: string,
+  step: string, // label copied into any LLMHardFail thrown here
+  llmConfig: LLMConfig,
+  temperature: number | null = null, // null → no temperature is sent
+  signal?: AbortSignal // forwarded to fetch(); NOTE(review): not passed to OllamaService.chat()
+): Promise<string> => {
+  const { provider, model, apiKey, baseUrl, isAnthropic, noApiKey } = llmConfig;
+
+  // ── Qwen via Ollama proxy ─────────────────────────────────────────
+  // Mirrors _call_qwen() → _call_qwen_ollama() / _call_qwen_rest_api()
+  // Python supports local Ollama + REST API + DashScope.
+  // TS only supports the REST API proxy (OllamaService routes to jin.neu.edu:11434).
+  if (provider === "ollama" || isQwenModel(model)) {
+    const temp = inferQwenTemperature(model, temperature);
+    try {
+      const res = await OllamaService.chat(
+        model,
+        [
+          { role: "system", content: systemPrompt },
+          { role: "user", content: userPayload },
+        ],
+        temp ?? undefined // null becomes undefined, i.e. the argument is omitted
+      );
+      const content = res?.choices?.[0]?.message?.content ?? "";
+      if (content.trim()) return content.trim();
+      throw new LLMHardFail(
+        step,
+        "EmptyResponse",
+        "Ollama returned empty content"
+      );
+    } catch (e) {
+      if (e instanceof LLMHardFail) throw e; // keep the specific error raised above
+      const msg = String(e).toLowerCase();
+      if (msg.includes("connection") || msg.includes("refused"))
+        throw new LLMHardFail(
+          step,
+          "OllamaNotRunning",
+          "Cannot connect to Ollama proxy"
+        );
+      throw new LLMHardFail(step, "QwenError", String(e));
+    }
+  }
+
+  // ── Anthropic Claude ──────────────────────────────────────────────
+  // TS addition — not in Python llm.py. System prompt is prepended to the user message; temperature is not sent.
+  if (isAnthropic || isAnthropicModel(model)) {
+    try {
+      const res = await fetch(baseUrl, {
+        method: "POST",
+        signal,
+        headers: {
+          "Content-Type": "application/json",
+          "x-api-key": apiKey,
+          "anthropic-version": "2023-06-01",
+        },
+        body: JSON.stringify({
+          model,
+          max_tokens: isReasoningModel(model) ? 32000 : 16000, // NOTE(review): isReasoningModel() only matches o1/o3/gpt-5 prefixes, so claude* names always get 16000
+          messages: [
+            { role: "user", content: `${systemPrompt}\n\n${userPayload}` },
+          ],
+        }),
+      });
+      const data = await res.json(); // NOTE(review): parsed before res.ok is checked; a non-JSON error body would throw here and be reported by the catch below
+      if (!res.ok)
+        throw new LLMHardFail(
+          step,
+          "AnthropicError",
+          data?.error?.message ?? res.statusText
+        );
+      const content = data?.content?.[0]?.text ?? "";
+      if (content.trim()) return content.trim();
+      throw new LLMHardFail(
+        step,
+        "EmptyResponse",
+        "Anthropic returned empty content"
+      );
+    } catch (e) {
+      if (e instanceof LLMHardFail) throw e;
+      throw new LLMHardFail(step, "AnthropicError", String(e));
+    }
+  }
+
+  // ── OpenAI-compatible (OpenAI, Groq, OpenRouter) ──────────────────
+  // Mirrors _call_openai() in llm.py.
+  // Groq and OpenRouter use the same OpenAI-compatible API format.
+  try {
+    const params: Record<string, any> = {
+      model,
+      messages: [
+        { role: "system", content: systemPrompt },
+        { role: "user", content: userPayload },
+      ],
+    };
+
+    if (isReasoningModel(model)) {
+      // Mirrors: params["max_completion_tokens"] = 32000 (no temperature)
+      params.max_completion_tokens = 32000;
+    } else {
+      // Mirrors: params["max_completion_tokens"] = 16000 + temperature
+      params.max_completion_tokens = 16000;
+      if (temperature !== null) params.temperature = temperature;
+    }
+
+    const res = await fetch(baseUrl, {
+      method: "POST",
+      signal,
+      headers: {
+        "Content-Type": "application/json",
+        ...(noApiKey ? {} : { Authorization: `Bearer ${apiKey}` }),
+      },
+      body: JSON.stringify(params),
+    });
+    const data = await res.json(); // NOTE(review): parsed before res.ok is checked, as in the Anthropic branch
+    if (!res.ok)
+      throw new LLMHardFail(
+        step,
+        "OpenAIError",
+        data?.error?.message ?? res.statusText
+      );
+    const content = data?.choices?.[0]?.message?.content ?? "";
+    if (content.trim()) return content.trim();
+    throw new LLMHardFail(
+      step,
+      "EmptyResponse",
+      "OpenAI returned empty content"
+    );
+  } catch (e) {
+    if (e instanceof LLMHardFail) throw e;
+    throw new LLMHardFail(step, "UnexpectedError", String(e)); // presumably also covers aborts triggered via `signal` — confirm callers expect that
+  }
+};
+
+// ============================================================================
+// Prompts
+// Mirrors the PROMPT_* constants in llm.py (PROMPT_TRIO_PARTICIPANTS is not ported).
+// These are the EXACT strings from llm.py — no changes.
+// ============================================================================
+
+export const PROMPT_TRIO_DATASET_DESC = `You are a BIDS dataset_description.json metadata extractor.
+
+═══════════════════════════════════════════════════════
+YOUR JOB
+═══════════════════════════════════════════════════════
+
+Extract dataset metadata from the input. Return ONLY valid JSON, no markdown.
+
+═══════════════════════════════════════════════════════
+CRITICAL RULES
+═══════════════════════════════════════════════════════
+
+1. LICENSE — output as "raw_license" (plain string, NOT normalized):
+   - Copy exactly what the user wrote, e.g. "CC0", "CC BY 4.0",
+     "Creative Commons Zero", "public domain", "MIT license"
+   - Do NOT try to normalize or format it — Python will do that
+   - If the user wrote "License: CC0" → raw_license: "CC0"
+   - If the document says "released under Creative Commons" → raw_license: "Creative Commons"
+   - If no license mentioned anywhere → omit raw_license
+
+2. AUTHORS — extract from ALL available sources:
+   - Search in order: user_hints.user_text → documents[]
+   - Look for: explicit author lists, citation patterns, "Created by",
+     "Principal Investigator", "Contact", "Contributors" sections
+   - If full names are available, use them: ["Last FM", "Last FM"]
+   - If only "et al." citation exists, keep first author + et al.: ["Shafto MA et al."]
+   - Do NOT infer, guess, or use outside knowledge to expand author lists
+   - Do NOT fabricate names not present in any input source
+   - If no author information found anywhere, omit Authors field entirely
+
+   EXAMPLES (follow exactly):
+
+   Input: "Smith et al. (2023). A neuroimaging study..."
+   Output: "Authors": ["Smith et al."]
+
+   Input: "Created by John Doe, Jane Smith and Bob Lee"
+   Output: "Authors": ["John Doe", "Jane Smith", "Bob Lee"]
+
+   Input: "Data collected by the CamCAN team. Contact: info@cam.ac.uk"
+   Output: (omit Authors field)
+
+   Input: "Shafto et al. (2014). The Cambridge Centre for Ageing..."
+   Output: "Authors": ["Shafto et al."]
+
+3. NAME — infer from context:
+   - Look for explicit dataset name in user_hints.user_text
+   - If not found, infer from the scientific context
+   - Keep it short and descriptive
+
+4. MISSING FIELDS — omit rather than guess:
+   - If you cannot determine a field with reasonable confidence, omit it
+   - Never invent information not present in the input
+
+═══════════════════════════════════════════════════════
+OUTPUT FORMAT
+═══════════════════════════════════════════════════════
+
+{
+  "dataset_description": {
+    "Name": "...",
+    "BIDSVersion": "1.10.0",
+    "DatasetType": "raw",
+    "Authors": ["First Last", "First Last"]
+  },
+  "raw_license": "CC0",
+  "extraction_log": {
+    "Name": "inferred from user_text: '...'",
+    "raw_license": "found in user_text: 'License: CC0'",
+    "Authors": "extracted from citation in user_text"
+  },
+  "questions": []
+}
+
+Notes:
+- raw_license goes at the TOP LEVEL (not inside dataset_description)
+- dataset_description should NOT contain a "License" field — Python adds it after normalization
+- BIDSVersion must always be "1.10.0"
+- DatasetType must always be "raw"
+- Output ONLY valid JSON, no extra text, no markdown fences
+
+FIELD SOURCE RULES (STRICT - violations cause data integrity failure):
+┌─────────────────┬────────────────────────────────────────────────────┐
+│ Field           │ Allowed sources                                    │
+├─────────────────┼────────────────────────────────────────────────────┤
+│ Authors         │ user_hints.user_text or documents[] ONLY           │
+│                 │ NEVER use training knowledge to expand et al.      │
+│ raw_license     │ user_hints.user_text or documents[] ONLY           │
+│ Name            │ may infer from context if not explicit             │
+│ BIDSVersion     │ always "1.10.0" (fixed)                            │
+│ DatasetType     │ always "raw" (fixed)                               │
+└─────────────────┴────────────────────────────────────────────────────┘`;
+
+export const PROMPT_TRIO_README = `Generate README.md for BIDS dataset.
+
+CRITICAL: Use user_hints.user_text as primary source for README content.
+
+Create comprehensive README with sections:
+- Overview
+- Dataset Description
+- Data Acquisition
+- File Organization
+- Usage Notes
+- References
+
+Output: Direct Markdown text (no JSON wrapper)`;
+
+export const PROMPT_BIDS_PLAN = `You are a BIDS dataset architect with complete decision-making authority.
+
+═══════════════════════════════════════════════════════════════════════
+SUPPORTED FORMATS AND CONVERSION RULES
+═══════════════════════════════════════════════════════════════════════
+
+MRI FORMATS (modality: mri):
+  • DICOM (.dcm)           → convert_to: nifti   (dcm2niix)
+  • NIfTI (.nii, .nii.gz)  → format_ready: true  (copy directly)
+  • JNIfTI (.jnii, .bnii)  → convert_to: nifti
+
+fNIRS FORMATS (modality: nirs):
+  • SNIRF (.snirf)         → format_ready: true  (copy directly)
+  • Homer3 (.nirs)         → convert_to: snirf
+  • MATLAB (.mat)          → convert_to: snirf
+
+═══════════════════════════════════════════════════════════════════════
+SUBJECT IDENTIFICATION — MOST IMPORTANT STEP
+═══════════════════════════════════════════════════════════════════════
+
+Your first job is to correctly identify all subjects from the file list.
+The dataset may use ANY of the following structures:
+
+STRUCTURE 1 — Already BIDS (sub-XX directories)
+  sub-01/nirs/sub-01_task-rest_nirs.snirf
+  sub-02/nirs/sub-02_task-rest_nirs.snirf
+  → Use 'already_bids' strategy. Strip 'sub-' prefix.
+
+STRUCTURE 2 — Site-prefixed directories
+  Beijing_sub82352/anat/scan.nii.gz
+  Newark_sub41006/anat/scan.nii.gz
+  → Use directory names as subject identifiers.
+
+STRUCTURE 3 — Flat files with numeric suffix
+  VHMCT1mm-Hip (134).dcm  (prefix VHM = subject 1)
+  VHFCT1mm-Hip (45).dcm   (prefix VHF = subject 2)
+  → Use filename prefix as subject identifier.
+
+STRUCTURE 4 — Group/subject nested directories
+  PD/PD_01.snirf
+  PD/PD_02.snirf
+  control/control_01.snirf
+  control/control_20.snirf
+  → Each unique filename base (PD_01, PD_02 ... control_01 ... control_20)
+    is ONE subject. The parent directory (PD / control) is the GROUP,
+    not the subject. Add 'group' column to participant_metadata.
+  → Assign numeric IDs: PD_01→1, PD_02→2 ... control_01→21 ... control_20→40
+
+STRUCTURE 5 — Task/group/subject nested directories
+  walking/PD/PD_01.snirf
+  walking/control/control_01.snirf
+  → Same as Structure 4. Ignore the task-level directory when identifying subjects.
+    The task name goes into the BIDS filename (task-walking), not the subject ID.
+
+STRUCTURE 6 — Pure numeric directories
+  001/scan.dcm
+  002/scan.dcm
+  → Use directory number as subject ID.
+
+CRITICAL RULES FOR SUBJECT COUNTING:
+1. python_subject_analysis.subject_count is a HINT, not authoritative.
+2. user_hints.n_subjects is the AUTHORITATIVE count.
+   If provided, your assignment_rules MUST produce exactly that many subjects.
+3. Count the actual unique files/directories to determine the true number.
+4. For group/subject nested structures: count UNIQUE FILES, not directories.
+   (PD/ and control/ are 2 directories but may contain 40 subjects total)
+
+═══════════════════════════════════════════════════════════════════════
+GROUP METADATA
+═══════════════════════════════════════════════════════════════════════
+
+When the dataset has clinically meaningful groups (PD vs control,
+patient vs healthy, treated vs untreated):
+- Add a 'group' column to participant_metadata for EVERY subject.
+- Use the exact group label from the directory or filename.
+
+Example for PD dataset with 40 subjects:
+  participant_metadata:
+    '1':  {original_id: 'PD_01',      group: 'PD'}
+    '2':  {original_id: 'PD_02',      group: 'PD'}
+    ...
+    '21': {original_id: 'control_01', group: 'control'}
+    ...
+    '40': {original_id: 'control_20', group: 'control'}
+
+═══════════════════════════════════════════════════════════════════════
+ASSIGNMENT RULES
+═══════════════════════════════════════════════════════════════════════
+
+Each rule maps source files to one BIDS subject ID.
+
+CRITICAL: 'subject' field must be BARE ID — no 'sub-' prefix.
+  ✓ subject: '1'      → executor creates sub-1
+  ✗ subject: 'sub-1'  → executor creates sub-sub-1
+
+For group/subject nested structures, use the filename as the match token:
+  assignment_rules:
+    - subject: '1'
+      original: 'PD_01'
+      match: ['*PD_01*']
+    - subject: '21'
+      original: 'control_01'
+      match: ['*control_01*']
+
+For prefix-based flat structures:
+  assignment_rules:
+    - subject: '1'
+      original: 'VHM'
+      match: ['*VHM*']
+    - subject: '2'
+      original: 'VHF'
+      match: ['*VHF*']
+
+═══════════════════════════════════════════════════════════════════════
+FORMAT_READY AND CONVERT_TO RULES
+═══════════════════════════════════════════════════════════════════════
+
+format_ready: true  → .nii/.nii.gz (MRI) or .snirf (fNIRS) — copy directly
+format_ready: false → needs conversion:
+  .dcm / .jnii / .bnii → convert_to: nifti
+  .mat / .nirs         → convert_to: snirf
+convert_to: "none"   → only when format_ready: true
+
+═══════════════════════════════════════════════════════════════════════
+FILENAME RULES — TASK INFERENCE
+═══════════════════════════════════════════════════════════════════════
+
+For fNIRS: infer task name from directory structure or user description.
+  walking/ directory → task-walking
+  fingertapping/ or tapping/ → task-fingertapping
+  resting/ or rest/ → task-rest
+
+For MRI: use acq- to distinguish different scan series from same subject.
+  VHFCT1mm-Ankle.dcm → acq-ankle_T1w
+  VHFCT1mm-Head.dcm  → acq-head_T1w
+
+═══════════════════════════════════════════════════════════════════════
+OUTPUT FORMAT
+═══════════════════════════════════════════════════════════════════════
+
+subjects:
+  labels: [list of bare BIDS IDs, e.g. ['1','2',...,'40']]
+  count: N
+  source: llm_analysis
+  id_strategy: numeric / semantic / already_bids
+
+assignment_rules:
+  - subject: 'bare_id'
+    original: 'exact_identifier_from_filename_or_dirname'
+    match: ['*identifier*']
+
+participant_metadata:
+  'bare_id':
+    original_id: 'xxx'
+    group: 'PD'          # if applicable
+    sex: 'M'             # if available
+    age: '65'            # if available
+
+mappings:
+  - modality: nirs
+    match: ['**/*.snirf']
+    exclude: []
+    format_ready: true
+    convert_to: none
+    filename_rules:
+      - match_pattern: '.*'
+        bids_template: 'sub-X_task-walking_nirs.snirf'
+
+OUTPUT: Raw YAML only (no markdown, no explanation)`;
+
+export const PROMPT_MAT_SNIRF_MAPPING = `You are an fNIRS data format expert.
+
+You will receive a JSON summary of one or more MATLAB .mat files from the
+same structural group. The summary contains a "flat_vars" dict where all
+scipy struct wrappers have already been unwrapped — what you see reflects
+the actual data shape and content.
+
+flat_vars key conventions:
+- Top-level variable:     "d", "t", "fs"
+- Struct field:           "dat.signal", "SD.Lambda", "dat.fs"
+- "likely_data": true     marks tall 2D float arrays (n_samples > n_channels)
+- "value"                 means scalar
+- "values"                means small array with known content
+- "string_array" dtype    means channel labels or string metadata
+
+Use flat_vars keys EXACTLY as they appear. Do not invent new paths.
+
+═══════════════════════════════════════════════════════════
+SNIRF REQUIRED FIELDS
+═══════════════════════════════════════════════════════════
+
+dataTimeSeries  — 2D float (n_samples × n_channels)
+time            — 1D float (n_samples,), unit: seconds
+wavelengths     — 1D array of wavelength values in nm
+measurementList — per-channel source/detector/wavelength/dataType indices
+
+═══════════════════════════════════════════════════════════
+DATA ASSEMBLY TYPES
+═══════════════════════════════════════════════════════════
+
+Choose the correct type based on how the data is stored:
+
+TYPE 1 — "single": data is in one variable (most common)
+  Use when: one tall 2D array holds all channels
+  Example: Homer3 "d", or "dat.signal"
+  {
+    "type": "single",
+    "var": "d",
+    "transpose": false
+  }
+  Set transpose: true if shape is (n_channels, n_samples) instead of (n_samples, n_channels)
+  FORBIDDEN: Do NOT use array indexing syntax like "data.values[0]" or "data[0]".
+  The Python executor does not support cell array indexing.
+  Only dot-notation paths are supported: "data.X", "dat.signal", "SD.Lambda".
+
+  CRITICAL — struct variables: if the top-level variable is a MATLAB struct
+  (i.e. flat_vars shows sub-fields like "data.X", "data.fs", "data.trial"),
+  you MUST use the full dot-notation path to the numeric field, NOT the
+  struct variable name itself.
+
+  Example: flat_vars shows:
+    "data.X":     {"shape": [N, C], "likely_data": true}
+    "data.fs":    {"value": 10.0}
+    "data.trial": {"shape": [1, 75]}
+  Correct:   "var": "data.X"     ← full dot-notation path
+  WRONG:     "var": "data"       ← this is the struct, not the data array
+
+  Similarly for time:
+    "data.fs" is a scalar → use as fs_var in time_assembly
+    Correct: {"type": "generate", "fs_var": "data.fs"}
+
+TYPE 2 — "stack_columns": data split across ch1, ch2, ... chN variables
+  Use when: flat_vars contains many variables named ch1, ch2, ch3 ... chN
+  each being a 1D or column vector of the same length
+  {
+    "type": "stack_columns",
+    "var_pattern": "ch",
+    "var_range": [1, 40]
+  }
+  var_pattern: the common prefix (e.g. "ch", "channel", "nirs")
+  var_range: [first_index, last_index] inclusive
+  Use "vars" list instead of var_pattern+var_range if naming is non-numeric:
+  {
+    "type": "stack_columns",
+    "vars": ["left_pfc", "right_pfc", "motor"]
+  }
+
+TYPE 3 — "hbo_hbr": HbO and HbR stored as separate matrices
+  Use when: two 2D arrays named HbO/HbR or oxy/deoxy exist with same shape
+  {
+    "type": "hbo_hbr",
+    "hbo_var": "HbO",
+    "hbr_var": "HbR"
+  }
+  Result: columns are concatenated [HbO | HbR] → (n_samples, n_channels)
+
+═══════════════════════════════════════════════════════════
+TIME ASSEMBLY TYPES
+═══════════════════════════════════════════════════════════
+
+TYPE 1 — "var": time vector exists as a variable
+  {
+    "type": "var",
+    "var": "t"
+  }
+
+TYPE 2 — "generate": no time variable, generate from sampling rate
+  Prefer fs_var (read from file) over fs_value (hardcoded)
+  {
+    "type": "generate",
+    "fs_var": "dat.fs",
+    "fs_value": 13.33
+  }
+  If neither fs_var nor fs_value is known, set fs_value to null
+  (executor will default to 10.0 Hz)
+
+═══════════════════════════════════════════════════════════
+WAVELENGTHS ASSEMBLY TYPES
+═══════════════════════════════════════════════════════════
+
+TYPE 1 — "var": wavelengths stored in a variable
+  {
+    "type": "var",
+    "var": "SD.Lambda"
+  }
+
+TYPE 2 — "value": hardcode the values
+  Use when no wavelength variable found, or data is already concentration (HbO/HbR)
+  {
+    "type": "value",
+    "values": [760, 850]
+  }
+
+═══════════════════════════════════════════════════════════
+OTHER FIELDS
+═══════════════════════════════════════════════════════════
+
+measlist_var:
+  2D array shape (n_channels, 4), cols = [srcIdx, detIdx, aux, dataTypeCode]
+  Common: "SD.MeasList"
+  null if not found
+
+n_sources_var:
+  dot-notation path to a scalar variable whose value is the number of sources (optodes).
+  Look in flat_vars for a key whose:
+    - value is a small integer (typically 2–64)
+    - name semantically suggests source count: contains "nSrc", "nSource", "source",
+      "Src", "nS" or similar
+  Use the EXACT key as it appears in flat_vars. Do NOT invent paths.
+  null if no such variable found.
+
+n_detectors_var:
+  dot-notation path to a scalar variable whose value is the number of detectors (optodes).
+  Look in flat_vars for a key whose:
+    - value is a small integer (typically 2–64)
+    - name semantically suggests detector count: contains "nDet", "nDetector",
+      "detector", "Det", "nD" or similar
+  Use the EXACT key as it appears in flat_vars. Do NOT invent paths.
+  null if no such variable found.
+
+data_type_code:
+  1 = raw intensity (default)
+  2 = dOD (optical density change)
+  4 = HbO/HbR concentration
+  Set to 4 if data_assembly type is "hbo_hbr" or var names suggest concentration
+
+confidence: "high" | "medium" | "low"
+
+═══════════════════════════════════════════════════════
+DECISION GUIDE
+═══════════════════════════════════════════════════════
+
+Step 0 — Detect multi-block structure:
+  Use "top_level_shapes" (NOT flat_vars) to detect multi-block structures.
+  top_level_shapes shows the RAW shape of each variable BEFORE any unwrapping,
+  which is the only reliable way to see that e.g. "data" is a (1,4) cell array.
+
+  Detection rule — ALL three conditions must be true:
+    1. top_level_shapes[key].is_object == true
+    2. top_level_shapes[key].shape == [1, N] with N > 1
+    3. flat_vars contains sub-fields of that key (e.g. "data.X", "data.fs")
+       meaning each element of the cell array is a struct with data fields
+
+  If all three conditions are met:
+    → n_blocks = N  (the second dimension of the shape)
+    → block_data_field = the sub-field name holding the signal matrix
+      (look for the tall 2D array in flat_vars, e.g. "data.X" with likely_data=true)
+    → data_assembly.var = full dot-notation path to signal field in ONE block
+      (e.g. "data.X") — the executor iterates over blocks automatically
+
+  If the top-level variable is a plain 2D float matrix: n_blocks=1.
+  If uncertain: n_blocks=1  (safe default — no data is lost).
+
+  EXAMPLES:
+    top_level_shapes: {"data": {"shape": [1,4], "is_object": true, "is_struct": false}}
+    flat_vars has: "data.X" (likely_data=true), "data.fs" (scalar), "data.trial"
+    → n_blocks=4, block_data_field="X", data_assembly.var="data.X"
+
+    top_level_shapes: {"d": {"shape": [3000, 52], "is_object": false}}
+    → n_blocks=1, standard single-block processing
+
+Step 1 — Identify data_assembly type:
+  - Is there one tall 2D float array?        → "single"
+  - Are there many ch1...chN variables?      → "stack_columns"
+  - Are there HbO and HbR arrays?            → "hbo_hbr"
+
+Step 2 — Identify time_assembly type:
+  - Is there a 1D array matching n_samples?  → "var"
+  - Is there a scalar fs/Fs/srate?           → "generate" with fs_var
+  - Neither?                                 → "generate" with fs_value from notes or null
+
+Step 3 — Identify wavelengths_assembly type:
+  - Is there a small float array 600-1000?   → "var"
+  - No wavelength info found?                → "value" with [760, 850]
+
+Step 4 — Set data_type_code:
+  - Raw NIR intensity data                   → 1
+  - Optical density (log ratio)              → 2
+  - Hemoglobin concentration (HbO/HbR)       → 4
+
+═══════════════════════════════════════════════════════════
+OUTPUT FORMAT — JSON only, no markdown, no explanation
+═══════════════════════════════════════════════════════════
+
+{
+  "data_assembly": {
+    "type": "single",
+    "var": "d",
+    "transpose": false
+  },
+  "time_assembly": {
+    "type": "var",
+    "var": "t"
+  },
+  "wavelengths_assembly": {
+    "type": "var",
+    "var": "SD.Lambda"
+  },
+  "wavelengths_default": [760, 850],
+  "measlist_var": "SD.MeasList",
+  "n_sources_var": null,
+  "n_detectors_var": null,
+  "n_blocks": 1,
+  "block_data_field": null,
+  "data_type_code": 1,
+  "notes": "Homer3 format: standard d/t/SD structure detected",
+  "confidence": "high"
+}
+
+Additional examples:
+
+stack_columns case (ch1...ch40):
+{
+  "data_assembly": {
+    "type": "stack_columns",
+    "var_pattern": "ch",
+    "var_range": [1, 40]
+  },
+  "time_assembly": {
+    "type": "generate",
+    "fs_var": "nfo.fs",
+    "fs_value": 13.33
+  },
+  "wavelengths_assembly": {
+    "type": "value",
+    "values": [760, 850]
+  },
+  "wavelengths_default": [760, 850],
+  "measlist_var": null,
+  "n_sources_var": null,
+  "n_detectors_var": null,
+  "n_blocks": 1,
+  "block_data_field": null,
+  "data_type_code": 4,
+  "notes": "Data split across 40 channel variables ch1-ch40, concentration format",
+  "confidence": "medium"
+}
+
+hbo_hbr case:
+{
+  "data_assembly": {
+    "type": "hbo_hbr",
+    "hbo_var": "HbO",
+    "hbr_var": "HbR"
+  },
+  "time_assembly": {
+    "type": "var",
+    "var": "time"
+  },
+  "wavelengths_assembly": {
+    "type": "value",
+    "values": [760, 850]
+  },
+  "wavelengths_default": [760, 850],
+  "measlist_var": null,
+  "n_sources_var": null,
+  "n_detectors_var": null,
+  "n_blocks": 1,
+  "block_data_field": null,
+  "data_type_code": 4,
+  "notes": "HbO and HbR stored separately, will be concatenated column-wise",
+  "confidence": "high"
+}`;
+
+// ============================================================================
+// Public LLM call wrappers
+// Mirrors llm_trio_dataset_description(), llm_bids_plan(), etc. in llm.py
+// Each function mirrors its Python counterpart including temperature.
+// ============================================================================
+
+export const llmTrioDatasetDescription = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> =>
+  callLLM(
+    PROMPT_TRIO_DATASET_DESC, // system prompt
+    payload, // user message
+    "Trio_DatasetDesc", // step label reported in LLMHardFail
+    llmConfig,
+    0.1, // temperature
+    signal
+  );
+
+export const llmTrioReadme = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> => // README prompt; step label "Trio_README", temperature 0.4
+  callLLM(PROMPT_TRIO_README, payload, "Trio_README", llmConfig, 0.4, signal);
+
+// Disabled: generate_participants no longer calls the LLM at all — it just generates simple sequential IDs and defers to the plan stage for complex datasets. PROMPT_TRIO_PARTICIPANTS is therefore not defined in this file.
+// export const llmTrioParticipants = (
+//   payload: string,
+//   llmConfig: LLMConfig,
+//   signal?: AbortSignal
+// ): Promise<string> =>
+//   callLLM(PROMPT_TRIO_PARTICIPANTS, payload, "Trio_Participants", llmConfig, 0.2, signal);
+
+export const llmBidsPlan = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> => // BIDS plan prompt; step label "BIDSPlan", temperature 0.15
+  callLLM(PROMPT_BIDS_PLAN, payload, "BIDSPlan", llmConfig, 0.15, signal);
+
+export const llmMapMatToSnirf = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> =>
+  callLLM(
+    PROMPT_MAT_SNIRF_MAPPING, // system prompt
+    payload, // user message
+    "MAT_SNIRF_Mapping", // step label reported in LLMHardFail
+    llmConfig,
+    0.05, // temperature
+    signal
+  );
+
+// The Python prompts for these stages are essentially empty stubs: the stages
+// are not functional in autobidsify yet either, so the prompts and wrappers
+// below are only scaffolding for a future implementation.
+export const PROMPT_NIRS_DRAFT = `fNIRS-to-SNIRF mapper (Draft).
+  Output JSON (ONLY valid JSON):
+  {
+    "draft": {...},
+    "confidence": 0.8,
+    "questions": [...]
+  }`;
+
+export const PROMPT_NIRS_NORMALIZE = `fNIRS-to-SNIRF mapper (Normalize).
+  Output JSON (ONLY valid JSON):
+  {
+    "normalized": {...},
+    "questions": [...]
+  }`;
+
+export const PROMPT_MRI_VOXEL_DRAFT = `MRI voxelization planner (Draft).
+  Output JSON (ONLY valid JSON):
+  {
+    "volume_candidates": [...],
+    "meta_candidates": {...},
+    "confidence": 0.8
+  }`;
+
+export const PROMPT_MRI_VOXEL_FINAL = `MRI voxelization planner (Final).
+  Output JSON (ONLY valid JSON):
+  {
+    "conversions": [...],
+    "questions": []
+  }`;
+
+export const llmNirsDraft = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> => // NIRS draft prompt; step label "NIRS_Draft", temperature 0.2
+  callLLM(PROMPT_NIRS_DRAFT, payload, "NIRS_Draft", llmConfig, 0.2, signal);
+
+export const llmNirsNormalize = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> =>
+  callLLM(
+    PROMPT_NIRS_NORMALIZE, // system prompt
+    payload, // user message
+    "NIRS_Normalize", // step label reported in LLMHardFail
+    llmConfig,
+    0.1, // temperature
+    signal
+  );
+
+export const llmMriVoxelDraft = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> =>
+  callLLM(
+    PROMPT_MRI_VOXEL_DRAFT, // system prompt
+    payload, // user message
+    "MRI_Voxel_Draft", // step label reported in LLMHardFail
+    llmConfig,
+    0.2, // temperature
+    signal
+  );
+
+export const llmMriVoxelFinal = (
+  payload: string,
+  llmConfig: LLMConfig,
+  signal?: AbortSignal
+): Promise<string> =>
+  callLLM(
+    PROMPT_MRI_VOXEL_FINAL, // system prompt
+    payload, // user message
+    "MRI_Voxel_Final", // step label reported in LLMHardFail
+    llmConfig,
+    0.1, // temperature
+    signal
+  );
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/llmHelpers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/llmHelpers.ts
index 3f2a6a1..0bbbea2 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/llmHelpers.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/llmHelpers.ts
@@ -6,14 +6,304 @@ import {
   getUserContextText,
 } from "./fileAnalyzers";
 import {
-  extractSubjectAnalysis,
+  analyzeFilenamesForSubjects,
   analyzeTokenStatistics,
 } from "./filenameTokenizer";
 import { FileItem } from "redux/projects/types/projects.interface";
 
-/**
- * Build structured file summary for LLM
- */
+// ============================================================================
+// FileStructureAnalyzer
+// Mirrors universal_core.py FileStructureAnalyzer
+// Works on allFiles: string[] (relative paths) from VFS
+// ============================================================================
+
+// Summarise the folder layout implied by "/"-separated relative paths: file
+// counts per depth, directory names overall and per level, and a coarse
+// structure template (template rules mirror _infer_structure_template()).
+const analyzeDirectoryStructure = (allFiles: string[]): Record<string, any> => {
+  const filesPerDepth: Record<number, number> = {};
+  const dirNames = new Set<string>();
+  const dirsAtLevel = new Map<number, Set<string>>();
+  let deepest = 0;
+  allFiles.forEach((filepath) => {
+    const dirs = filepath.split("/").slice(0, -1); // last segment is the file
+    filesPerDepth[dirs.length] = (filesPerDepth[dirs.length] ?? 0) + 1;
+    deepest = Math.max(deepest, dirs.length);
+    dirs.forEach((dir, level) => {
+      dirNames.add(dir);
+      const atLevel = dirsAtLevel.get(level) ?? new Set<string>();
+      dirsAtLevel.set(level, atLevel.add(dir));
+    });
+  });
+
+  // Only the first ten top-level names are checked for a "sub" substring.
+  const topDirs = [...(dirsAtLevel.get(0) ?? [])].slice(0, 10);
+  const subjectLike = topDirs.some((d) => d.toLowerCase().includes("sub"));
+  const levelCount = dirsAtLevel.size;
+  const subjectTemplates = [
+    "{subject}",
+    "{subject}/{scantype}",
+    "{subject}/{scantype}/{format}",
+  ];
+  let template = "flat";
+  if (subjectLike) template = subjectTemplates[levelCount - 1] ?? "{subject}/nested";
+  else if (levelCount > 0) template = `custom_${levelCount}_levels`;
+
+  return {
+    max_depth: deepest,
+    depth_distribution: filesPerDepth,
+    unique_dir_names: [...dirNames].sort().slice(0, 100),
+    dir_level_patterns: Object.fromEntries(
+      [...dirsAtLevel].map(([level, names]) => [level, [...names].sort().slice(0, 20)])
+    ),
+    total_unique_dirs: dirNames.size,
+    structure_template: template,
+  };
+};
+
+// One naming convention probed by detectSubjectIdentifiers. Fields up to
+// `metadata` are copied into the reported candidate; `matcher` and `idGroup`
+// only drive the scan (idGroup 0 stores the whole matched name as the ID).
+type SubjectPatternSpec = {
+  type: "directory_pattern" | "filename_pattern";
+  pattern_name: string;
+  pattern_display: string;
+  extraction_regex: string;
+  subject_group: number;
+  site_group: number | null;
+  metadata: Record<string, boolean>;
+  matcher: RegExp;
+  idGroup: number;
+};
+
+// Probed in this order; equal scores keep this order after the stable sort.
+const SUBJECT_PATTERN_SPECS: SubjectPatternSpec[] = [
+  {
+    // Pattern 1: Site_subID (e.g. Beijing_sub82352)
+    type: "directory_pattern",
+    pattern_name: "site_sub_id",
+    pattern_display: "{site}_sub{id}",
+    extraction_regex: `([A-Za-z]+)_sub(\\d+)`,
+    subject_group: 2,
+    site_group: 1,
+    metadata: { has_site: true },
+    matcher: /^([A-Za-z]+)_sub(\d+)$/i,
+    idGroup: 2,
+  },
+  {
+    // Pattern 2: sub-ID or subID (BIDS standard)
+    type: "directory_pattern",
+    pattern_name: "bids_standard",
+    pattern_display: "sub-{id}",
+    extraction_regex: `sub-?(\\w+)`,
+    subject_group: 1,
+    site_group: null,
+    metadata: { has_site: false },
+    matcher: /^sub-?(\w+)$/i,
+    idGroup: 1,
+  },
+  {
+    // Pattern 3: Numeric directories (e.g. 001, 025)
+    type: "directory_pattern",
+    pattern_name: "numeric_only",
+    pattern_display: "{id}",
+    extraction_regex: `^(\\d+)$`,
+    subject_group: 1,
+    site_group: null,
+    metadata: { numeric_only: true },
+    matcher: /^\d{2,6}$/,
+    idGroup: 0,
+  },
+  {
+    // Pattern 4: patient_ID or subject_ID in filenames
+    type: "filename_pattern",
+    pattern_name: "patient_or_subject_id",
+    pattern_display: "{prefix}_{id}",
+    extraction_regex: `(?:patient|subject)[_-]?(\\d+)`,
+    subject_group: 1,
+    site_group: null,
+    metadata: {},
+    matcher: /(?:patient|subject)[_-]?(\d+)/i,
+    idGroup: 1,
+  },
+  {
+    // Pattern 5: Alphanumeric IDs (PD01, Control01, HC03)
+    type: "directory_pattern",
+    pattern_name: "alphanum_id",
+    pattern_display: "{prefix}{id}",
+    extraction_regex: `^([A-Za-z]+)(\\d+)$`,
+    subject_group: 2,
+    site_group: null,
+    metadata: {},
+    matcher: /^[A-Za-z]+\d+$/,
+    idGroup: 0,
+  },
+];
+
+// Heuristic score for one candidate — mirrors _score_identifier_candidate().
+//   user hint (when truthy): exact count +50, off by <=2 +30, off by <=5 +10
+//   avg files per subject:   >=5 +20, >=2 +15, >=1 +5
+//   subject count:           2..200 +15, 201..500 +5
+//   directory-based pattern: +10; site information present: +5
+const scoreSubjectCandidate = (c: any, userHint: number | null): number => {
+  let score = 0;
+
+  if (userHint) {
+    const miss = Math.abs(c.count - userHint);
+    if (miss === 0) score += 50;
+    else if (miss <= 2) score += 30;
+    else if (miss <= 5) score += 10;
+  }
+
+  const avg = c.avg_files_per_subject;
+  if (avg >= 5) score += 20;
+  else if (avg >= 2) score += 15;
+  else if (avg >= 1) score += 5;
+
+  if (c.count >= 2 && c.count <= 200) score += 15;
+  else if (c.count > 200 && c.count <= 500) score += 5;
+
+  if (c.type === "directory_pattern") score += 10;
+  if (c.metadata?.has_site) score += 5;
+  return score;
+};
+
+// Detect how subjects are identified in a list of "/"-separated paths.
+//
+// First-level directory names (and, for pattern 4, bare filenames) are matched
+// against SUBJECT_PATTERN_SPECS. Every convention with at least one match
+// becomes a candidate carrying its distinct-ID count, up to 10 sorted sample
+// IDs and avg_files_per_subject (total files / distinct IDs). Candidates are
+// scored, sorted best-first, and the top 5 are returned with the best one.
+//
+// userHint: expected number of subjects, or null when unknown.
+// Returns { candidates, best_candidate, confidence, total_candidates_evaluated }
+// where confidence is "high" (score > 80), "medium" (> 60), "low", or "none"
+// when nothing matched.
+const detectSubjectIdentifiers = (
+  allFiles: string[],
+  userHint: number | null
+): Record<string, any> => {
+  const firstLevelDirs = new Set<string>();
+  const filenames: string[] = [];
+  for (const filepath of allFiles) {
+    const parts = filepath.split("/");
+    if (parts.length > 1) firstLevelDirs.add(parts[0]);
+    filenames.push(parts[parts.length - 1]);
+  }
+
+  const candidates: any[] = [];
+  for (const spec of SUBJECT_PATTERN_SPECS) {
+    const { matcher, idGroup, metadata, ...reported } = spec;
+    const names = spec.type === "filename_pattern" ? filenames : firstLevelDirs;
+    const ids = new Set<string>();
+    for (const name of names) {
+      const m = name.match(matcher);
+      if (m) ids.add(m[idGroup]);
+    }
+    if (ids.size === 0) continue;
+
+    candidates.push({
+      ...reported,
+      count: ids.size,
+      sample_ids: [...ids].sort().slice(0, 10),
+      metadata: { ...metadata }, // fresh copy so callers cannot alter the spec
+      avg_files_per_subject: allFiles.length / ids.size,
+    });
+  }
+
+  if (candidates.length === 0) {
+    return {
+      best_candidate: null,
+      confidence: "none",
+      candidates: [],
+      total_candidates_evaluated: 0,
+    };
+  }
+
+  // Score every candidate, then order best-first (ties keep probe order).
+  for (const c of candidates) c.score = scoreSubjectCandidate(c, userHint);
+  candidates.sort((a, b) => b.score - a.score);
+  const best = candidates[0];
+
+  return {
+    candidates: candidates.slice(0, 5),
+    best_candidate: best,
+    confidence: best.score > 80 ? "high" : best.score > 60 ? "medium" : "low",
+    total_candidates_evaluated: candidates.length,
+  };
+};
+
+// Map every basename that occurs at more than one path to all of those paths.
+// Grouping uses a Map so names like "constructor" never hit Object.prototype.
+const detectDuplicateFilenames = (
+  allFiles: string[]
+): Record<string, string[]> => {
+  const pathsByName = new Map<string, string[]>();
+  for (const filepath of allFiles) {
+    const filename = filepath.slice(filepath.lastIndexOf("/") + 1);
+    if (!pathsByName.has(filename)) pathsByName.set(filename, []);
+    pathsByName.get(filename)!.push(filepath);
+  }
+  return Object.fromEntries([...pathsByName].filter(([, p]) => p.length > 1));
+};
+
+// Per top-level directory, list the filename patterns found in each sub-path
+// ("root" = directly inside it). A pattern is the filename with digit runs
+// replaced by "N" and "(...)" suffixes dropped; 5 are kept per sub-path. Above
+// maxSubjects directories only a head/middle/tail sample is reported. Map/Set
+// grouping keeps names like "constructor"/"__proto__" off Object.prototype.
+const buildDirectoryTreeSummary = (
+  allFiles: string[],
+  maxSubjects: number = 50
+): Record<string, any> => {
+  const bySubject = new Map<string, Map<string, Set<string>>>();
+  for (const filepath of allFiles) {
+    const parts = filepath.split("/");
+    if (parts.length < 2) continue; // not inside any directory
+    const subPath = parts.slice(1, -1).join("/") || "root";
+    const filename = parts[parts.length - 1];
+    const pattern = filename.replace(/\d+/g, "N").replace(/\s*\([^)]*\)/g, "");
+    const byPath = bySubject.get(parts[0]) ?? new Map<string, Set<string>>();
+    const patterns = byPath.get(subPath) ?? new Set<string>();
+    patterns.add(pattern);
+    byPath.set(subPath, patterns);
+    bySubject.set(parts[0], byPath);
+  }
+
+  const allSubjects = [...bySubject.keys()].sort();
+  let sampledSubjects = allSubjects;
+  if (allSubjects.length > maxSubjects) {
+    const mid = Math.floor(allSubjects.length / 2);
+    const picked = new Set([
+      ...allSubjects.slice(0, 15),
+      ...allSubjects.slice(mid - 10, mid + 10),
+      ...allSubjects.slice(-15),
+    ]);
+    sampledSubjects = [...picked].slice(0, maxSubjects); // Set dropped overlaps, order kept
+  }
+
+  // Object.fromEntries defines own properties, so any name is a safe key.
+  const summary = Object.fromEntries(
+    sampledSubjects.map((subject) => [
+      subject,
+      Object.fromEntries(
+        [...(bySubject.get(subject) ?? [])].map(([p, s]) => [p, [...s].slice(0, 5)])
+      ),
+    ])
+  );
+  return {
+    subject_structure_samples: summary,
+    total_subjects_detected: allSubjects.length,
+    sampled_subjects: sampledSubjects.length,
+  };
+};
+
+// ============================================================================
+// TS-only UI helpers
+// ============================================================================
+
+// Build structured file summary for LLM
 export const buildFileSummary = (files: FileItem[]): string => {
   let summary = "";
 
@@ -66,14 +356,6 @@ export const buildFileSummary = (files: FileItem[]): string => {
     hdf5: "format: SNIRF → format_ready: true",
   };
 
-  // dataFiles.forEach((f) => {
-  //   const category = categorizeFile(f);
-  //   const fmt = formatLabel[f.fileType || ""] || ""; // add
-  //   summary += `  - ${f.name} [${category}]`;
-  //   if (fmt) summary += ` <${fmt}>`; // add
-  //   if (f.sourcePath) summary += ` (${f.sourcePath})`;
-  //   summary += "\n";
-  // });
   const byType: Record<string, typeof dataFiles> = {};
   dataFiles.forEach((f) => {
     const key = f.fileType || "other";
@@ -107,75 +389,71 @@ export const buildFileSummary = (files: FileItem[]): string => {
 /**
  * Analyze file patterns
  */
-export const analyzeFilePatterns = (files: FileItem[]): string => {
-  const dataFiles = files.filter((f) => f.type === "file" && !f.isUserMeta);
-  const filenames = dataFiles.map((f) => f.name);
-
-  const extensions = [
-    ...new Set(
-      filenames.map((name) => {
-        const parts = name.toLowerCase().split(".");
-        return parts.length > 1 ? parts[parts.length - 1] : "none";
-      })
-    ),
-  ];
-
-  // Categorize files
-  const categorized: Record<string, string[]> = {
-    anatomical: [],
-    functional: [],
-    diffusion: [],
-    other: [],
-  };
-
-  dataFiles.forEach((f) => {
-    const category = categorizeFile(f);
-    if (category.startsWith("anatomical")) {
-      categorized.anatomical.push(f.name);
-    } else if (category.startsWith("functional")) {
-      categorized.functional.push(f.name);
-    } else if (category.includes("diffusion")) {
-      categorized.diffusion.push(f.name);
-    } else {
-      categorized.other.push(f.name);
-    }
-  });
-
-  return `
-FILENAME ANALYSIS:
-${"=".repeat(70)}
-Total data files: ${dataFiles.length}
-File types: ${extensions.join(", ")}
-
-File Categories:
-  Anatomical scans: ${categorized.anatomical.length}
-  Functional scans: ${categorized.functional.length}
-  Diffusion scans: ${categorized.diffusion.length}
-  Other files: ${categorized.other.length}
-
-Sample filenames (first 10):
-${filenames
-  .slice(0, 10)
-  .map((name) => `  - ${name}`)
-  .join("\n")}
-${
-  filenames.length > 10 ? `\n  ... and ${filenames.length - 10} more files` : ""
-}
-`;
-};
-
-/**
- * Get user context (README, instructions, participant info)
- */
-export const getUserContext = (files: FileItem[]): string => {
-  const userText = getUserContextText(files);
-  if (!userText) return "No user-provided context available.";
-  return `USER-PROVIDED CONTEXT:\n${"=".repeat(70)}\n${userText}`;
-};
-
-/**
- * Get file annotations (notes)
- */
+// export const analyzeFilePatterns = (files: FileItem[]): string => {
+//   const dataFiles = files.filter((f) => f.type === "file" && !f.isUserMeta);
+//   const filenames = dataFiles.map((f) => f.name);
+
+//   const extensions = [
+//     ...new Set(
+//       filenames.map((name) => {
+//         const parts = name.toLowerCase().split(".");
+//         return parts.length > 1 ? parts[parts.length - 1] : "none";
+//       })
+//     ),
+//   ];
+
+//   // Categorize files
+//   const categorized: Record<string, string[]> = {
+//     anatomical: [],
+//     functional: [],
+//     diffusion: [],
+//     other: [],
+//   };
+
+//   dataFiles.forEach((f) => {
+//     const category = categorizeFile(f);
+//     if (category === "mri" || category === "jnifti") {
+//       categorized.anatomical.push(f.name);
+//     } else if (category === "nirs") {
+//       categorized.functional.push(f.name);
+//     } else if (category === "array") {
+//       categorized.diffusion.push(f.name);
+//     } else {
+//       categorized.other.push(f.name);
+//     }
+//   });
+
+//   return `
+// FILENAME ANALYSIS:
+// ${"=".repeat(70)}
+// Total data files: ${dataFiles.length}
+// File types: ${extensions.join(", ")}
+
+// File Categories:
+//   Anatomical scans: ${categorized.anatomical.length}
+//   Functional scans: ${categorized.functional.length}
+//   Diffusion scans: ${categorized.diffusion.length}
+//   Other files: ${categorized.other.length}
+
+// Sample filenames (first 10):
+// ${filenames
+//   .slice(0, 10)
+//   .map((name) => `  - ${name}`)
+//   .join("\n")}
+// ${
+//   filenames.length > 10 ? `\n  ... and ${filenames.length - 10} more files` : ""
+// }
+// `;
+// };
+
+// Get user context (README, instructions, participant info)
+// export const getUserContext = (files: FileItem[]): string => {
+//   const userText = getUserContextText(files);
+//   if (!userText) return "No user-provided context available.";
+//   return `USER-PROVIDED CONTEXT:\n${"=".repeat(70)}\n${userText}`;
+// };
+
+// Get file annotations (notes)
 export const getFileAnnotations = (files: FileItem[]): string => {
   const filesWithNotes = files.filter((f) => f.note);
   if (filesWithNotes.length === 0) return "";
@@ -201,225 +479,90 @@ export const downloadJSON = (data: any, filename: string) => {
   URL.revokeObjectURL(url);
 };
 
-/**
- * Download text file(not using this function yet)
- */
-export const downloadText = (text: string, filename: string) => {
-  const blob = new Blob([text], { type: "text/plain" });
-  const url = URL.createObjectURL(blob);
-  const a = document.createElement("a");
-  a.href = url;
-  a.download = filename;
-  a.click();
-  URL.revokeObjectURL(url);
-};
-
-/**
- * Build evidence bundle structure
- */
-export const buildEvidenceBundle = (
-  files: FileItem[],
-  baseDirectoryPath: string,
-  userOverrides?: {
-    nSubjects: number | null;
-    modalityHint: string;
-    describeText: string;
-  }
-): any => {
-  const counts = getCountsByExtension(files);
-  const userText = getUserContextText(files);
-
-  // add for samples ---start---
-  const dataFiles = files.filter(
-    (f) => f.source === "user" && f.type === "file"
-  );
-
-  // Mirror autobidsify's _intelligent_file_sampling()
-  // Group by file type, take up to 5 samples per type
-  const samplesByType: Record<string, FileItem[]> = {};
+// ============================================================================
+// Functions below mirror evidence.py
+// ============================================================================
+
+// ============================================================================
+// detect_kind from evidence.py maps to categorizeFile in fileAnalyzers.ts
+// ============================================================================
+
+// ============================================================================
+// Intelligent file sampling — mirrors _intelligent_file_sampling() in evidence.py
+// Groups files by extension, then by filename pattern, and samples about 5 per extension (at least one per pattern).
+// ============================================================================
+
+const intelligentFileSampling = (
+  dataFiles: FileItem[],
+  targetSamplesPerExt: number = 5
+): FileItem[] => { // returns the picked FileItems, appended one extension after another
+  // Group by lower-cased extension (".nii.gz" kept whole) — mirrors by_ext in Python
+  const byExt: Record<string, FileItem[]> = {};
   dataFiles.forEach((f) => {
-    const key = f.fileType || "other";
-    if (!samplesByType[key]) samplesByType[key] = [];
-    if (samplesByType[key].length < 5) {
-      samplesByType[key].push(f);
-    }
+    const name = f.name.toLowerCase();
+    const ext = name.endsWith(".nii.gz")
+      ? ".nii.gz"
+      : "." + (name.split(".").pop() || "other"); // NOTE(review): a name with no "." yields "." + the whole name, not ".other"
+    if (!byExt[ext]) byExt[ext] = [];
+    byExt[ext].push(f);
   });
 
-  const samples = Object.values(samplesByType)
-    .flat()
-    .map((f) => ({
-      relpath: f.sourcePath || f.name,
-      filename: f.name,
-      suffix: f.name.split(".").pop() || "",
-      kind: f.fileType || "other",
-      size: 0,
-    }));
+  const sampledFiles: FileItem[] = [];
 
-  // ----end---
-
-  // add this for subject_analysis.json
-  // const allFiles = files
-  //   .filter((f) => f.source === "user" && f.type === "file")
-  //   .map((f) => f.sourcePath || f.name);
-  const allFiles = files
-    .filter((f) => f.source === "user" && f.type === "file")
-    .map((f) => {
-      const path = f.sourcePath || f.name;
-      // Strip leading folder name — mirrors Python's relative-to-data_root paths
-      // "1-FRESH-Motor-snirf/sub-01_ses-..." → "sub-01_ses-..."
-      const parts = path.split("/");
-      return parts.length > 1 ? parts.slice(1).join("/") : path;
+  Object.entries(byExt).forEach(([ext, fileList]) => {
+    // Group by filename pattern (digit runs → "N", "(...)" parts removed) — mirrors pattern_groups in Python
+    const patternGroups: Record<string, FileItem[]> = {};
+    fileList.forEach((f) => {
+      const pattern = f.name.replace(/\d+/g, "N").replace(/\s*\([^)]*\)/g, "");
+      if (!patternGroups[pattern]) patternGroups[pattern] = []; // NOTE(review): plain-object lookup; a pattern such as "constructor" finds the inherited member and skips this init
+      patternGroups[pattern].push(f);
     });
 
-  const subjectAnalysis = extractSubjectAnalysis(allFiles);
-  // ← end
+    const nPatterns = Object.keys(patternGroups).length;
+    const spp = Math.max(1, Math.floor(targetSamplesPerExt / nPatterns)); // samples taken per pattern group, never fewer than 1
 
-  // ── filename analysis (must come AFTER subjectAnalysis)
-  const justFilenames = allFiles.map((f) =>
-    f.includes("/") ? f.split("/").pop()! : f
-  );
-  const tokenStats = analyzeTokenStatistics(justFilenames);
-  const dominantCount = tokenStats.dominantPrefixes.length;
-  const userNSubjects = subjectAnalysis.subject_count || null;
-  let filenameConfidence: "high" | "medium" | "low" | "none" = "none";
-  if (dominantCount > 0) {
-    if (userNSubjects && dominantCount === userNSubjects)
-      filenameConfidence = "high";
-    else if (dominantCount >= 2 && dominantCount <= 10)
-      filenameConfidence = "medium";
-    else filenameConfidence = "low";
-  }
-  const filenameAnalysis = {
-    python_statistics: {
-      total_files: tokenStats.totalFiles,
-      token_frequency: tokenStats.tokenFrequency,
-      prefix_frequency: tokenStats.prefixFrequency,
-      dominant_prefixes: tokenStats.dominantPrefixes,
-      unique_token_count: Object.keys(tokenStats.tokenFrequency).length,
-      unique_prefix_count: Object.keys(tokenStats.prefixFrequency).length,
-    },
-    confidence: filenameConfidence,
-    recommendation: buildFilenameRecommendation(
-      tokenStats.dominantPrefixes,
-      userNSubjects
-    ),
-  };
+    let extSamples: FileItem[] = [];
+    const extSampledSet = new Set<string>(); // f.id of every file already placed in extSamples
 
-  // subject count decision logic:
-  const finalSubjectCount =
-    userOverrides?.nSubjects ?? // user wins
-    subjectAnalysis.subject_count ??
-    tokenStats.dominantPrefixes.length ??
-    null;
+    // Take spp files from each pattern group (with more than targetSamplesPerExt patterns this alone exceeds the target)
+    Object.values(patternGroups).forEach((group) => {
+      group.slice(0, spp).forEach((f) => {
+        extSamples.push(f);
+        extSampledSet.add(f.id);
+      });
+    });
 
-  const participantEvidence = buildParticipantMetadataEvidence(
-    allFiles,
-    // pass the already-built documents array
-    files
-      .filter(
-        (f) => f.source === "user" && f.content && f.content.trim().length > 0
-      )
-      .map((f) => ({
-        relpath: f.sourcePath || f.name,
-        filename: f.name,
-        content: f.content || "",
-      }))
-  );
+    // Top-up to targetSamplesPerExt if under, drawing from the largest pattern groups first
+    if (extSamples.length < targetSamplesPerExt) {
+      const sorted = [...Object.values(patternGroups)].sort(
+        (a, b) => b.length - a.length
+      );
+      for (const group of sorted) {
+        if (extSamples.length >= targetSamplesPerExt) break;
+        for (const f of group) {
+          if (extSamples.length >= targetSamplesPerExt) break;
+          if (!extSampledSet.has(f.id)) {
+            extSamples.push(f);
+            extSampledSet.add(f.id);
+          }
+        }
+      }
+    }
 
-  return {
-    root: baseDirectoryPath,
-    counts_by_ext: counts,
-    samples,
-    all_files: allFiles,
-    filename_analysis: filenameAnalysis, // NEW
-    participant_metadata_evidence: participantEvidence, // NEW
-    subject_detection: {
-      method: "hybrid_analysis",
-      path_based_count: subjectAnalysis.subject_count,
-      path_based_confidence: subjectAnalysis.success ? "medium" : "none",
-      filename_based_count: tokenStats.dominantPrefixes.length,
-      filename_based_confidence: filenameConfidence,
-      final_count: finalSubjectCount,
-      count_source:
-        userOverrides?.nSubjects != null
-          ? "user_provided"
-          : subjectAnalysis.success
-          ? subjectAnalysis.method
-          : "filename_based",
-      best_pattern: subjectAnalysis.subject_records[0]?.pattern_name || "none",
-    },
-    documents: files
-      .filter((f) => {
-        if (f.source !== "user") return false; // exclude AI files
-        if (!f.content || f.content.trim().length === 0) return false;
-
-        // ✅ Text files - primary source
-        if (["text", "office", "meta"].includes(f.fileType || "")) return true;
-
-        // ✅ NIfTI headers - useful for LLM to understand scan parameters
-        if (f.fileType === "nifti" && f.contentType === "nifti") return true;
-
-        // ✅ HDF5/SNIRF structure - useful for fNIRS datasets
-        if (f.fileType === "hdf5" && f.contentType === "hdf5") return true;
-
-        // ✅ NeuroJSON - already JSON text
-        if (f.fileType === "neurojsonText") return true;
-
-        // ✅ Catch undefined fileType but has content (your current bug)
-        if (f.fileType === undefined && f.content) return true;
-
-        return false;
-      })
-      .map((f) => ({
-        relpath: f.sourcePath || f.name,
-        filename: f.name,
-        type: f.fileType || "unknown",
-        content: f.content || "",
-        purpose: "experimental_protocol_or_metadata",
-      })),
-    user_hints: {
-      user_text: userText,
-      modality_hint: userOverrides?.modalityHint || detectModality(files),
-      n_subjects: finalSubjectCount,
-    },
-    // subject_analysis: subjectAnalysis,
-    trio_found: {
-      "dataset_description.json": files.some(
-        (f) => f.source === "user" && f.name === "dataset_description.json"
-      ),
-      "README.md": files.some(
-        (f) =>
-          f.source === "user" &&
-          (f.name === "README.md" ||
-            f.name === "README.txt" ||
-            f.name === "README.rst" ||
-            f.name === "readme.md")
-      ),
-      "participants.tsv": files.some(
-        (f) => f.source === "user" && f.name === "participants.tsv"
-      ),
-    },
-  };
-};
+    sampledFiles.push(...extSamples);
+  });
 
-const buildFilenameRecommendation = (
-  dominantPrefixes: { prefix: string; count: number; percentage: number }[],
-  userNSubjects: number | null
-): string => {
-  if (dominantPrefixes.length === 0)
-    return "No clear filename patterns detected. Recommend user describe subject identification.";
-  if (userNSubjects && dominantPrefixes.length === userNSubjects) {
-    const prefixStr = dominantPrefixes.map((p) => p.prefix).join(", ");
-    return `HIGH CONFIDENCE: Detected ${dominantPrefixes.length} dominant prefixes (${prefixStr}) matching user hint of ${userNSubjects} subjects.`;
-  }
-  if (dominantPrefixes.length >= 2 && dominantPrefixes.length <= 5)
-    return `MEDIUM CONFIDENCE: Detected ${dominantPrefixes.length} potential subject groups. Will send to LLM for validation.`;
-  return `LOW CONFIDENCE: Found ${dominantPrefixes.length} prefix patterns, which may or may not represent subjects. LLM will analyze.`;
+  return sampledFiles;
 };
 
+// ============================================================================
+// Mirrors _collect_participant_metadata_evidence() in evidence.py
+// ============================================================================
+
 const buildParticipantMetadataEvidence = (
   allFiles: string[],
-  documents: { relpath: string; filename: string; content: string }[]
+  documents: { relpath: string; filename: string; content: string }[],
+  files: FileItem[]
 ): Record<string, any> => {
   const evidence: Record<string, any> = {};
 
@@ -457,6 +600,25 @@ const buildParticipantMetadataEvidence = (
   // Evidence 2: DICOM headers (already extracted into documents content)
   // Skip re-reading — not feasible client-side
 
+  const dicomFiles = files.filter(
+    (f) => f.source === "user" && f.fileType === "dicom" && f.content
+  );
+  if (dicomFiles.length > 0) {
+    const dicomSamples = dicomFiles.slice(0, 10).map((f) => ({
+      filename: f.name,
+      extracted_header: f.content?.slice(0, 300) || "",
+    }));
+    evidence.dicom_headers = {
+      found: true,
+      sampled_count: dicomSamples.length,
+      total_dicom_files: dicomFiles.length,
+      samples: dicomSamples,
+      note: "DICOM headers extracted client-side",
+    };
+  } else {
+    evidence.dicom_headers = { found: false };
+  }
+
   // Evidence 3: filename semantic patterns
   const genderKws = [
     "male",
@@ -606,81 +768,200 @@ const buildParticipantMetadataEvidence = (
   return evidence;
 };
 
-/**
- * Extract subject identifiers from file list
- * Mirrors autobidsify's _extract_subjects_from_flat_filenames()
- */
-export const extractSubjectsFromFiles = (
-  files: FileItem[]
-): {
-  subjects: { originalId: string; bidsId: string }[];
-  strategy: string;
-} => {
+// ============================================================================
+// Build evidence bundle structure
+// Mirrors _build_evidence_bundle_internal() and build_evidence_bundle() in evidence.py
+// ============================================================================
+
+export const buildEvidenceBundle = (
+  files: FileItem[],
+  baseDirectoryPath: string,
+  userOverrides?: {
+    nSubjects: number | null;
+    modalityHint: string;
+    describeText: string;
+  }
+): any => {
+  const counts = getCountsByExtension(files);
+  // userText = user-typed description (if any) followed by file-derived context.
+  const fileContextText = getUserContextText(files);
+  const userText = [userOverrides?.describeText?.trim(), fileContextText]
+    .filter(Boolean)
+    .join("\n\n");
+
+  // Build the "samples" section from a sampled subset of user-uploaded files.
   const dataFiles = files.filter(
     (f) => f.source === "user" && f.type === "file"
   );
 
-  // Count occurrences of each base identifier
-  const identifierCounts: Record<string, number> = {};
-  // dataFiles.forEach((f) => {
-  //   const nameNoExt = f.name.replace(/\.[^/.]+$/, "").replace(/\.nii$/, "");
-  //   const match = nameNoExt.match(/^([A-Za-z0-9\-]+)/);
-  //   if (match) {
-  //     const id = match[1];
-  //     identifierCounts[id] = (identifierCounts[id] || 0) + 1;
-  //   }
-  // });
-  dataFiles.forEach((f) => {
-    const nameNoExt = f.name
-      .replace(/\.nii\.gz$/i, "")
-      .replace(/\.[^/.]+$/, "")
-      .replace(/\s*\([^)]*\)/, ""); // remove (309) etc.
-
-    // Split on first digit sequence or underscore — take prefix only
-    // VHMCT1mm → VHMCT, sub-01 → sub-01, BZZ003 → BZZ
-    const match = nameNoExt.match(/^([A-Za-z]+(?:-[A-Za-z]+)*)/);
-    if (match) {
-      const id = match[1];
-      identifierCounts[id] = (identifierCounts[id] || 0) + 1;
-    }
+  const sampledFiles = intelligentFileSampling(dataFiles);
+  const samples = sampledFiles.map((f) => ({
+    relpath: f.sourcePath || f.name,
+    filename: f.name,
+    suffix: f.name.split(".").pop() || "",
+    kind: categorizeFile(f),
+    size: 0,
+    header_info: f.content ? { raw: f.content.slice(0, 500) } : undefined,
+  }));
+
+  const allFiles = files
+    .filter((f) => f.source === "user" && f.type === "file")
+    .map((f) => {
+      const path = f.sourcePath || f.name;
+      // Strip leading folder name — mirrors Python's relative-to-data_root paths
+      // "1-FRESH-Motor-snirf/sub-01_ses-..." → "sub-01_ses-..."
+      const parts = path.split("/");
+      return parts.length > 1 ? parts.slice(1).join("/") : path;
+    });
+
+  // ── FileStructureAnalyzer — mirrors universal_core.py
+  const dirStructure = analyzeDirectoryStructure(allFiles);
+  const subjectDetectionResult = detectSubjectIdentifiers(
+    allFiles,
+    userOverrides?.nSubjects ?? null
+  );
+  const duplicates = detectDuplicateFilenames(allFiles);
+  const treeSummary = buildDirectoryTreeSummary(allFiles, 50);
+  const pathBasedCount = subjectDetectionResult.best_candidate?.count ?? 0;
+  const pathBasedConfidence = subjectDetectionResult.confidence;
+
+  const filenameAnalysisRaw = analyzeFilenamesForSubjects(allFiles, {
+    n_subjects: userOverrides?.nSubjects ?? null,
+    user_text: userOverrides?.describeText ?? "",
   });
+  const { llm_payload, ...filenameAnalysis } = filenameAnalysisRaw;
+  const tokenStats = filenameAnalysis.python_statistics;
+  const filenameConfidence = filenameAnalysis.confidence;
 
-  // Sort by frequency — most common identifiers are likely subjects
-  // const sorted = Object.entries(identifierCounts).sort((a, b) => b[1] - a[1]);
-
-  // Step 2: Keep only identifiers that appear in multiple files
-  // (single-file identifiers are likely body parts, not subjects)
-  const totalFiles = dataFiles.length;
-  const threshold = Math.max(2, Math.floor(totalFiles * 0.05)); // at least 5% of files
-
-  const filtered = Object.entries(identifierCounts)
-    .filter(([, count]) => count >= threshold)
-    .sort((a, b) => b[1] - a[1]);
-
-  // If filtering leaves nothing, fall back to all identifiers
-  const candidates =
-    filtered.length > 0
-      ? filtered
-      : Object.entries(identifierCounts).sort((a, b) => b[1] - a[1]);
-  // Step 3: Use numeric strategy for >10 subjects
-  const strategy = candidates.length > 10 ? "numeric" : "numeric";
-  // const strategy = sorted.length > 10 ? "numeric" : "semantic";
-
-  // const subjects = sorted.map(([originalId], i) => ({
-  //   originalId,
-  //   bidsId:
-  //     strategy === "numeric"
-  //       ? String(i + 1)
-  //       : originalId.replace(/[^a-zA-Z0-9]/g, ""),
-  // }));
-  const subjects = candidates.map(([originalId], i) => ({
-    originalId,
-    bidsId: String(i + 1),
-  }));
+  // subject count decision logic:
+  let finalSubjectCount: number | null;
+  let countSource: string;
+
+  if (userOverrides?.nSubjects != null) {
+    finalSubjectCount = userOverrides.nSubjects;
+    countSource = "user_provided";
+  } else if (pathBasedConfidence === "high") {
+    finalSubjectCount = pathBasedCount;
+    countSource = "path_based_high_confidence";
+  } else if (
+    (filenameConfidence === "high" || filenameConfidence === "medium") &&
+    pathBasedCount === 0
+  ) {
+    finalSubjectCount = tokenStats.dominantPrefixes.length;
+    countSource = "filename_based";
+  } else if (pathBasedCount > 0) {
+    finalSubjectCount = pathBasedCount;
+    countSource = "path_based";
+  } else {
+    finalSubjectCount = 1;
+    countSource = "fallback";
+  }
+
+  const documents = files
+    .filter((f) => {
+      if (f.source !== "user") return false;
+      if (!f.content || f.content.trim().length === 0) return false;
+      if (["text", "office", "meta"].includes(f.fileType || "")) return true;
+      if (f.fileType === "nifti" && f.contentType === "nifti") return true;
+      if (f.fileType === "hdf5" && f.contentType === "hdf5") return true;
+      if (f.fileType === "neurojsonText") return true;
+      if (f.fileType === undefined && f.content) return true;
+      return false;
+    })
+    .map((f) => ({
+      relpath: f.sourcePath || f.name,
+      filename: f.name,
+      type: f.fileType || "unknown",
+      content: f.content || "",
+      purpose: "experimental_protocol_or_metadata",
+    }));
+
+  const participantEvidence = buildParticipantMetadataEvidence(
+    allFiles,
+    documents,
+    files
+  );
 
-  return { subjects, strategy };
+  return {
+    root: baseDirectoryPath,
+    counts_by_ext: counts,
+    samples,
+    all_files: allFiles,
+    filename_analysis: filenameAnalysis, // NEW
+    participant_metadata_evidence: participantEvidence, // NEW
+    subject_detection: {
+      method: "hybrid_analysis",
+      path_based_count: pathBasedCount,
+      path_based_confidence: pathBasedConfidence,
+      filename_based_count: tokenStats.dominantPrefixes.length,
+      filename_based_confidence: filenameConfidence,
+      final_count: finalSubjectCount,
+      count_source: countSource,
+      best_pattern:
+        subjectDetectionResult.best_candidate?.pattern_display || "none",
+    },
+    structure_analysis: {
+      directory_structure: dirStructure,
+      subject_detection: subjectDetectionResult,
+      duplicate_files: Object.fromEntries(
+        Object.entries(duplicates).slice(0, 20)
+      ),
+      tree_summary_for_llm: treeSummary,
+      analyzer_confidence: subjectDetectionResult.confidence,
+    },
+
+    documents: documents,
+    document_summary: {
+      total_documents: documents.length,
+      document_types: [...new Set(documents.map((d) => d.type))],
+      total_text_length: documents.reduce(
+        (sum, d) => sum + d.content.length,
+        0
+      ),
+    },
+    sampling_strategy: {
+      method: "pattern_based",
+      target_per_ext: 5,
+      total_files_sampled: sampledFiles.length,
+    },
+    user_hints: {
+      user_text: userText,
+      modality_hint: userOverrides?.modalityHint || detectModality(files),
+      n_subjects: finalSubjectCount,
+    },
+    trio_found: {
+      "dataset_description.json": files.some(
+        (f) => f.source === "user" && f.name === "dataset_description.json"
+      ),
+      "README.md": files.some(
+        (f) =>
+          f.source === "user" &&
+          (f.name === "README.md" ||
+            f.name === "README.txt" ||
+            f.name === "README.rst" ||
+            f.name === "readme.md")
+      ),
+      "participants.tsv": files.some(
+        (f) => f.source === "user" && f.name === "participants.tsv"
+      ),
+    },
+    trio_promoted: {
+      dataset_description: [],
+      readme: [],
+      participants: [],
+    },
+    data_source: {
+      type: "directory",
+      original_path: baseDirectoryPath,
+      actual_path: baseDirectoryPath,
+    },
+  };
 };
 
+// ============================================================================
+// Mirrors ingest_data() in ingest.py
+// ============================================================================
+
 export const buildIngestInfo = (
   baseDirectoryPath: string
   // outputDir: string
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts
index be03f83..22c635a 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/llmPrompts.ts
@@ -1,177 +1,21 @@
 // src/components/DatasetOrganizer/utils/llmPrompts.ts
+//
+// NeuroJSON.io-only prompts — functions that have no Python equivalent.
+//
+// All PROMPT_* constants and LLM wrapper functions now live in llm.ts,
+// mirroring autobidsify's llm.py where prompts and callers are co-located.
+//
+// This file only contains:
+//   getConversionScriptPrompt() — generates a standalone Python conversion
+//   script from the user's file structure. Autobidsify IS the conversion
+//   script, so this feature has no Python equivalent.
+
+// ============================================================================
+// getConversionScriptPrompt()
+// NeuroJSON.io-only — no Python equivalent in autobidsify.
+// Called by LLMPanel.tsx → handleGenerate() ("Generate Script" button)
+// ============================================================================
 
-/**
- * Prompt for dataset_description.json generation
- * Based on auto-bidsify's PROMPT_TRIO_DATASET_DESC
- */
-export const getDatasetDescriptionPrompt = (
-  userText: string,
-  evidenceBundle?: any
-): string => {
-  const documentsContext =
-    // evidenceBundle?.documents
-    //   ?.map((d: any) => `[${d.filename}]:\n${d.content}`)
-    //   .join("\n\n") || "";
-    evidenceBundle?.documents
-      ?.map((d: any) => `[${d.filename}]:\n${(d.content || "").slice(0, 500)}`)
-      .join("\n\n") || "";
-
-  return `You are a BIDS dataset_description.json generator.
-  
-  CRITICAL: Use the following user-provided content to extract dataset information!
-  
-  USER-PROVIDED CONTENT:
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  ${userText || "(no readme/instructions provided)"}
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-   ALL UPLOADED DOCUMENTS (search these for dataset name, authors, etc.):
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  ${documentsContext || "(no documents)"}
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-  Also consider the dataset folder name for clues about the dataset name:
-  File paths start with: ${evidenceBundle?.root || ""}
-  
-  CRITICAL RULES:
-  - Authors MUST be array: ["Name 1", "Name 2", "Name 3"]
-  - DO NOT include empty strings "" or empty arrays []
-  - DO NOT use placeholders like "Extract" or "Dataset Name"
-  - Extract ACTUAL dataset name from content
-  - License: use "PD" if not specified, normalize "CC BY 4.0" to "CC-BY-4.0"
-  
-  Extract from user-provided content:
-  - Dataset name (look for study title, project name, experiment name)
-  - Authors/institutions mentioned
-  - Funding sources (if mentioned)
-  - License information
-  
-  Output ONLY valid JSON (no markdown fences, no explanations):
-  {
-    "Name": "Actual Dataset Name Here",
-    "BIDSVersion": "1.10.0",
-    "DatasetType": "raw",
-    "License": "PD",
-    "Authors": ["Actual Author Name"]
-  }`;
-};
-
-/**
- * Prompt for README.md generation
- * Based on auto-bidsify's PROMPT_TRIO_README
- */
-export const getReadmePrompt = (userText: string): string => {
-  return `Generate a comprehensive BIDS README.md file.
-  
-  CRITICAL: Use the following user-provided content as the PRIMARY source!
-  
-  USER-PROVIDED CONTENT:
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  ${userText}
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  
-  Create a comprehensive README with these sections:
-  - ## Overview (extract from user content)
-  - ## Dataset Description (expand on user content)
-  - ## Data Acquisition (if information available)
-  - ## File Organization (describe BIDS structure)
-  - ## Usage Notes
-  - ## References (if mentioned in user content)
-  
-  Use the user-provided content to inform ALL sections.
-  Expand and structure the information, but stay true to the original content.
-  
-  OUTPUT: Direct Markdown text only (no JSON wrapper, no code fences)`;
-};
-
-/**
- * Prompt for participants.tsv generation
- * Based on auto-bidsify's PROMPT_TRIO_PARTICIPANTS
- */
-export const getParticipantsPrompt = (userText: string): string => {
-  return `You are a BIDS participants.tsv column schema generator.
-
-USER-PROVIDED CONTENT:
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-${userText}
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-YOUR JOB: Decide which columns belong in participants.tsv based ONLY on what is explicitly stated in the user content above.
-
-STRICT RULES:
-- participant_id is ALWAYS required
-- ONLY add columns for demographics EXPLICITLY mentioned in the content
-- DO NOT invent age, sex, handedness, or any column not directly stated
-- If no demographic info is mentioned, return ONLY participant_id
-
-Output ONLY valid JSON (no markdown fences, no explanation):
-{
-  "columns": [
-    {"name": "participant_id", "required": true}
-  ]
-}
-
-Examples:
-- Content mentions "1 male, 1 female" → add {"name": "sex", "levels": ["M", "F"]}
-- Content mentions "patients and controls" → add {"name": "group", "levels": ["patient", "control"]}
-- Content mentions nothing about demographics → return only participant_id
-`;
-};
-
-// export const getParticipantsPrompt = (userText: string): string => {
-//   return `Generate a BIDS participants.tsv file.
-
-//   CRITICAL: Extract participant metadata from the following user-provided content!
-
-//   USER-PROVIDED CONTENT:
-//   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-//   ${userText}
-//   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-//   STRICT RULES:
-//   - First column MUST be "participant_id"
-//   - Use tab (\\t) as delimiter
-//   - ONLY include columns for data EXPLICITLY mentioned in the user content above
-//   - DO NOT invent or assume age, sex, handedness, or any other column unless it is directly stated in the content
-//   - If no demographic data is mentioned, output ONLY participant_id column
-//   - If only subject IDs are known, output the minimal form below
-
-//   MINIMAL FORM (use this when no demographics are mentioned):
-//   participant_id
-//   sub-01
-//   sub-02
-
-//   Extract participant information:
-//   - Subject IDs (look for "sub-01", "2 subjects", "participants: sub-01 and sub-02", etc.)
-//   - Demographics if available:
-//     - "1 male, 1 female" → sex column: M, F
-//     - "ages 25-65" → age column
-//     - "patients and controls" → group column
-//     - "right-handed" → handedness column
-
-//   Rules:
-//   - First column MUST be "participant_id"
-//   - Use tab (\\t) as delimiter
-//   - Include only columns with actual data (no empty columns)
-//   - If only subject IDs known, output: participant_id\\nsub-01\\nsub-02
-
-//   Examples:
-//   - If text says "2 subjects: sub-01 and sub-02" with no demographics:
-//     participant_id
-//     sub-01
-//     sub-02
-
-//   - If text says "sub-01 (25y, male), sub-02 (30y, female)":
-//     participant_id\\tage\\tsex
-//     sub-01\\t25\\tM
-//     sub-02\\t30\\tF
-
-//   OUTPUT: Direct TSV text only (no JSON, no code fences, no markdown)`;
-// };
-
-/**
- * Main prompt for BIDS conversion script generation
- */
 export const getConversionScriptPrompt = (
   baseDirectoryPath: string,
   fileSummary: string,
@@ -180,188 +24,12 @@ export const getConversionScriptPrompt = (
   annotations: string
 ): string => {
   return `You are a BIDS conversion expert specializing in neuroimaging data.
-  
-  ╔════════════════════════════════════════════════════════════════╗
-  ║ TASK: Generate Python script to convert dataset to BIDS       ║
-  ╚════════════════════════════════════════════════════════════════╝
-  
-  BASE DIRECTORY: ${baseDirectoryPath}
-  
-  ${fileSummary}
-  
-  ${filePatterns}
-  
-  ${userContext}
-  
-  ${annotations}
-  
-  CRITICAL FILE CATEGORIZATION RULES:
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  Files are marked with categories. YOU MUST respect these categories:
-  
-  - [anatomical-T1w] → Goes to sub-XX/anat/ folder, rename to sub-XX_T1w.nii.gz
-  - [anatomical-T2w] → Goes to sub-XX/anat/ folder, rename to sub-XX_T2w.nii.gz
-  - [functional-bold] → Goes to sub-XX/func/ folder, rename to sub-XX_task-<name>_run-XX_bold.nii.gz
-  - [functional-nirs] → Goes to sub-XX/func/ folder, rename to sub-XX_task-<name>_nirs.snirf
-  - [anatomical-dicom] → Convert to NIfTI using dcm2niix, then goes to sub-XX/anat/
-  - [diffusion] → Goes to sub-XX/dwi/ folder
-  - [fieldmap] → Goes to sub-XX/fmap/ folder
-
-  FORMAT CONVERSION RULES:
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  Some files require conversion before copying to BIDS:
-  
-  - <format: DICOM → convert_to: nifti (dcm2niix)>
-      → Run: subprocess.run(['dcm2niix', '-o', dest_dir, '-f', bids_filename, src_file])
-      → Output goes to sub-XX/anat/
-  
-  - <format: MATLAB → convert_to: snirf>
-      → Use MNE-Python: mne.export.export_raw(dst, raw, fmt='snirf')
-      → OR note in script that manual conversion is needed
-      → Output goes to sub-XX/nirs/
-  
-  - <format: Homer3 → convert_to: snirf>
-      → Same as MATLAB conversion above
-      → Output goes to sub-XX/nirs/
-  
-  - <format: NIfTI → format_ready: true>
-      → Direct copy, no conversion needed
-  
-  - <format: SNIRF → format_ready: true>
-      → Direct copy, no conversion needed
-  
-  FILENAME-BASED DETECTION (if category unclear):
-  - Contains "task-" AND "bold" → ALWAYS functional (func/ folder)
-  - Contains "T1w" → ALWAYS anatomical (anat/ folder)
-  - Contains "T2w" OR "inplaneT2" → ALWAYS anatomical (anat/ folder)
-  - Ends with ".snirf" → ALWAYS functional (func/ folder)
-  - Ends with ".dcm" → ALWAYS needs dcm2niix conversion → anat/ folder
-  - Ends with ".mat" → ALWAYS needs snirf conversion → nirs/ folder  
-  - Ends with ".nirs" → ALWAYS needs snirf conversion → nirs/ folder
-  
-  ⚠️ CRITICAL: NEVER put task-based files in anat/ folder!
-  ⚠️ CRITICAL: NEVER put T1w/T2w files in func/ folder!
-  
-  CRITICAL INSTRUCTIONS:
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  
-  1. The BIDS metadata files (dataset_description.json, README.md, participants.tsv)
-     have ALREADY been generated above. Your script MUST:
-     ✓ Use the EXACT content from dataset_description.json (copy it verbatim)
-     ✓ Use the EXACT participant IDs from participants.tsv
-     ✓ Write these files as-is to the BIDS directory
-  
-  2. All file paths are RELATIVE to base directory: ${baseDirectoryPath}
-     When accessing files: os.path.join(base_dir, relative_path)
-     
-     Example:
-     File shown as: "Balloon Analog Risk-taking Task/sub-01_T1w.nii.gz"
-     Full path: os.path.join('${baseDirectoryPath}', 'Balloon Analog Risk-taking Task', 'sub-01_T1w.nii.gz')
-  
-  3. BIDS directory structure to create:
-     bids_dataset/
-     ├── dataset_description.json  ← Write exact content from above
-     ├── README.md                  ← Write exact content from above
-     ├── participants.tsv           ← Write exact content from above
-     └── sub-XX/
-         ├── anat/                  ← Anatomical scans only!
-         │   ├── sub-XX_T1w.nii.gz
-         │   └── sub-XX_T2w.nii.gz
-         └── func/                  ← Functional scans only!
-             └── sub-XX_task-<name>_run-XX_bold.nii.gz
-  
-  4. For EACH data file, you must:
-     a) Extract subject ID from filename (e.g., "sub-01" from "sub-01_T1w.nii.gz")
-     b) Determine modality from file category:
-        - [anatomical-*] → modality = 'anat'
-        - [functional-*] → modality = 'func'
-        - [diffusion] → modality = 'dwi'
-     c) Construct source path including any parent folders
-     d) Create destination path: bids_dir/sub-XX/modality/new_filename
-     e) Copy the file
-     f) Create JSON sidecar (for imaging files)
-  
-  5. Handle run numbers correctly:
-     - Functional scans often have run-01, run-02, run-03
-     - Extract run number ONLY from files that have "_run-" in filename
-     - Anatomical scans typically don't have run numbers
-  
-  6. Error handling:
-     - Wrap file operations in try-except
-     - Print progress messages
-     - Print errors but continue processing
-  
-  OUTPUT REQUIREMENTS:
-  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-  Generate a complete, runnable Python script that:
-  - Imports: os, shutil, json, pathlib
-  - Defines base_dir and bids_dir
-  - Creates BIDS directory structure (based on participants.tsv)
-  - Writes the three metadata files (exact content from above)
-  - Loops through data files and processes each one
-  - Includes clear comments explaining each step
-  - Has error handling and progress messages
-  
-  OUTPUT ONLY THE PYTHON SCRIPT (no markdown code fences, no explanations before or after).`;
-};
-
-/**
- * Prompt for BIDSPlan.yaml generation
- * Based on autobidsify's PROMPT_BIDS_PLAN
- */
-export const getBIDSPlanPrompt = (
-  fileSummary: string,
-  filePatterns: string,
-  userContext: string,
-  subjectInfo: {
-    subjects: { originalId: string; bidsId: string }[];
-    strategy: string;
-  },
-  countsByExt: Record<string, number>,
-  sampleFiles: string,
-  evidenceBundle: any
-): string => {
-  const subjectAnalysis = evidenceBundle.subject_analysis;
-  const assignmentRules = subjectInfo.subjects
-    .slice(0, 50) // cap at 50
-    .map(
-      (s) =>
-        `- match:\n  - '*${s.originalId}*'\n  original: ${s.originalId}\n  subject: '${s.bidsId}'`
-    )
-    .join("\n");
-
-  const subjectLabels = subjectInfo.subjects
-    .slice(0, 50)
-    .map((s) => `  - '${s.bidsId}'`)
-    .join("\n");
 
-  const participantMetadata = subjectInfo.subjects
-    .slice(0, 50)
-    .map((s) => `  '${s.bidsId}':\n    original_id: ${s.originalId}`)
-    .join("\n");
+╔════════════════════════════════════════════════════════════════╗
+║ TASK: Generate Python script to convert dataset to BIDS       ║
+╚════════════════════════════════════════════════════════════════╝
 
-  const countsText = Object.entries(countsByExt)
-    .map(([ext, count]) => `  ${ext}: ${count} files`)
-    .join("\n");
-
-  const pythonSubjectAnalysisText = subjectAnalysis
-    ? `\nPYTHON SUBJECT ANALYSIS (for context only — do NOT re-detect subjects):\n${JSON.stringify(
-        {
-          method: subjectAnalysis.method,
-          subject_count: subjectAnalysis.subject_count,
-          subject_examples: (subjectAnalysis.subject_records || [])
-            .slice(0, 5)
-            .map((r: any) => ({
-              original: r.original_id,
-              file_count: r.file_count,
-            })),
-        },
-        null,
-        2
-      )}\n`
-    : "";
-
-  return `You are a BIDS dataset architect. Generate a BIDSPlan.yaml file.
+BASE DIRECTORY: ${baseDirectoryPath}
 
 ${fileSummary}
 
@@ -369,216 +37,49 @@ ${filePatterns}
 
 ${userContext}
 
-${pythonSubjectAnalysisText}
-
-CONVERSION RULES (CRITICAL):
-- .dcm  → format_ready: false, convert_to: nifti,  modality: mri
-- .nii/.nii.gz → format_ready: true,  convert_to: none,  modality: mri
-- .jnii/.bnii  → format_ready: false, convert_to: nifti,  modality: mri
-- .mat  → format_ready: false, convert_to: snirf, modality: nirs
-- .nirs → format_ready: false, convert_to: snirf, modality: nirs
-- .snirf → format_ready: true, convert_to: none,  modality: nirs
-
-YOUR ONLY JOB: Generate the mappings section based on the file types present.
-Copy assignment_rules, participant_metadata, and subjects sections EXACTLY as shown in the OUTPUT below.
-
-OUTPUT (Raw YAML only, no markdown, no explanation):
-
-assignment_rules:
-${assignmentRules}
-
-FILE EXTENSION COUNTS (use these to determine which mappings to generate):
-${countsText}
-
-SAMPLE FILENAMES (use these to determine correct bids_template and match_pattern):
-${sampleFiles}
-
-MAPPINGS FORMAT (ONE entry per file extension, use glob patterns NOT individual filenames):
+${annotations}
 
-Example 1 - DICOM:
-  mappings:
-  - modality: mri
-    match: ['*.dcm', '**/*.dcm']
-    format_ready: false
-    convert_to: nifti
-    filename_rules:
-      - match_pattern: '.*'
-        bids_template: 'sub-X_T1w.nii.gz'
+CRITICAL FILE CATEGORIZATION RULES:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+- [anatomical-T1w]    → sub-XX/anat/sub-XX_T1w.nii.gz
+- [anatomical-T2w]    → sub-XX/anat/sub-XX_T2w.nii.gz
+- [functional-bold]   → sub-XX/func/sub-XX_task-<n>_run-XX_bold.nii.gz
+- [functional-nirs]   → sub-XX/nirs/sub-XX_task-<n>_nirs.snirf
+- [anatomical-dicom]  → convert with dcm2niix → sub-XX/anat/
+- [anatomical-jnifti] → convert with jnifti_converter → sub-XX/anat/
+- [diffusion]         → sub-XX/dwi/
+- [fieldmap]          → sub-XX/fmap/
+
+FORMAT CONVERSION RULES:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+- <format: DICOM → convert_to: nifti>
+    → subprocess.run(['dcm2niix', '-o', dest_dir, '-f', bids_filename, src_file])
+- <format: MATLAB → convert_to: snirf>
+    → Use MNE-Python or note manual conversion needed → sub-XX/nirs/
+- <format: Homer3 → convert_to: snirf>
+    → Same as MATLAB → sub-XX/nirs/
+- <format: NIfTI → format_ready: true>   → direct copy
+- <format: SNIRF → format_ready: true>   → direct copy
+
+⚠️ NEVER put task-based files in anat/ folder!
+⚠️ NEVER put T1w/T2w files in func/ folder!
+
+CRITICAL INSTRUCTIONS:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+1. BIDS metadata files (dataset_description.json, README.md, participants.tsv)
+   have ALREADY been generated. Script MUST use their EXACT content.
 
-Example 2 - fNIRS .mat:
-  mappings:
-  - modality: nirs
-    match: ['*.mat', '**/*.mat']
-    format_ready: false
-    convert_to: snirf
-    filename_rules:
-      - match_pattern: '.*'
-        bids_template: 'sub-X_task-rest_nirs.snirf'
+2. All paths are RELATIVE to: ${baseDirectoryPath}
+   Access with: os.path.join('${baseDirectoryPath}', relative_path)
 
-Example 3 - Mixed:
-  mappings:
-  - modality: mri
-    match: ['*.nii.gz']
-    format_ready: true
-    convert_to: none
-    filename_rules:
-      - match_pattern: '.*T1.*'
-        bids_template: 'sub-X_T1w.nii.gz'
-  - modality: nirs
-    match: ['*.mat']
-    format_ready: false
-    convert_to: snirf
-    filename_rules:
-      - match_pattern: '.*'
-        bids_template: 'sub-X_task-rest_nirs.snirf'
+3. For EACH data file:
+   a) Extract subject ID from filename
+   b) Determine modality from file category
+   c) Create destination: bids_dir/sub-XX/modality/new_filename
+   d) Copy the file
+   e) Create JSON sidecar for imaging files
 
-participant_metadata:
-${participantMetadata}
+4. Error handling: wrap in try-except, print progress, continue on errors
 
-subjects:
-  count: ${subjectInfo.subjects.length}
-  id_strategy: ${subjectInfo.strategy}
-  labels:
-${subjectLabels}
-  source: python_extracted`;
+OUTPUT ONLY THE PYTHON SCRIPT (no markdown code fences, no explanations).`;
 };
-
-/**
- * Prompt for BIDSPlan.yaml generation
- * Mirrors autobidsify's PROMPT_BIDS_PLAN + build_bids_plan()'s optimized_bundle
- */
-// export const getBIDSPlanPrompt = (evidenceBundle: any): string => {
-//   // ── Pull subject analysis from evidence bundle (generated by extractSubjectAnalysis)
-//   const subjectAnalysis = evidenceBundle.subject_analysis;
-//   const idMapping: Record<string, string> =
-//     subjectAnalysis?.id_mapping?.id_mapping || {};
-//   const subjectRecords: any[] = subjectAnalysis?.subject_records || [];
-
-//   // ── Build assignment_rules (mirrors planner.py's _apply_python_rules_to_plan)
-//   const assignmentRules = subjectRecords
-//     .slice(0, 50)
-//     .map(
-//       (r) =>
-//         `- match:\n  - '*${r.original_id}*'\n  original: ${
-//           r.original_id
-//         }\n  subject: '${idMapping[r.original_id] ?? r.numeric_id}'`
-//     )
-//     .join("\n");
-
-//   // ── Build subjects section
-//   const subjectLabels = subjectRecords
-//     .slice(0, 50)
-//     .map((r) => `  - '${idMapping[r.original_id] ?? r.numeric_id}'`)
-//     .join("\n");
-
-//   // ── Build participant_metadata section
-//   const participantMetadata = subjectRecords
-//     .slice(0, 50)
-//     .map(
-//       (r) =>
-//         `  '${idMapping[r.original_id] ?? r.numeric_id}':\n    original_id: ${
-//           r.original_id
-//         }`
-//     )
-//     .join("\n");
-
-//   // ── Build file extension counts
-//   const countsText = Object.entries(
-//     evidenceBundle.counts_by_ext as Record<string, number>
-//   )
-//     .map(([ext, count]) => `  ${ext}: ${count} files`)
-//     .join("\n");
-
-//   // ── Build sample files (mirrors optimized_bundle.sample_files)
-//   const sampleFiles =
-//     (evidenceBundle.sample as Array<{ relpath: string }>)
-//       ?.map((s) => `  - ${s.relpath}`)
-//       .join("\n") ?? "";
-
-//   // ── Build python_subject_analysis block (mirrors planner.py's optimized_bundle)
-//   const subjectExamples = subjectRecords.slice(0, 10).map((r) => ({
-//     original: r.original_id,
-//     bids_id: idMapping[r.original_id] ?? r.numeric_id,
-//   }));
-
-//   const pythonSubjectAnalysis = JSON.stringify(
-//     {
-//       success: subjectAnalysis?.success ?? false,
-//       method: subjectAnalysis?.method ?? "none",
-//       subject_count: subjectAnalysis?.subject_count ?? 0,
-//       subject_examples: subjectExamples,
-//       id_mapping: subjectAnalysis?.id_mapping ?? {},
-//     },
-//     null,
-//     2
-//   );
-
-//   return `You are a BIDS dataset architect with complete decision-making authority.
-
-// ═══════════════════════════════════════════════════════════════════════
-// SUPPORTED FORMATS AND CONVERSION RULES (v10 - CRITICAL)
-// ═══════════════════════════════════════════════════════════════════════
-
-// MRI FORMATS (modality: mri):
-//   Input formats:
-//     • DICOM (.dcm)           → Convert to NIfTI using dcm2niix
-//     • NIfTI (.nii, .nii.gz)  → Already BIDS-ready, copy directly
-//     • JNIfTI (.jnii, .bnii)  → Convert to NIfTI using jnifti_converter
-//   BIDS output: .nii.gz files only
-
-// fNIRS FORMATS (modality: nirs):
-//   Input formats:
-//     • SNIRF (.snirf)         → Already BIDS-ready, copy directly
-//     • Homer3 (.nirs)         → Convert to SNIRF
-//     • MATLAB (.mat)          → Convert to SNIRF
-//   BIDS output: .snirf files only
-
-// FORMAT_READY AND CONVERT_TO RULES:
-//   format_ready: true  → .nii/.nii.gz (MRI) or .snirf (fNIRS) — just copy
-//   format_ready: false → .dcm (convert_to: nifti), .jnii/.bnii (convert_to: nifti),
-//                         .mat (convert_to: snirf), .nirs (convert_to: snirf)
-
-// CRITICAL: assignment_rules subject values must be BARE IDs (no 'sub-' prefix).
-//   ✓ subject: '1'       ← correct
-//   ✗ subject: 'sub-1'   ← wrong, executor adds sub- automatically
-
-// YOUR ONLY JOB: Generate the mappings section based on the file types present.
-// Copy assignment_rules, participant_metadata, and subjects sections EXACTLY as shown below.
-
-// ═══════════════════════════════════════════════════════════════════════
-// PYTHON SUBJECT ANALYSIS (use this — do NOT re-detect subjects yourself)
-// ═══════════════════════════════════════════════════════════════════════
-// ${pythonSubjectAnalysis}
-
-// FILE EXTENSION COUNTS:
-// ${countsText}
-
-// SAMPLE FILE PATHS (use these for match patterns and bids_template):
-// ${sampleFiles}
-
-// ═══════════════════════════════════════════════════════════════════════
-// OUTPUT (Raw YAML only, no markdown, no explanation)
-// ═══════════════════════════════════════════════════════════════════════
-
-// assignment_rules:
-// ${assignmentRules}
-
-// mappings:
-//   - modality: mri             # example — generate based on file types present
-//     match: ['*.dcm', '**/*.dcm']
-//     format_ready: false
-//     convert_to: nifti
-//     filename_rules:
-//       - match_pattern: '.*'
-//         bids_template: 'sub-X_T1w.nii.gz'
-
-// participant_metadata:
-// ${participantMetadata}
-
-// subjects:
-//   count: ${subjectAnalysis?.subject_count ?? 0}
-//   id_strategy: ${subjectAnalysis?.id_mapping?.strategy_used ?? "numeric"}
-//   labels:
-// ${subjectLabels}
-//   source: python_extracted`;
-// };
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts
index e69de29..5b203e8 100644
--- a/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts
@@ -0,0 +1,870 @@
+// src/components/User/Dashboard/DatasetOrganizer/utils/plannerHelpers.ts
+//
+// Mirrors autobidsify/converters/planner.py
+// Owns Stage 5 of the pipeline: evidence bundle → BIDSPlan.yaml + participants.tsv
+//
+// Python equivalents:
+//   DATA_EXTENSIONS / TRIO_FILENAMES / SKIP_DIRS  → planner.py _DATA_EXTS / TRIO_NAMES
+//   extractNumericIdFromIdentifier()              → planner.py _sort_key lambda
+//   extractFromDirectoryStructure()               → planner.py _extract_subjects_from_directory_structure()
+//   extractFromFlatFilenames()                    → planner.py _extract_subjects_from_flat_filenames()
+//   generateIdMapping()                           → planner.py _write_participants_from_plan() logic (port currently commented out)
+//   extractSubjectAnalysis()                      → planner.py build_bids_plan() Steps 1 + 4 (port currently commented out)
+//   parseLLMJsonResponse()                        → planner.py _parse_llm_json_response()
+//   buildOptimizedBundle()                        → planner.py build_bids_plan() Step 2
+//   getBidsPlanPrompt()                           → planner.py PROMPT_BIDS_PLAN
+//   parsePlanYaml()                               → planner.py yaml.safe_load() block
+//   collectExtraColumns()                         → planner.py _collect_extra_columns()
+//   validateSubjectCount()                        → planner.py build_bids_plan() Step 4
+//   writeParticipantsFromPlan()                   → planner.py _write_participants_from_plan()
+//   mergeParticipantsFromPlan()                   → planner.py _merge_participants_from_llm_metadata()
+//   buildBidsPlan()                               → planner.py build_bids_plan() main entry point
+import { validatePlanCoverage } from "./executorHelpers";
+import { llmBidsPlan, LLMConfig } from "./llm";
+import { dump as yamlDump, load as yamlLoad } from "js-yaml";
+
+// ============================================================================
+// Types
+// ============================================================================
+export interface SubjectRecord {
+  original_id: string; // identifier exactly as matched in the path or filename
+  numeric_id: string; // captured ID part (directory mode) or 1-based rank (flat mode)
+  site: string | null; // site prefix for "site_prefixed" folders, otherwise null
+  pattern_name: string; // name of the detection pattern that produced the record
+  file_count: number; // number of files attributed to this identifier
+  group?: string; // optional cohort label (parent folder or "GROUP-NN" prefix)
+}
+
+export interface SubjectAnalysis {
+  success: boolean; // false only in the "none" placeholder built when nothing was detected
+  method: string; // "directory_structure", "flat_filename_identifiers" or "none"
+  subject_records: SubjectRecord[]; // one entry per detected subject, already sorted
+  subject_count: number; // set to subject_records.length by the extractors in this file
+  has_site_info: boolean; // true when at least one record has a non-null site
+  variants_by_subject: Record<string, any>; // always {} here — NOTE(review): reserved?
+  python_generated_filename_rules: any[]; // always [] here — NOTE(review): reserved?
+  //   id_mapping: {   ← disabled while the generateIdMapping() port is commented out
+  //     id_mapping: Record<string, string>;
+  //     reverse_mapping: Record<string, string>;
+  //     strategy_used: string;
+  //     metadata_columns: string[];
+  //   };
+}
+
+export interface BuildBidsPlanOptions {
+  evidenceBundle: any; // read for all_files, user_hints, counts_by_ext, root, structure_hint
+  llmConfig: LLMConfig; // passed to llmBidsPlan(); its model name is logged and recorded
+  signal?: AbortSignal; // forwarded to llmBidsPlan() — presumably cancels the request
+  onStatus?: (msg: string) => void; // receives every progress message that is also logged
+}
+
+export interface BuildBidsPlanResult {
+  planYaml: string; // YAML text of the plan, intended for saving
+  subjectAnalysis: SubjectAnalysis; // heuristic subject detection that was sent as a hint
+  participantsTsv: string; // participants.tsv content after the metadata merge step
+  coverageWarnings: string[]; // warnings returned by validatePlanCoverage()
+}
+
+// ============================================================================
+// Constants
+// Mirrors planner.py _DATA_EXTS, evidence.py TRIO_NAMES
+// ============================================================================
+// Case-insensitive test for a trailing data-file extension (e.g. ".snirf", ".nii.gz").
+const DATA_EXTENSIONS =
+  // Wider variant, currently disabled: /\.(snirf|nii|nii\.gz|dcm|mat|nirs|jnii|bnii|h5|hdf5|edf|bdf)$/i;
+  /\.(snirf|nii|nii\.gz|dcm|mat|nirs|jnii|bnii)$/i;
+
+// Lowercased names of the "trio" documents; flat-filename subject detection
+// lowercases each filename and skips it when it is listed here.
+const TRIO_FILENAMES = new Set<string>(
+  ["dataset_description.json", "participants.tsv"].concat(
+    // README with each supported extension, then in its bare form
+    ["readme.md", "readme.txt", "readme.rst", "readme"]
+  )
+);
+
+// Lowercase directory names that directory-structure detection never treats as
+// a subject folder and never reports as a subject's parent "group".
+// Callers lowercase the path segment before the lookup, so matching is
+// case-insensitive. Entries are grouped by theme; insertion order is unchanged.
+const SKIP_DIRS = new Set<string>(
+  [
+    // BIDS datatype folders
+    "anat",
+    "func",
+    "dwi",
+    "fmap",
+    "nirs",
+    "meg",
+    "eeg",
+    "beh",
+    "perf",
+    // BIDS auxiliary top-level folders
+    "derivatives",
+    "sourcedata",
+    "stimuli",
+    // task / condition style names
+    "walking",
+    "resting",
+    "resting_state",
+    // entity-like keys and cohort-style names
+    ["run", "ses", "pd", "control", "hc", "task", "sub"],
+    // generic container folders
+    ["dataset", "data", "raw", "bids"],
+    ["output", "outputs", "staging", "_staging"],
+    // modality / format folders
+    "mri",
+    "fnirs",
+    "edf",
+    "dicom",
+  ].flat()
+);
+
+/**
+ * Best-effort JSON extraction from raw LLM output (mirrors planner.py
+ * _parse_llm_json_response()): drops an optional Markdown code fence, parses
+ * the remainder, then retries on the outermost "{ ... }" span.
+ * Returns null, after a console warning tagged with stepName, when the
+ * response is blank or nothing parses.
+ */
+export const parseLLMJsonResponse = (
+  text: string,
+  stepName: string
+): any | null => {
+  const cleaned = text?.trim();
+  if (!cleaned) {
+    console.warn(`${stepName}: LLM returned empty response`);
+    return null;
+  }
+  const FENCE = "```";
+  let body = cleaned;
+  if (body.startsWith(FENCE + "json")) body = body.substring(7);
+  else if (body.startsWith(FENCE)) body = body.split("\n").slice(1).join("\n");
+  if (body.endsWith(FENCE)) body = body.substring(0, body.length - 3);
+  body = body.trim();
+  // Candidates in priority order: the whole payload, then the outermost braces.
+  const braced = body.match(/\{[\s\S]*\}/);
+  for (const candidate of braced ? [body, braced[0]] : [body]) {
+    try {
+      return JSON.parse(candidate);
+    } catch {} // invalid JSON: try the next candidate
+  }
+  console.warn(
+    `${stepName}: Failed to parse JSON. Preview: ${body.slice(0, 200)}`
+  );
+  return null;
+};
+
+// ============================================================================
+// Subject extraction
+// ============================================================================
+
+/**
+ * Returns the last run of digits in an identifier (leading zeros kept), or
+ * null when there is none. Mirrors _extract_numeric_id_from_identifier() in
+ * planner.py. Examples: BZZ003 → "003", sub-01 → "01", patient021 → "021".
+ */
+const extractNumericIdFromIdentifier = (identifier: string): string | null =>
+  identifier.match(/\d+/g)?.pop() ?? null;
+
+/**
+ * Subject detection, step 1: look for subject folders among the first two
+ * directory levels of every path.
+ * Mirrors _extract_subjects_from_directory_structure() in planner.py.
+ *
+ * Segments listed in SKIP_DIRS are ignored; each other segment is tried
+ * against the detectors in order and the first match decides the record.
+ * A folder name is recorded once. file_count stays 0 in this mode.
+ * @param allFiles "/"-separated file paths
+ * @returns analysis with method "directory_structure", or null if nothing matched
+ */
+const extractFromDirectoryStructure = (
+  allFiles: string[]
+): Omit<SubjectAnalysis, "id_mapping"> | null => {
+  // idGroup / siteGroup are capture-group indexes; siteGroup null = no site.
+  const detectors: Array<{
+    regex: RegExp;
+    idGroup: number;
+    siteGroup: number | null;
+    patternName: string;
+  }> = [
+    // Beijing_sub82352
+    { regex: /^([A-Za-z]+)_sub(\d+)$/i, idGroup: 2, siteGroup: 1, patternName: "site_prefixed" },
+    // sub-01
+    { regex: /^sub-(\w+)$/, idGroup: 1, siteGroup: null, patternName: "standard_bids" },
+    // subject_01
+    { regex: /^subject[_-]?(\d+)$/i, idGroup: 1, siteGroup: null, patternName: "simple" },
+    // 001 — no capture group, so numeric_id falls back to the whole match
+    { regex: /^\d{3,}$/, idGroup: 1, siteGroup: null, patternName: "numeric_only" },
+    // PD01, Control01, HC03
+    { regex: /^([A-Za-z]+\d+)$/, idGroup: 1, siteGroup: null, patternName: "alphanum_id" },
+  ];
+
+  const subjectRecords: SubjectRecord[] = [];
+  const seenIds = new Set<string>();
+
+  for (const filepath of allFiles) {
+    const segments = filepath.split("/");
+    // Only the first two levels, and never the filename itself.
+    const leadingDirs = segments.slice(0, Math.min(2, segments.length - 1));
+
+    for (const dirName of leadingDirs) {
+      if (SKIP_DIRS.has(dirName.toLowerCase())) continue;
+      // First detector that matches wins; later ones are not consulted.
+      const detector = detectors.find((d) => d.regex.test(dirName));
+      const hit = detector ? dirName.match(detector.regex) : null;
+      if (!detector || !hit || seenIds.has(hit[0])) continue;
+      seenIds.add(hit[0]);
+      subjectRecords.push({
+        original_id: hit[0],
+        numeric_id: hit[detector.idGroup] || hit[0],
+        site: detector.siteGroup !== null ? hit[detector.siteGroup] : null,
+        pattern_name: detector.patternName,
+        file_count: 0,
+      });
+    }
+  }
+
+  if (subjectRecords.length === 0) return null;
+
+  // Order: alphabetic prefix, then number, when both ids look like "PD01";
+  // otherwise by parseInt(numeric_id), with unparsable ids counted as 0.
+  const prefixedNumber = /^([A-Za-z]+)(\d+)$/;
+  subjectRecords.sort((left, right) => {
+    const l = left.original_id.match(prefixedNumber);
+    const r = right.original_id.match(prefixedNumber);
+    if (!l || !r) {
+      return (parseInt(left.numeric_id) || 0) - (parseInt(right.numeric_id) || 0);
+    }
+    const byPrefix = l[1].localeCompare(r[1]);
+    return byPrefix !== 0 ? byPrefix : parseInt(l[2]) - parseInt(r[2]);
+  });
+
+  // Group = top folder of "<group>/<subject>/<file...>" (PROMPT_BIDS_PLAN Structure 4); last path wins.
+  const parentOf = new Map<string, string>();
+  for (const filepath of allFiles) {
+    const [topDir, secondDir, ...deeper] = filepath.split("/");
+    if (deeper.length === 0) continue; // fewer than three segments
+    if (seenIds.has(secondDir) && !SKIP_DIRS.has(topDir.toLowerCase())) {
+      parentOf.set(secondDir, topDir);
+    }
+  }
+  for (const record of subjectRecords) {
+    const parent = parentOf.get(record.original_id);
+    if (parent) record.group = parent;
+  }
+
+  return {
+    success: true,
+    method: "directory_structure",
+    subject_records: subjectRecords,
+    subject_count: subjectRecords.length,
+    has_site_info: subjectRecords.some((rec) => rec.site !== null),
+    variants_by_subject: {},
+    python_generated_filename_rules: [],
+  };
+};
+
+/**
+ * Subject detection, step 2: group data files by the identifier at the start
+ * of the filename. Mirrors _extract_subjects_from_flat_filenames() in planner.py.
+ * Trio files and files without a data extension are skipped. The identifier is
+ * the leading run of letters, digits and hyphens of the extension-less name:
+ * sub-01_ses-left2s → sub-01, BZZ003_rest → BZZ003, VHMCT1mm-Hip → VHMCT1mm-Hip.
+ * numeric_id is the 1-based rank of the identifier after sorting.
+ * @returns analysis with method "flat_filename_identifiers", or null when no
+ *          data file yields an identifier
+ */
+const extractFromFlatFilenames = (
+  allFiles: string[]
+): Omit<SubjectAnalysis, "id_mapping"> | null => {
+  // A Map, not a plain object: identifiers come from filenames, and a name such
+  // as "constructor" would otherwise collide with Object.prototype members.
+  const filesByIdentifier = new Map<string, string[]>();
+
+  for (const filepath of allFiles) {
+    const filename = filepath.split("/").pop() ?? "";
+    if (TRIO_FILENAMES.has(filename.toLowerCase())) continue;
+    if (!DATA_EXTENSIONS.test(filename)) continue; // PDFs, docs, sidecars
+    // Drop every trailing ".ext" segment, then take the leading identifier.
+    const nameNoExt = filename.replace(/(\.[^.]+)+$/, "");
+    const identifier = nameNoExt.match(/^([A-Za-z0-9\-]+)/)?.[1];
+    if (!identifier) continue;
+    const bucket = filesByIdentifier.get(identifier);
+    if (bucket) bucket.push(filepath);
+    else filesByIdentifier.set(identifier, [filepath]);
+  }
+
+  if (filesByIdentifier.size === 0) return null;
+
+  // By last digit run when both have one, else alphabetically (sort_key).
+  const sortedIdentifiers = [...filesByIdentifier.keys()].sort((a, b) => {
+    const na = extractNumericIdFromIdentifier(a);
+    const nb = extractNumericIdFromIdentifier(b);
+    if (na && nb) return parseInt(na, 10) - parseInt(nb, 10);
+    return a.localeCompare(b);
+  });
+  const subjectRecords: SubjectRecord[] = sortedIdentifiers.map(
+    (identifier, i) => ({
+      original_id: identifier,
+      numeric_id: String(i + 1),
+      site: null,
+      pattern_name: "filename_identifier",
+      file_count: filesByIdentifier.get(identifier)?.length ?? 0,
+    })
+  );
+
+  // Group from a "GROUP-NN" identifier (PROMPT_BIDS_PLAN Structure 4). The
+  // subject keys "sub"/"subject" are not cohorts, so sub-01 gets no group.
+  for (const rec of subjectRecords) {
+    const groupMatch = rec.original_id.match(/^([A-Za-z]+)[_\-](\d+)$/);
+    if (groupMatch && !/^sub(ject)?$/i.test(groupMatch[1])) rec.group = groupMatch[1];
+  }
+
+  return {
+    success: true,
+    method: "flat_filename_identifiers",
+    subject_records: subjectRecords,
+    subject_count: subjectRecords.length,
+    has_site_info: false,
+    variants_by_subject: {},
+    python_generated_filename_rules: [],
+  };
+};
+
+// ── ID mapping — mirrors _generate_subject_id_mapping() in planner.py
+// const generateIdMapping = (
+//     subjectInfo: Omit<SubjectAnalysis, "id_mapping">
+//   ): SubjectAnalysis["id_mapping"] => {
+//     const records = subjectInfo.subject_records;
+//     const idMapping: Record<string, string>     = {};
+//     const reverseMapping: Record<string, string> = {};
+
+//     const allAlreadyBids = records.every((r) => /^sub-\w+$/i.test(r.original_id));
+//     if (allAlreadyBids) {
+//       for (const rec of records) {
+//         const bidsId = rec.original_id.replace(/^sub-/i, "");
+//         idMapping[rec.original_id] = bidsId;
+//         reverseMapping[bidsId]     = rec.original_id;
+//       }
+//       return { id_mapping: idMapping, reverse_mapping: reverseMapping,
+//                strategy_used: "already_bids", metadata_columns: [] };
+//     }
+
+//     const extractedNumbers: Record<string, string> = {};
+//     for (const rec of records) {
+//       const nums = rec.original_id.match(/\d+/g);
+//       if (nums) extractedNumbers[rec.original_id] = nums[nums.length - 1];
+//     }
+//     const numericValues = Object.values(extractedNumbers);
+//     const allUnique = new Set(numericValues).size === numericValues.length;
+
+//     if (Object.keys(extractedNumbers).length === records.length && allUnique) {
+//       for (const rec of records) {
+//         const bidsId = extractedNumbers[rec.original_id];
+//         idMapping[rec.original_id] = bidsId;
+//         reverseMapping[bidsId]     = rec.original_id;
+//       }
+//     } else {
+//       for (let i = 0; i < records.length; i++) {
+//         const orig   = records[i].original_id;
+//         const bidsId = String(i + 1);
+//         idMapping[orig]      = bidsId;
+//         reverseMapping[bidsId] = orig;
+//       }
+//     }
+
+//     return { id_mapping: idMapping, reverse_mapping: reverseMapping,
+//              strategy_used: "numeric", metadata_columns: ["original_id"] };
+//   };
+
+// export const extractSubjectAnalysis = (
+//   allFiles: string[],
+//   userNSubjects?: number | null,
+//   dominantPrefixes?: { prefix: string; count: number; percentage: number }[]
+// ): SubjectAnalysis => {
+//   // Step 1: directory structure
+//   let subjectInfo = extractFromDirectoryStructure(allFiles);
+
+//   // Step 2: flat filename fallback
+//   if (!subjectInfo || subjectInfo.subject_records.length === 0) {
+//     subjectInfo = extractFromFlatFilenames(allFiles);
+//   }
+
+//   if (!subjectInfo || subjectInfo.subject_records.length === 0) {
+//     return {
+//       success: false,
+//       method: "none",
+//       subject_records: [],
+//       subject_count: 0,
+//       has_site_info: false,
+//       variants_by_subject: {},
+//       python_generated_filename_rules: [],
+//       id_mapping: {
+//         id_mapping: {},
+//         reverse_mapping: {},
+//         strategy_used: "none",
+//         metadata_columns: [],
+//       },
+//     };
+//   }
+
+//   // ── CRITICAL validation: mirrors planner.py lines 190-215
+//   // If extracted count doesn't match user hint but dominant prefixes do,
+//   // fall back to dominant prefixes (handles VHM/VHF body-part over-extraction)
+//   const pythonCount = subjectInfo.subject_count;
+//   if (
+//     userNSubjects &&
+//     pythonCount !== userNSubjects &&
+//     dominantPrefixes &&
+//     dominantPrefixes.length === userNSubjects
+//   ) {
+//     subjectInfo = {
+//       success: true,
+//       method: "dominant_prefix_fallback",
+//       subject_records: dominantPrefixes.map((p, i) => ({
+//         original_id: p.prefix,
+//         numeric_id: String(i + 1),
+//         site: null,
+//         pattern_name: "dominant_prefix",
+//         file_count: p.count,
+//       })),
+//       subject_count: dominantPrefixes.length,
+//       has_site_info: false,
+//       variants_by_subject: {},
+//       python_generated_filename_rules: [],
+//     };
+//   }
+//   // bug fix for subject mapping
+//   // === original
+//   // const idMapping = generateIdMapping(subjectInfo);
+//   // return { ...subjectInfo, id_mapping: idMapping };
+//   // ==== end
+//   // ==== updates
+//   // CRITICAL: n_subjects is authoritative (mirrors planner.py PROMPT_BIDS_PLAN)
+//   // If analysis count doesn't match user input, fall back to sequential numbering
+//   const expectedCount = userNSubjects;
+//   if (expectedCount && subjectInfo.subject_count !== expectedCount) {
+//     const idMap: Record<string, string> = {};
+//     const reverseMap: Record<string, string> = {};
+//     for (let i = 1; i <= expectedCount; i++) {
+//       const bidsId = String(i).padStart(2, "0");
+//       idMap[`sub-${bidsId}`] = bidsId;
+//       reverseMap[bidsId] = `sub-${bidsId}`;
+//     }
+//     return {
+//       ...subjectInfo,
+//       subject_count: expectedCount,
+//       id_mapping: {
+//         id_mapping: idMap,
+//         reverse_mapping: reverseMap,
+//         strategy_used: "numeric_fallback",
+//         metadata_columns: [],
+//       },
+//     };
+//   }
+
+//   const idMapping = generateIdMapping(subjectInfo);
+//   return { ...subjectInfo, id_mapping: idMapping };
+// };
+
+/**
+ * Lists every column used in participant_metadata, in first-seen order and
+ * without "participant_id". Mirrors _collect_extra_columns() in planner.py.
+ * The metadata is LLM-written YAML, so an entry may be null or a scalar (a
+ * key with no value); those are skipped instead of crashing Object.keys().
+ * @param metadata map of subject id → { column: value }
+ * @returns de-duplicated column names
+ */
+const collectExtraColumns = (
+  metadata: Record<string, Record<string, any>>
+): string[] => {
+  const columns = new Set<string>();
+  for (const entry of Object.values(metadata ?? {})) {
+    if (entry === null || typeof entry !== "object") continue;
+    for (const column of Object.keys(entry)) {
+      if (column !== "participant_id") columns.add(column);
+    }
+  }
+  return [...columns];
+};
+
+/**
+ * Builds participants.tsv text from a parsed BIDS plan.
+ * Mirrors _write_participants_from_plan() in planner.py.
+ * Subject ids come from assignment_rules[].subject in first-seen order; only
+ * when that yields none are subjects.labels used. Rows are sorted numeric ids
+ * first (ascending), then the rest alphabetically. Columns are participant_id
+ * plus every key found in participant_metadata; missing values become "n/a".
+ * Hardening against imperfect LLM output:
+ *  - a leading "sub-" on an id is dropped, so rows never read "sub-sub-01";
+ *  - assignment_rules / labels that are not arrays are treated as empty;
+ *  - tabs and line breaks inside a value collapse to one space, so a value
+ *    cannot break the row/column layout.
+ * @param planYaml parsed plan object
+ * @param userNSubjects subject count given by the user, or null (warning only)
+ * @returns TSV text: header line plus one line per subject, joined by "\n"
+ */
+export const writeParticipantsFromPlan = (
+  planYaml: any,
+  userNSubjects: number | null
+): string => {
+  const asArray = (value: unknown): any[] => (Array.isArray(value) ? value : []);
+  const rules = asArray(planYaml?.assignment_rules);
+  const labels = asArray(planYaml?.subjects?.labels);
+  const metadata: Record<string, any> = planYaml?.participant_metadata || {};
+
+  // "sub-01" and "01" name the same subject; the prefix is re-added on output.
+  const toBareId = (value: unknown): string =>
+    String(value ?? "").trim().replace(/^sub-/, "");
+  const toCell = (value: unknown): string =>
+    String(value ?? "n/a").replace(/[\t\r\n]+/g, " ");
+
+  // Ordered, de-duplicated subject ids: rules first, labels only as fallback.
+  const seen = new Set<string>();
+  const ordered: string[] = [];
+  const addSubject = (value: unknown): void => {
+    const sid = toBareId(value);
+    if (!sid || seen.has(sid)) return;
+    seen.add(sid);
+    ordered.push(sid);
+  };
+  for (const rule of rules) addSubject(rule?.subject);
+  if (ordered.length === 0) {
+    for (const label of labels) addSubject(label);
+  }
+
+  if (userNSubjects && ordered.length < userNSubjects) {
+    console.warn(
+      `writeParticipantsFromPlan: plan has ${ordered.length} subjects ` +
+        `but user specified ${userNSubjects}. LLM assignment_rules may be incomplete.`
+    );
+  }
+
+  const columns = ["participant_id", ...collectExtraColumns(metadata)];
+  const sortedIds = [...ordered].sort((a, b) => {
+    const na = parseInt(a, 10);
+    const nb = parseInt(b, 10);
+    const aIsText = Number.isNaN(na);
+    const bIsText = Number.isNaN(nb);
+    if (aIsText !== bIsText) return aIsText ? 1 : -1;
+    if (!aIsText && na !== nb) return na - nb;
+    return a.localeCompare(b);
+  });
+
+  const rows = sortedIds.map((sid) => {
+    // Metadata may be keyed by the bare id or by the prefixed id.
+    const meta = metadata[sid] ?? metadata[`sub-${sid}`] ?? {};
+    return columns
+      .map((col) => (col === "participant_id" ? `sub-${sid}` : toCell(meta[col])))
+      .join("\t");
+  });
+  return [columns.join("\t"), ...rows].join("\n");
+};
+
+/**
+ * Appends participant_metadata columns that an existing participants.tsv
+ * lacks. Mirrors _merge_participants_from_llm_metadata() in planner.py.
+ * Values are looked up by the first cell of each row with a leading "sub-"
+ * removed; missing values become "n/a"; blank lines are dropped. The input
+ * is returned untouched when the plan has no metadata columns, the TSV has
+ * no non-blank line, or every metadata column is already in the header.
+ * @param existingTsv current participants.tsv text
+ * @param planYaml parsed plan object
+ * @returns the widened TSV text, or existingTsv unchanged
+ */
+export const mergeParticipantsFromPlan = (
+  existingTsv: string,
+  planYaml: any
+): string => {
+  const metadata = planYaml?.participant_metadata || {};
+  const planColumns = collectExtraColumns(metadata);
+  if (planColumns.length === 0) return existingTsv;
+
+  const [headerLine, ...bodyLines] = existingTsv.split("\n").filter((l) => l.trim());
+  if (headerLine === undefined) return existingTsv;
+  const headerCells = headerLine.split("\t");
+  const missing = planColumns.filter((c) => headerCells.indexOf(c) === -1);
+  if (missing.length === 0) return existingTsv;
+
+  const widened = bodyLines.map((row) => {
+    const cells = row.split("\t");
+    const meta = metadata[cells[0].replace(/^sub-/, "")] || {};
+    for (const column of missing) cells.push(String(meta[column] ?? "n/a"));
+    return cells.join("\t");
+  });
+  return [headerCells.concat(missing).join("\t"), ...widened].join("\n");
+};
+
+/**
+ * Picks the data files shown to the LLM as sample_files.
+ * Mirrors the sampling of build_bids_plan() Step 2 in planner.py.
+ * Data files end in .snirf, .nirs, .mat, .dcm, .nii, .jnii, .bnii or .nii.gz
+ * (case-insensitive). Up to maxFiles of them are returned as-is; above that,
+ * three windows (start, middle, end) are taken, each min(50, maxFiles / 3)
+ * files long — 50 for the default limit. Sizing the windows from maxFiles
+ * keeps the result within the limit, which a fixed 50 did not for small limits.
+ * @param allFiles every file path of the dataset
+ * @param maxFiles upper bound on the number of returned paths (default 200)
+ * @returns sampled data-file paths in their original relative order
+ */
+const sampleDataFiles = (allFiles: string[], maxFiles = 200): string[] => {
+  const extensions = [".snirf", ".nirs", ".mat", ".dcm", ".nii", ".jnii", ".bnii"];
+  const dataFiles = allFiles.filter((f) => {
+    const low = f.toLowerCase();
+    if (low.endsWith(".nii.gz")) return true;
+    const dot = low.lastIndexOf(".");
+    return dot !== -1 && extensions.includes(low.slice(dot));
+  });
+  if (dataFiles.length <= maxFiles) return dataFiles;
+
+  const n = dataFiles.length;
+  const span = Math.min(50, Math.floor(maxFiles / 3));
+  // Limits below 3 leave no room for three windows: take the head instead.
+  if (span < 1) return dataFiles.slice(0, Math.max(0, maxFiles));
+  const midStart = Math.floor(n / 2) - Math.floor(span / 2);
+  const indices = new Set<number>();
+  for (let i = 0; i < span; i++) {
+    for (const idx of [i, midStart + i, n - span + i]) indices.add(idx);
+  }
+  return [...indices]
+    .filter((i) => i >= 0 && i < n)
+    .sort((a, b) => a - b)
+    .map((i) => dataFiles[i]);
+};
+
+/**
+ * Reduces the evidence bundle to the lean JSON payload sent to the LLM.
+ * Mirrors build_bids_plan() Step 2 in planner.py, which leaves out
+ * documents[], participant_metadata_evidence and the full all_files[] list
+ * to keep the prompt token count low.
+ * Kept: root, user_hints and structure_hint as-is; counts_by_ext limited to
+ * data extensions; total and data file counts; up to 200 sample paths from
+ * sampleDataFiles(); and the first 20 detected subjects as an advisory hint.
+ * @param evidenceBundle full evidence bundle (untyped)
+ * @param subjectAnalysis heuristic subject-detection result
+ * @returns plain object whose property order is the serialized payload order
+ */
+const buildOptimizedBundle = (
+  evidenceBundle: any,
+  subjectAnalysis: SubjectAnalysis
+): object => {
+  const allFiles: string[] = evidenceBundle?.all_files || [];
+
+  // Only counts for data extensions are forwarded; key casing is preserved.
+  const keptExtensions = ".snirf .nirs .mat .dcm .nii .jnii .bnii .nii.gz".split(" ");
+  const countsFiltered: Record<string, unknown> = {};
+  const allCounts = evidenceBundle?.counts_by_ext || {};
+  for (const [ext, count] of Object.entries(allCounts)) {
+    if (keptExtensions.includes(ext.toLowerCase())) countsFiltered[ext] = count;
+  }
+
+  const dataFileCount = allFiles.reduce((n, f) => (DATA_EXTENSIONS.test(f) ? n + 1 : n), 0);
+
+  const subjectExamples = subjectAnalysis.subject_records
+    .slice(0, 20)
+    .map((r) => ({
+      original: r.original_id,
+      numeric_id: r.numeric_id,
+      site: r.site ?? null,
+      // group: r.group ?? null,   (currently disabled)
+    }));
+
+  return {
+    root: evidenceBundle?.root,
+    counts_by_ext: countsFiltered,
+    user_hints: evidenceBundle?.user_hints,
+    total_files: allFiles.length,
+    data_files: dataFileCount,
+    sample_files: sampleDataFiles(allFiles, 200),
+    structure_hint: evidenceBundle?.structure_hint,
+    python_subject_analysis: {
+      success: subjectAnalysis.success,
+      method: subjectAnalysis.method,
+      subject_count: subjectAnalysis.subject_count,
+      subject_examples: subjectExamples,
+      note:
+        "This is a HINT from heuristic detection. " +
+        "Trust user_hints.n_subjects over this count. " +
+        "Use your own analysis of sample_files to determine the true subject structure.",
+    },
+  };
+};
+
+/**
+ * Parses the LLM's plan reply as YAML after removing an optional code fence
+ * (yaml.safe_load() step of build_bids_plan()). Never throws: on failure it
+ * returns { _raw: <fence-stripped text>, _parseError: <reason> } instead.
+ */
+export const parsePlanYaml = (responseText: string): any | null => {
+  let text = responseText.trim();
+  if (text.startsWith("```yaml")) text = text.substring(7);
+  else if (text.startsWith("```")) text = text.split("\n").slice(1).join("\n");
+  if (text.endsWith("```")) text = text.substring(0, text.length - 3);
+  text = text.trim();
+  let failure = "parsed value is not an object";
+  try {
+    const parsed = yamlLoad(text);
+    if (parsed && typeof parsed === "object") return parsed;
+  } catch (e) {
+    console.warn("parsePlanYaml: YAML parse failed", e);
+    failure = String(e);
+  }
+  return { _raw: text, _parseError: failure };
+};
+
+/**
+ * Reconciles subjects.count in the plan with the user's subject count.
+ * Mirrors build_bids_plan() Step 4: the LLM's assignment_rules are trusted,
+ * so only the count field is overwritten.
+ *
+ * @param planYaml parsed plan object (not mutated)
+ * @param userNSubjects count given by the user; null or 0 disables the check
+ * @returns the same object when nothing changes, otherwise a shallow copy
+ *          whose subjects.count equals userNSubjects
+ */
+const validateSubjectCount = (
+  planYaml: any,
+  userNSubjects: number | null
+): any => {
+  const llmCount = planYaml?.subjects?.count ?? 0;
+  if (!userNSubjects || llmCount === userNSubjects) return planYaml;
+  console.warn(
+    `validateSubjectCount: LLM count (${llmCount}) ≠ user count (${userNSubjects}). ` +
+      `Trusting LLM assignment_rules, updating count field only.`
+  );
+  const subjects = { ...(planYaml.subjects || {}), count: userNSubjects };
+  return { ...planYaml, subjects };
+};
+
+/**
+ * Stage 5 entry point: evidence bundle → BIDSPlan YAML + participants.tsv.
+ * Mirrors build_bids_plan() in planner.py; called by handleGeneratePlan() in
+ * LLMPanel.tsx.
+ * Steps: (1) heuristic subject hints, (2) lean LLM payload, (3) LLM call and
+ * YAML parse, (4) subject-count patch and pattern-coverage check, (5, 6)
+ * participants.tsv, (7) plan metadata, then mat_mapping_path injection into
+ * nirs mappings when the dataset contains .mat files.
+ * The returned planYaml is serialized from the final plan object, so the
+ * patched subject count, the metadata block and mat_mapping_path are part of
+ * what gets saved. Returning the raw LLM text would drop all three and would
+ * keep the Markdown fence whenever the reply was fenced.
+ * @param opts evidence bundle, LLM configuration, optional abort signal and
+ *             status callback
+ * @returns plan YAML text, subject analysis, participants.tsv text and
+ *          coverage warnings
+ * @throws Error when the LLM reply is empty or cannot be parsed as YAML
+ */
+export const buildBidsPlan = async (
+  opts: BuildBidsPlanOptions
+): Promise<BuildBidsPlanResult> => {
+  const { evidenceBundle, llmConfig, signal, onStatus } = opts;
+  const log = (msg: string) => {
+    console.log(msg);
+    onStatus?.(msg);
+  };
+
+  const allFiles: string[] = evidenceBundle?.all_files || [];
+  const userHints = evidenceBundle?.user_hints || {};
+  const userNSubjects: number | null = userHints?.n_subjects ?? null;
+
+  // ── Step 1: Python structural hints (advisory) ────────────────────
+  log("Step 1: Extracting subject hints...");
+
+  let rawSubjectInfo = extractFromDirectoryStructure(allFiles);
+  if (!rawSubjectInfo || rawSubjectInfo.subject_records.length === 0) {
+    log("  Directory-level detection failed, trying flat filename analysis...");
+    rawSubjectInfo = extractFromFlatFilenames(allFiles);
+  }
+
+  // Neither detector found anything: use an explicit "none" placeholder.
+  const subjectAnalysis: SubjectAnalysis = rawSubjectInfo ?? {
+    success: false,
+    method: "none",
+    subject_records: [],
+    subject_count: 0,
+    has_site_info: false,
+    variants_by_subject: {},
+    python_generated_filename_rules: [],
+  };
+  log(
+    `  ${subjectAnalysis.subject_count} subjects (method: ${subjectAnalysis.method})`
+  );
+
+  // ── Step 2: Build optimized LLM payload ───────────────────────────
+  log("Step 2: Building LLM payload...");
+  const optimizedBundle = buildOptimizedBundle(evidenceBundle, subjectAnalysis);
+  const payload = JSON.stringify(optimizedBundle, null, 2);
+  const sampleFiles: string[] = (optimizedBundle as any).sample_files || [];
+  log(`  Sample files: ${sampleFiles.length}`);
+
+  // ── Step 3: Call LLM ──────────────────────────────────────────────
+  log(`Step 3: Calling LLM (${llmConfig.model})...`);
+  const raw = await llmBidsPlan(payload, llmConfig, signal);
+  if (!raw) throw new Error("LLM returned empty response for BIDS plan");
+
+  // ── Step 3b: Parse YAML ───────────────────────────────────────────
+  let planYaml = parsePlanYaml(raw);
+  if (!planYaml || planYaml._parseError) {
+    throw new Error(
+      `BIDS plan YAML parsing failed: ${planYaml?._parseError ?? "unknown"}`
+    );
+  }
+
+  // ── Step 4: Validate subject count ───────────────────────────────
+  log("Step 4: Validating subject count...");
+  planYaml = validateSubjectCount(planYaml, userNSubjects);
+  log(`  Final count: ${planYaml?.subjects?.count ?? "unknown"}`);
+
+  // ── Step 4b: Validate plan coverage against sample files ─────────
+  // Uses executorHelpers.validatePlanCoverage() — surfaces LLM pattern errors early
+  const coverage = validatePlanCoverage(sampleFiles, planYaml?.mappings || []);
+  if (coverage.warnings.length > 0) {
+    coverage.warnings.forEach((w) => log(`  ⚠ ${w}`));
+  } else {
+    log(`  ✓ Coverage: ${coverage.coveragePercent}% of sample files matched`);
+  }
+
+  // ── Step 5: Write participants.tsv ────────────────────────────────
+  log("Step 5: Generating participants.tsv...");
+  const participantsTsv = writeParticipantsFromPlan(planYaml, userNSubjects);
+
+  // ── Step 6: Merge extra metadata columns ──────────────────────────
+  log("Step 6: Merging participant metadata columns...");
+  const mergedTsv = mergeParticipantsFromPlan(participantsTsv, planYaml);
+
+  // ── Step 7: Inject plan metadata ─────────────────────────────────
+  // Mirrors plan_yaml["metadata"] = {...} in planner.py Step 7
+  planYaml.metadata = {
+    generated_at: new Date().toISOString(),
+    model: llmConfig.model,
+    id_strategy: "auto",
+  };
+
+  // ── Step MAT: inject mat_mapping_path into nirs mappings ─────────
+  log("Step MAT: .mat mapping deferred to local execute step");
+  const hasMatFiles = allFiles.some((f) => f.toLowerCase().endsWith(".mat"));
+  const mappings: any[] = Array.isArray(planYaml.mappings) ? planYaml.mappings : [];
+  if (hasMatFiles) {
+    for (const mapping of mappings) {
+      if (mapping?.modality !== "nirs") continue;
+      // `match` may be absent, one pattern or a list; no patterns counts as covering .mat.
+      const patterns: unknown[] = Array.isArray(mapping.match) ? mapping.match : mapping.match ? [mapping.match] : [];
+      const coversMat =
+        patterns.length === 0 ||
+        patterns.some((p) => String(p).toLowerCase().includes(".mat"));
+      if (coversMat) mapping.mat_mapping_path = "_staging/mat_mapping.json";
+    }
+  }
+
+  // Serialize the final plan so every adjustment above is persisted (no folding, no anchors).
+  const planYamlStr = yamlDump(planYaml, { lineWidth: -1, noRefs: true, skipInvalid: true });
+
+  log("✓ BIDSPlan complete");
+  return {
+    planYaml: planYamlStr,
+    subjectAnalysis,
+    participantsTsv: mergedTsv,
+    coverageWarnings: coverage.warnings,
+  };
+};
diff --git a/src/components/User/Dashboard/DatasetOrganizer/utils/trioHelpers.ts b/src/components/User/Dashboard/DatasetOrganizer/utils/trioHelpers.ts
new file mode 100644
index 0000000..064cb1d
--- /dev/null
+++ b/src/components/User/Dashboard/DatasetOrganizer/utils/trioHelpers.ts
@@ -0,0 +1,762 @@
+// src/components/User/Dashboard/DatasetOrganizer/utils/trioHelpers.ts
+//
+// Mirrors autobidsify/stages/trio.py
+// Owns Stage 4 of the pipeline: evidence bundle → trio files
+// (dataset_description.json, README.md, participants.tsv)
+//
+// Python equivalents:
+//   normalizeLicenseLocally()       → normalize_license_locally()
+//   checkTrioStatus()               → check_trio_status()
+//   isMarkdownContent()             → _is_markdown_content()
+//   validateDatasetDescription()    → _validate_dataset_description()
+//   fixFieldTypes()                 → _fix_field_types()
+//   parseLLMJsonResponse()          → _parse_llm_json_response()
+//   generateDatasetDescription()    → generate_dataset_description()
+//   generateReadme()                → generate_readme()
+//   generateParticipants()          → generate_participants()
+//   generateTrioFiles()             → trio_generate_all() — main entry point
+import {
+  callLLM,
+  llmTrioDatasetDescription,
+  llmTrioReadme,
+  LLMConfig,
+} from "./llm";
+import { FileItem } from "redux/projects/types/projects.interface";
+import { OllamaService } from "services/ollama.service";
+
+/** LLM settings used by the trio helpers; an alias of `LLMConfig` imported from `./llm`. */
+export type TrioLLMConfig = LLMConfig;
+/**
+ * License identifiers accepted in the `License` field of
+ * dataset_description.json. Mirrors LICENSE_WHITELIST in constants.py.
+ * The list order is the Set's iteration order.
+ */
+const ACCEPTED_LICENSE_IDS: readonly string[] = [
+  "PDDL",
+  "CC0",
+  "PD",
+  "CC-BY-4.0",
+  "CC-BY-SA-4.0",
+  "BSD-3-Clause",
+  "BSD-2-Clause",
+  "CDDL-1.0",
+  "MPL",
+  "MIT",
+  "GPL-2.0",
+  "GPL-2.0+",
+  "GPL-3.0",
+  "GPL-3.0+",
+  "LGPL-3.0+",
+  "GFDL-1.3",
+  "CC-BY-NC-4.0",
+  "CC-BY-NC-SA-4.0",
+  "CC-BY-NC-ND-4.0",
+  "Non-Standard",
+];
+
+export const LICENSE_WHITELIST = new Set(ACCEPTED_LICENSE_IDS);
+
+// ============================================================================
+// checkTrioStatus()
+// Mirrors check_trio_status() in trio.py
+// Checks VFS FileItem[] instead of disk
+// ============================================================================
+
+/** Presence, provenance and text of a single trio file. */
+export type TrioFileStatus = {
+  /** True when a matching file was found. */
+  exists: boolean;
+  /** Who produced the file, or null when no file was found. */
+  source: "user" | "ai" | null;
+  /** File text, or null when absent. */
+  content: string | null;
+};
+
+/** Status of the three trio files, one entry per logical file. */
+export interface TrioStatus {
+  dataset_description: TrioFileStatus;
+  readme: TrioFileStatus;
+  participants: TrioFileStatus;
+}
+
+/**
+ * Report which trio files are already present in the in-memory file list.
+ *
+ * Files are matched by exact `name`; for each logical file the first match in
+ * `files` wins.
+ *
+ * @param files File items to inspect.
+ * @returns One status entry each for dataset_description.json, the README and
+ *   participants.tsv.
+ */
+export const checkTrioStatus = (files: FileItem[]): TrioStatus => {
+  const readmeNames = ["README.md", "README.txt", "README.rst", "readme.md"];
+
+  // Build the status entry for the first file whose name is one of `names`.
+  const lookup = (names: string[]): TrioStatus["readme"] => {
+    const hit = files.find((f) => names.includes(f.name));
+    if (!hit) {
+      return { exists: false, source: null, content: null };
+    }
+    return {
+      exists: true,
+      source: hit.source as "user" | "ai",
+      content: hit.content ?? null,
+    };
+  };
+
+  return {
+    dataset_description: lookup(["dataset_description.json"]),
+    readme: lookup(readmeNames),
+    participants: lookup(["participants.tsv"]),
+  };
+};
+
+// ============================================================================
+// normalizeLicenseLocally()
+// Mirrors normalize_license_locally() in trio.py
+// Full alias table — handles natural language, abbreviations, typos
+// ============================================================================
+
+// Canonical BIDS license identifier → accepted spellings. Every spelling is
+// already in lookup-key form: uppercase, with whitespace, "-", "." and "_"
+// removed ("+" is kept, so "GPL-3.0+" becomes "GPL30+").
+// Built once at module load instead of on every call.
+const LICENSE_ALIAS_TABLE: Record<string, string[]> = {
+  CC0: [
+    "CC0",
+    "CC010",
+    "CC01",
+    "CREATIVECOMMONSZERO",
+    "CREATIVECOMMONS0",
+    "CC0UNIVERSALPUBLICDOMAIN",
+    "CC010UNIVERSAL",
+    "CC0UNIVERSAL",
+    "ZERORIGHTSPUBLICDOMAIN",
+    "CC0LICENSE",
+  ],
+  PD: ["PD", "PUBLICDOMAIN", "PUBLIEDOMAIN"],
+  PDDL: ["PDDL", "PDDL10", "PUBLICDOMAINDEDICATIONLICENSE"],
+  "CC-BY-4.0": [
+    "CCBY40",
+    "CCBY4",
+    "CCBY",
+    "CREATIVECOMMONSATTRIBUTION40",
+    "CREATIVECOMMONSATTRIBUTION4",
+    "CREATIVECOMMONSATTRIBUTION40INTERNATIONAL",
+  ],
+  "CC-BY-SA-4.0": [
+    "CCBYSA40",
+    "CCBYSA4",
+    "CCBYSA",
+    "CREATIVECOMMONSATTRIBUTIONSHAREALIKE40",
+  ],
+  "CC-BY-NC-4.0": [
+    "CCBYNC40",
+    "CCBYNC4",
+    "CCBYNC",
+    "CREATIVECOMMONSATTRIBUTIONNONCOMMERCIAL40",
+  ],
+  "CC-BY-NC-SA-4.0": ["CCBYNCSA40", "CCBYNCSA4"],
+  "CC-BY-NC-ND-4.0": ["CCBYNCND40", "CCBYNCND4"],
+  MIT: ["MIT", "MITLICENSE", "MITOPENSOURCE"],
+  "BSD-3-Clause": ["BSD3CLAUSE", "BSD3", "BSDNEW", "BSDREVISED"],
+  "BSD-2-Clause": ["BSD2CLAUSE", "BSD2", "BSDORIGINAL", "BSDSIMPLIFIED"],
+  "GPL-2.0": ["GPL20", "GPL2", "GNUGPL2"],
+  "GPL-2.0+": ["GPL20+", "GPL2+", "GPL2ORLATER"],
+  "GPL-3.0": ["GPL30", "GPL3", "GNUGPL3"],
+  "GPL-3.0+": ["GPL30+", "GPL3+", "GPL3ORLATER"],
+  "LGPL-3.0+": ["LGPL30+", "LGPL3+", "LGPL3ORLATER"],
+  MPL: ["MPL", "MPL20", "MPL2", "MOZILLAPUBLICLICENSE"],
+  "CDDL-1.0": ["CDDL", "CDDL10"],
+  "GFDL-1.3": ["GFDL", "GFDL13"],
+  "Non-Standard": [
+    "NONSTANDARD",
+    "CUSTOM",
+    "OTHER",
+    "PROPRIETARY",
+    "RESTRICTED",
+  ],
+};
+
+/**
+ * Map a free-form license string onto a canonical BIDS license identifier.
+ *
+ * The input is uppercased and stripped of whitespace, "-", "." and "_" before
+ * being looked up in the alias table, so "cc-by 4.0", "CC_BY_4.0" and
+ * "CCBY40" all resolve to "CC-BY-4.0".
+ *
+ * @param licenseStr License text as written by a user or returned by the LLM.
+ * @returns The canonical identifier; "Non-Standard" for text that matches no
+ *   alias; null when there is no usable text (non-string, empty,
+ *   whitespace-only or separator-only input).
+ */
+export const normalizeLicenseLocally = (licenseStr: string): string | null => {
+  // Values coming out of parsed LLM JSON are untyped at runtime, so a number
+  // or object can reach this point despite the declared parameter type.
+  if (typeof licenseStr !== "string") return null;
+
+  // Normalize: uppercase, strip separators
+  const key = licenseStr.toUpperCase().replace(/[\s\-._]+/g, "");
+  if (!key) return null; // nothing left to classify
+
+  for (const [canonical, variants] of Object.entries(LICENSE_ALIAS_TABLE)) {
+    if (variants.includes(key)) return canonical;
+  }
+
+  return "Non-Standard";
+};
+
+// ============================================================================
+// parseLLMJsonResponse()
+// Mirrors _parse_llm_json_response() in trio.py
+// Shared utility — also used in plannerHelpers.ts
+// ============================================================================
+
+/**
+ * Find the index of the `}` that closes the `{` at `start`, ignoring braces
+ * that occur inside JSON string literals. Returns -1 if it is never closed.
+ */
+const findClosingBrace = (text: string, start: number): number => {
+  let depth = 0;
+  let inString = false;
+  let escaped = false;
+  for (let i = start; i < text.length; i++) {
+    const ch = text[i];
+    if (inString) {
+      if (escaped) escaped = false;
+      else if (ch === "\\") escaped = true;
+      else if (ch === '"') inString = false;
+      continue;
+    }
+    if (ch === '"') inString = true;
+    else if (ch === "{") depth++;
+    else if (ch === "}" && --depth === 0) return i;
+  }
+  return -1;
+};
+
+/**
+ * Parse a JSON value out of an LLM reply.
+ *
+ * Steps: strip a leading/trailing Markdown code fence, try to parse the whole
+ * text, then fall back to the first brace-balanced `{...}` span that parses.
+ * The fallback handles replies that wrap the object in prose, including prose
+ * that itself contains braces.
+ *
+ * @param text Raw LLM reply.
+ * @param stepName Label used in the console warnings.
+ * @returns The parsed value, or null when the reply is empty or holds no
+ *   parseable JSON (a warning is logged in both cases).
+ */
+export const parseLLMJsonResponse = (
+  text: string,
+  stepName: string
+): any | null => {
+  if (typeof text !== "string" || !text.trim()) {
+    console.warn(`${stepName}: LLM returned empty response`);
+    return null;
+  }
+
+  let t = text.trim();
+  // "```json" is matched case-insensitively; any other fence drops its first line.
+  if (/^```json/i.test(t)) t = t.slice(7);
+  else if (t.startsWith("```")) t = t.split("\n").slice(1).join("\n");
+  if (t.endsWith("```")) t = t.slice(0, -3);
+  t = t.trim();
+
+  // Direct parse
+  try {
+    return JSON.parse(t);
+  } catch {
+    // not pure JSON — fall through to span extraction
+  }
+
+  // raw_decode equivalent: try each "{" as the start of a balanced object and
+  // return the first span that is valid JSON.
+  for (
+    let start = t.indexOf("{");
+    start !== -1;
+    start = t.indexOf("{", start + 1)
+  ) {
+    const end = findClosingBrace(t, start);
+    if (end === -1) continue;
+    try {
+      return JSON.parse(t.slice(start, end + 1));
+    } catch {
+      // balanced but not JSON (e.g. "{placeholder}") — try the next "{"
+    }
+  }
+
+  console.warn(
+    `${stepName}: Failed to parse JSON. Preview: ${t.slice(0, 200)}`
+  );
+  return null;
+};
+
+// ============================================================================
+// isMarkdownContent()
+// Mirrors _is_markdown_content() in trio.py
+// ============================================================================
+
+/**
+ * Heuristic check for whether a text looks like Markdown.
+ *
+ * True when the trimmed text starts with a heading or bold marker, contains
+ * "# " or a list item anywhere, or has "- " within its first 100 characters.
+ *
+ * @param text Text to classify (trimmed before checking).
+ */
+export const isMarkdownContent = (text: string): boolean => {
+  const body = text.trim();
+  const opening = body.slice(0, 100);
+
+  const leadingMarkers = ["#", "##", "**"];
+  if (leadingMarkers.some((marker) => body.startsWith(marker))) return true;
+
+  const embeddedMarkers = ["# ", "\n## ", "\n- "];
+  if (embeddedMarkers.some((marker) => body.includes(marker))) return true;
+
+  return opening.includes("- ");
+};
+
+// ============================================================================
+// validateDatasetDescription()
+// Mirrors _validate_dataset_description() in trio.py
+// ============================================================================
+
+/**
+ * Check a dataset_description object and list what is wrong with it.
+ *
+ * Blocking problems (make `isValid` false): a missing Name, BIDSVersion or
+ * License, and an Authors/Funding/EthicsApprovals value that is not an array.
+ * Everything else (license not in the whitelist, missing DataLicense for
+ * "Non-Standard", empty fields) is reported in `issues` but does not
+ * invalidate the object.
+ *
+ * @param dd Parsed dataset_description content.
+ * @returns `isValid` plus the human-readable `issues`, in check order.
+ */
+export const validateDatasetDescription = (
+  dd: Record<string, any>
+): { isValid: boolean; issues: string[] } => {
+  const issues: string[] = [];
+  // Validity is tracked explicitly rather than recovered from message text, so
+  // a field value that happens to contain "must be an array" cannot flip it.
+  let hasBlockingIssue = false;
+  const addBlocking = (message: string) => {
+    issues.push(message);
+    hasBlockingIssue = true;
+  };
+
+  if (!dd.Name) addBlocking("Missing required field: Name");
+  if (!dd.BIDSVersion) addBlocking("Missing required field: BIDSVersion");
+  if (!dd.License) addBlocking("Missing required field: License");
+  else if (!LICENSE_WHITELIST.has(dd.License))
+    issues.push(`License '${dd.License}' not in BIDS whitelist`);
+
+  for (const field of ["Authors", "Funding", "EthicsApprovals"]) {
+    if (dd[field] !== undefined && !Array.isArray(dd[field]))
+      addBlocking(`${field} must be an array`);
+  }
+
+  if (dd.License === "Non-Standard" && !dd.DataLicense)
+    issues.push("License='Non-Standard' requires DataLicense field");
+
+  const empty = Object.entries(dd)
+    .filter(([, v]) => v === "" || (Array.isArray(v) && v.length === 0))
+    .map(([k]) => k);
+  if (empty.length > 0)
+    issues.push(`Empty fields (will be removed): ${empty.join(", ")}`);
+
+  return { isValid: !hasBlockingIssue, issues };
+};
+
+// ============================================================================
+// fixFieldTypes()
+// Mirrors _fix_field_types() in trio.py
+// Converts string → array for Authors/Funding/EthicsApprovals,
+// removes empty strings
+// ============================================================================
+
+/**
+ * Return a repaired shallow copy of a dataset_description object.
+ *
+ * For Authors, Funding and EthicsApprovals: a non-blank string is wrapped in
+ * a one-element array, while a blank string or empty array removes the field.
+ * Afterwards every other field holding "" is removed, except Name,
+ * BIDSVersion, DatasetType and License. The input object is not modified.
+ *
+ * @param dd Parsed dataset_description content.
+ * @returns The repaired copy and a message per string-to-array conversion.
+ */
+export const fixFieldTypes = (
+  dd: Record<string, any>
+): { fixed: Record<string, any>; fixes: string[] } => {
+  const fixes: string[] = [];
+  const fixed: Record<string, any> = Object.assign({}, dd);
+
+  ["Authors", "Funding", "EthicsApprovals"].forEach((name) => {
+    if (!(name in fixed)) return;
+
+    const current = fixed[name];
+    const isBlankString = typeof current === "string" && !current.trim();
+    const isEmptyList = Array.isArray(current) && current.length === 0;
+
+    if (isBlankString || isEmptyList) {
+      delete fixed[name];
+    } else if (typeof current === "string") {
+      fixed[name] = [current];
+      fixes.push(`Converted ${name} from string to array`);
+    }
+  });
+
+  // Required fields are kept even when they hold an empty string.
+  const keepWhenEmpty = ["Name", "BIDSVersion", "DatasetType", "License"];
+  Object.keys(fixed)
+    .filter((key) => fixed[key] === "" && !keepWhenEmpty.includes(key))
+    .forEach((key) => {
+      delete fixed[key];
+    });
+
+  return { fixed, fixes };
+};
+
+// ============================================================================
+// LLM call config type
+// ============================================================================
+
+// export interface TrioLLMConfig {
+//   provider: string;
+//   model: string;
+//   apiKey: string;
+//   baseUrl: string;
+//   isAnthropic?: boolean;
+//   noApiKey?: boolean;
+// }
+
+/** Input to `generateTrioFiles()`. */
+export interface GenerateTrioOptions {
+  /** Evidence bundle; the helpers read `user_hints`, `documents`, `counts_by_ext` and `all_files` from it. */
+  evidenceBundle: any;
+  /** Current file list, inspected by `checkTrioStatus()` to find existing trio files. */
+  files: FileItem[];
+  /** LLM settings passed through to the LLM helper calls. */
+  llmConfig: TrioLLMConfig;
+  /** Optional abort signal passed through to the LLM helper calls. */
+  signal?: AbortSignal;
+  /** Optional callback that receives every progress message that is logged. */
+  onStatus?: (msg: string) => void;
+}
+
+/** Output of `generateTrioFiles()`. */
+export interface GenerateTrioResult {
+  /** dataset_description.json content as an object. */
+  datasetDesc: Record<string, any>;
+  /** README text. */
+  readmeContent: string;
+  /** participants.tsv text; empty when generation was deferred to the plan stage. */
+  participantsTsv: string;
+  /** True for each file that was kept because its source is "user". */
+  skipped: { datasetDesc: boolean; readme: boolean; participants: boolean };
+}
+
+// ============================================================================
+// callTrioLLM() — internal LLM dispatcher
+// ============================================================================
+
+/**
+ * Send a single-turn prompt to the configured LLM and return the reply text.
+ *
+ * Three transports are supported: the Ollama service wrapper, an
+ * Anthropic-style endpoint (`x-api-key` header, reply in `content[0].text`)
+ * and an OpenAI-style endpoint (optional Bearer token, reply in
+ * `choices[0].message.content`).
+ *
+ * NOTE: on the Ollama path `maxTokens` and `signal` are not forwarded, because
+ * `OllamaService.chat` is called with the model and messages only.
+ *
+ * @param prompt User message to send.
+ * @param llmConfig Provider, model, endpoint and credentials.
+ * @param maxTokens Completion token limit for the HTTP transports.
+ * @param signal Optional abort signal for the HTTP transports.
+ * @returns The reply text, or "" when the response carries no text.
+ * @throws Error when the endpoint answers with a non-2xx status; fetch and
+ *   JSON-decoding failures propagate as before.
+ */
+const callTrioLLM = async (
+  prompt: string,
+  llmConfig: TrioLLMConfig,
+  maxTokens: number = 2048,
+  signal?: AbortSignal
+): Promise<string> => {
+  const { provider, model, apiKey, baseUrl, isAnthropic, noApiKey } = llmConfig;
+  const messages = [{ role: "user", content: prompt }];
+
+  if (provider === "ollama") {
+    const res = await OllamaService.chat(model, messages);
+    return res?.choices?.[0]?.message?.content ?? "";
+  }
+
+  // The two HTTP transports differ only in auth headers and reply shape.
+  const authHeaders: Record<string, string> = isAnthropic
+    ? { "x-api-key": apiKey, "anthropic-version": "2023-06-01" }
+    : noApiKey
+    ? {}
+    : { Authorization: `Bearer ${apiKey}` };
+
+  const res = await fetch(baseUrl, {
+    method: "POST",
+    signal,
+    headers: { "Content-Type": "application/json", ...authHeaders },
+    body: JSON.stringify({ model, max_tokens: maxTokens, messages }),
+  });
+
+  // Without this check an auth or rate-limit error body would be read as a
+  // normal reply and silently returned as an empty completion.
+  if (!res.ok) {
+    let detail = "";
+    try {
+      detail = (await res.text()).slice(0, 200);
+    } catch {
+      // error body unreadable — report the status alone
+    }
+    throw new Error(
+      `LLM request failed with HTTP ${res.status}${detail ? `: ${detail}` : ""}`
+    );
+  }
+
+  const data = await res.json();
+  return isAnthropic
+    ? (data?.content?.[0]?.text ?? "")
+    : (data?.choices?.[0]?.message?.content ?? "");
+};
+
+// ============================================================================
+// generateDatasetDescription()
+// Mirrors generate_dataset_description() in trio.py
+//
+// Key design (mirrors Python):
+//   LLM outputs 'raw_license' (natural language, no format constraints)
+//   Python/TS normalizes: raw_license → normalizeLicenseLocally() → BIDS canonical
+//   This is robust: user can write anything, LLM understands it, we map it.
+// ============================================================================
+
+/**
+ * Build the dataset_description.json object.
+ *
+ * Sends user hints, the first 500 characters of each document, extension
+ * counts and any existing description to `llmTrioDatasetDescription`, parses
+ * the reply with `parseLLMJsonResponse`, merges it with the existing
+ * description and normalizes the license with `normalizeLicenseLocally`.
+ *
+ * @param evidenceBundle Evidence bundle (`user_hints`, `documents`, `counts_by_ext` are read).
+ * @param existingContent Text of an existing dataset_description.json, or null;
+ *   unparseable text is ignored.
+ * @param llmConfig LLM settings passed to the LLM helper.
+ * @param signal Optional abort signal passed to the LLM helper.
+ * @returns The assembled description with empty strings and empty arrays removed.
+ */
+const generateDatasetDescription = async (
+  evidenceBundle: any,
+  existingContent: string | null,
+  llmConfig: TrioLLMConfig,
+  signal?: AbortSignal
+): Promise<Record<string, any>> => {
+  // Best effort: existing content that is not valid JSON is treated as absent.
+  let existingDD: Record<string, any> | null = null;
+  if (existingContent) {
+    try {
+      existingDD = JSON.parse(existingContent);
+    } catch {}
+  }
+
+  const raw = await llmTrioDatasetDescription(
+    JSON.stringify({
+      user_hints: evidenceBundle?.user_hints ?? {},
+      documents: (evidenceBundle?.documents ?? []).map((d: any) => ({
+        filename: d.filename,
+        content: (d.content || "").slice(0, 500),
+      })),
+      counts_by_ext: evidenceBundle?.counts_by_ext ?? {},
+      existing: existingDD,
+    }),
+    llmConfig,
+    signal
+  );
+  const result = parseLLMJsonResponse(raw, "dataset_description");
+
+  // Extract raw_license — mirrors Python's multi-location search.
+  // First non-empty wins: top-level raw_license, nested raw_license, nested
+  // License, then the existing file's License.
+  const rawLicense: string =
+    result?.raw_license ||
+    result?.dataset_description?.raw_license ||
+    result?.dataset_description?.License ||
+    existingDD?.License ||
+    "";
+
+  // Local normalization — mirrors the normalize_license_locally() call in Python
+  const normalizedLicense = rawLicense
+    ? normalizeLicenseLocally(rawLicense)
+    : null;
+
+  const llmDD = result?.dataset_description || {};
+
+  // Merge: start from the LLM fields, then let every non-empty (truthy) value
+  // of the existing description override them. The normalized license is
+  // applied last and therefore has the highest priority.
+  const merged: Record<string, any> = {};
+  Object.assign(merged, llmDD);
+  if (existingDD) {
+    for (const [k, v] of Object.entries(existingDD)) {
+      if (v) merged[k] = v; // existing wins only if non-empty
+    }
+  }
+  delete merged.raw_license; // remove if LLM put it inside
+
+  if (normalizedLicense) {
+    merged.License = normalizedLicense;
+  }
+
+  // Build final structure — mirrors Python's required_structure assembly.
+  // BIDSVersion is always set to "1.10.0", whatever the merged value was.
+  const final: Record<string, any> = {
+    Name: merged.Name || "",
+    BIDSVersion: "1.10.0",
+    DatasetType: merged.DatasetType || "raw",
+    License: merged.License || "",
+  };
+
+  // Array fields — mirrors _fix_field_types(): a non-blank string is wrapped
+  // in an array, a non-empty array is copied as-is, anything else is dropped.
+  for (const field of ["Authors", "Funding", "EthicsApprovals"]) {
+    const val = merged[field];
+    if (!val) continue;
+    if (typeof val === "string" && val.trim()) final[field] = [val];
+    else if (Array.isArray(val) && val.length > 0) final[field] = val;
+  }
+
+  // Optional scalar fields (copied when truthy)
+  for (const field of [
+    "Acknowledgements",
+    "HowToAcknowledge",
+    "DatasetDOI",
+    "HEDVersion",
+    "DataLicense",
+  ]) {
+    if (merged[field]) final[field] = merged[field];
+  }
+
+  // Optional array fields (copied only when they are non-empty arrays)
+  for (const field of ["ReferencesAndLinks", "GeneratedBy", "SourceDatasets"]) {
+    if (Array.isArray(merged[field]) && merged[field].length > 0)
+      final[field] = merged[field];
+  }
+
+  // Second normalization pass: covers a License that did not go through the
+  // normalization above and is not in the whitelist.
+  const finalLic = final.License;
+  if (finalLic && !LICENSE_WHITELIST.has(finalLic)) {
+    const again = normalizeLicenseLocally(finalLic);
+    if (again) final.License = again;
+  }
+
+  // Remove empty strings and empty arrays. This also drops Name and License
+  // when they ended up as "".
+  for (const [k, v] of Object.entries(final)) {
+    if (v === "" || (Array.isArray(v) && v.length === 0)) delete final[k];
+  }
+
+  return final;
+};
+
+// ============================================================================
+// generateReadme()
+// Mirrors generate_readme() in trio.py
+// ============================================================================
+
+/**
+ * Generate README text from the evidence bundle.
+ *
+ * The LLM may answer with plain Markdown or with a JSON envelope of the form
+ * `{"readme_content": "..."}`. A reply that looks like JSON is unwrapped
+ * first: the Markdown heuristic also matches JSON whose string values contain
+ * "# " or "- ", and would otherwise return the raw JSON as the README.
+ *
+ * @param evidenceBundle Evidence bundle (`documents` and `user_hints` are read).
+ * @param llmConfig LLM settings passed to the LLM helper.
+ * @param signal Optional abort signal passed to the LLM helper.
+ * @returns README text, or a minimal default README when nothing usable came back.
+ */
+const generateReadme = async (
+  evidenceBundle: any,
+  llmConfig: TrioLLMConfig,
+  signal?: AbortSignal
+): Promise<string> => {
+  const fallback = "# Dataset\n\nNeuroimaging dataset.\n";
+
+  const raw = await llmTrioReadme(
+    JSON.stringify({
+      documents: (evidenceBundle?.documents ?? []).map((d: any) => ({
+        filename: d.filename,
+        content: (d.content || "").slice(0, 500),
+      })),
+      user_hints: evidenceBundle?.user_hints ?? {},
+      existing_readme: null,
+    }),
+    llmConfig,
+    signal
+  );
+
+  const text = typeof raw === "string" ? raw.trim() : "";
+
+  // Pull a non-blank readme_content string out of a parsed reply, if present.
+  const readmeFrom = (parsed: any): string | null => {
+    const content = parsed?.readme_content;
+    return typeof content === "string" && content.trim() ? content : null;
+  };
+
+  const looksLikeJson = text.startsWith("{") || /^```json/i.test(text);
+  if (looksLikeJson) {
+    const parsed = parseLLMJsonResponse(text, "README");
+    // A reply that parsed as JSON is never Markdown, even without the field.
+    if (parsed !== null) return readmeFrom(parsed) ?? fallback;
+  }
+
+  if (isMarkdownContent(text)) return text;
+
+  // JSON-looking text that failed to parse was already tried above.
+  if (looksLikeJson) return fallback;
+
+  // Prose that may still embed a JSON envelope somewhere inside it.
+  return readmeFrom(parseLLMJsonResponse(text, "README")) ?? fallback;
+};
+// ============================================================================
+// generateParticipants()
+// Mirrors generate_participants() in trio.py
+//
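+// Design of the full version (mirrors Python; kept below as commented-out code):
+//   LLM decides ONLY extra demographic column schema
+//   TS always controls participant_id and original_id from subject analysis
+//
+// The active implementation is a placeholder: it makes no LLM call and emits
+// only the participant_id column, or defers to the plan stage.
+// ============================================================================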
+
+/**
+ * Produce a placeholder participants.tsv, or defer it.
+ *
+ * Returns null (deferred to the plan stage) when the `n_subjects` hint exceeds
+ * 100 or the bundle lists more than 500 files. Otherwise returns a
+ * single-column table: the `participant_id` header followed by
+ * `sub-01` ... `sub-NN`, with one row when no `n_subjects` hint is present.
+ *
+ * @param evidenceBundle Evidence bundle (`user_hints.n_subjects` and `all_files` are read).
+ */
+const generateParticipants = (evidenceBundle: any): string | null => {
+  const nSubjects: number = evidenceBundle?.user_hints?.n_subjects ?? 1;
+  const fileCount = (evidenceBundle?.all_files || []).length;
+
+  // Mirror Python deferral logic exactly
+  const deferToPlanStage = nSubjects > 100 || fileCount > 500;
+  if (deferToPlanStage) {
+    return null;
+  }
+
+  // Simple placeholder — plan stage will overwrite with full columns
+  const lines = ["participant_id"];
+  for (let n = 1; n <= nSubjects; n++) {
+    lines.push(`sub-${String(n).padStart(2, "0")}`);
+  }
+  return lines.join("\n");
+};
+// const generateParticipants = async (
+//   evidenceBundle: any,
+//   files: FileItem[],
+//   llmConfig: TrioLLMConfig,
+//   signal?: AbortSignal
+// ): Promise<string> => {
+//   const userText = evidenceBundle?.user_hints?.user_text || "";
+//   const allFiles: string[] = evidenceBundle?.all_files || [];
+//   const userNSubjects: number | null =
+//     evidenceBundle?.user_hints?.n_subjects ?? null;
+//   const dominantPrefixes =
+//     evidenceBundle?.filename_analysis?.python_statistics?.dominant_prefixes;
+
+//   // Compute subject analysis — mirrors Python's authoritative subject detection
+//   const subjectAnalysis = extractSubjectAnalysis(
+//     allFiles,
+//     userNSubjects,
+//     dominantPrefixes
+//   );
+
+//   const idMap = subjectAnalysis.id_mapping.id_mapping;
+//   const reverseMap = subjectAnalysis.id_mapping.reverse_mapping;
+//   const subjectRecords = subjectAnalysis.subject_records;
+
+//   // Build subject labels list
+//   const expectedCount = userNSubjects || Object.keys(idMap).length || 1;
+//   const subjectLabels: string[] =
+//     Object.keys(idMap).length > 0 &&
+//     (!userNSubjects || Object.keys(idMap).length === userNSubjects)
+//       ? Object.values(idMap).map((id: string) => `sub-${id}`)
+//       : Array.from(
+//           { length: expectedCount },
+//           (_, i) => `sub-${String(i + 1).padStart(2, "0")}`
+//         );
+
+//   // Ask LLM ONLY for extra column schema — mirrors Python's design
+//   const prompt = `You are a BIDS participants.tsv column schema generator.
+
+// USER-PROVIDED CONTENT:
+// ${"─".repeat(60)}
+// ${userText || "(none provided)"}
+// ${"─".repeat(60)}
+
+// YOUR JOB: Decide which EXTRA columns belong in participants.tsv based ONLY
+// on what is explicitly stated in the user content above.
+
+// STRICT RULES:
+// - participant_id and original_id are ALWAYS added by code — do NOT include them
+// - ONLY add columns for demographics EXPLICITLY mentioned
+// - DO NOT invent age, sex, handedness unless directly stated
+// - If no demographic info is mentioned, return empty columns array
+
+// Output ONLY valid JSON (no markdown fences):
+// {
+//   "columns": [
+//     {"name": "sex", "levels": ["M", "F"]}
+//   ]
+// }
+
+// If no extra columns: {"columns": []}`;
+
+//   const raw = await callTrioLLM(prompt, llmConfig, 1024, signal);
+//   const schema = parseLLMJsonResponse(raw, "participants");
+
+//   // Extra columns decided by LLM (demographic columns only)
+//   const extraColumns: string[] = (schema?.columns || [])
+//     .map((c: any) => c.name)
+//     .filter((n: string) => n !== "participant_id" && n !== "original_id");
+
+//   // TypeScript always controls participant_id and original_id
+//   // mirrors Python: _generate_participants_tsv_from_python()
+//   const columns = ["participant_id", "original_id", ...extraColumns];
+//   const header = columns.join("\t");
+
+//   const rows = subjectLabels.map((subId) => {
+//     const bareId = subId.replace(/^sub-/, "");
+//     const originalId = reverseMap[bareId] || "n/a";
+//     const record = subjectRecords.find((r) => r.original_id === originalId);
+
+//     return columns
+//       .map((col) => {
+//         if (col === "participant_id") return subId;
+//         if (col === "original_id") return originalId;
+//         if (col === "group") return (record as any)?.group ?? "n/a";
+//         return "n/a";
+//       })
+//       .join("\t");
+//   });
+
+//   return [header, ...rows].join("\n");
+// };
+
+// ============================================================================
+// generateTrioFiles()
+// Main entry point — mirrors trio_generate_all() in trio.py
+// Called by handleGenerateTrio() in LLMPanel.tsx
+// ============================================================================
+
+/**
+ * Generate the trio files (dataset_description.json, README, participants.tsv).
+ *
+ * For each file the existing copy decides what happens:
+ *   source === "user"  → user uploaded it → kept as-is, never overwritten
+ *   source === "ai"    → generated earlier → regenerated
+ *   no file            → generated
+ *
+ * @param opts Evidence bundle, current files, LLM settings, optional abort
+ *   signal and optional status callback.
+ * @returns The three file contents plus flags for the files that were kept.
+ *   `participantsTsv` is "" when generation is deferred to the plan stage.
+ */
+export const generateTrioFiles = async (
+  opts: GenerateTrioOptions
+): Promise<GenerateTrioResult> => {
+  const { evidenceBundle, files, llmConfig, signal, onStatus } = opts;
+  const log = (msg: string) => {
+    console.log(msg);
+    onStatus?.(msg);
+  };
+
+  const status = checkTrioStatus(files);
+  log(
+    `Trio status: DD=${status.dataset_description.exists}, README=${status.readme.exists}, participants=${status.participants.exists}`
+  );
+
+  const skipDD = status.dataset_description.source === "user";
+  const skipReadme = status.readme.source === "user";
+  const skipParticipants = status.participants.source === "user";
+
+  // ── dataset_description.json ─────────────────────────────────────
+  let datasetDesc: Record<string, any>;
+
+  if (skipDD) {
+    log("1/3 dataset_description.json user-uploaded, skipping...");
+    // Content may be null (JSON.parse(null) yields null rather than throwing)
+    // or valid JSON that is not an object; both fall back to {} so callers
+    // always receive an object.
+    datasetDesc = {};
+    try {
+      const parsed: unknown = JSON.parse(
+        status.dataset_description.content ?? ""
+      );
+      if (
+        parsed !== null &&
+        typeof parsed === "object" &&
+        !Array.isArray(parsed)
+      ) {
+        datasetDesc = parsed as Record<string, any>;
+      }
+    } catch {
+      // unreadable user file — keep the empty object
+    }
+  } else {
+    // Generates on first click AND regenerates on every subsequent click
+    log("1/3 Generating dataset_description.json...");
+    datasetDesc = await generateDatasetDescription(
+      evidenceBundle,
+      status.dataset_description.content ?? null,
+      llmConfig,
+      signal
+    );
+    log(`  License: ${datasetDesc.License || "MISSING"}`);
+  }
+
+  // ── README.md ────────────────────────────────────────────────────
+  let readmeContent: string;
+
+  if (skipReadme) {
+    log("2/3 README.md user-uploaded, skipping...");
+    // A user file without content yields "" instead of crashing in the
+    // fence-stripping step below.
+    readmeContent = status.readme.content ?? "";
+  } else {
+    log("2/3 Generating README.md...");
+    readmeContent = await generateReadme(evidenceBundle, llmConfig, signal);
+  }
+
+  // ── participants.tsv ─────────────────────────────────────────────
+  let participantsTsv: string;
+
+  if (skipParticipants) {
+    log("3/3 participants.tsv user-uploaded, skipping...");
+    participantsTsv = status.participants.content ?? "";
+  } else {
+    const simple = generateParticipants(evidenceBundle);
+    if (simple === null) {
+      log("3/3 participants.tsv deferred to plan stage...");
+      participantsTsv = ""; // empty — plan step will generate it
+    } else {
+      log("3/3 Generating basic participants.tsv...");
+      participantsTsv = simple;
+    }
+  }
+
+  log("✓ Trio generation complete");
+
+  // Strip a wrapping code fence the LLM may have added, then trim.
+  return {
+    datasetDesc,
+    readmeContent: readmeContent
+      .replace(/^```markdown\n?/g, "")
+      .replace(/\n?```$/g, "")
+      .trim(),
+    participantsTsv: participantsTsv
+      .replace(/^```\n?/g, "")
+      .replace(/\n?```$/g, "")
+      .trim(),
+    skipped: {
+      datasetDesc: skipDD,
+      readme: skipReadme,
+      participants: skipParticipants,
+    },
+  };
+};
diff --git a/src/services/ollama.service.ts b/src/services/ollama.service.ts
index 9d8b368..3decc45 100644
--- a/src/services/ollama.service.ts
+++ b/src/services/ollama.service.ts
@@ -1,17 +1,18 @@
 const API_URL = process.env.REACT_APP_API_URL || "http://localhost:5000/api/v1";
 
-const getQwenTemperature = (modelName: string): number => {
-  if (modelName.includes("next") || modelName.includes("fast")) return 0.4;
-  if (modelName.includes("careful") || modelName.includes("think")) return 0.15;
-  return 0.3;
-};
+// const getQwenTemperature = (modelName: string): number => {
+//   if (modelName.includes("next") || modelName.includes("fast")) return 0.4;
+//   if (modelName.includes("careful") || modelName.includes("think")) return 0.15;
+//   return 0.3;
+// };
 
 export const OllamaService = {
   chat: async (
     model: string,
-    messages: { role: string; content: string }[]
+    messages: { role: string; content: string }[],
+    temperature?: number
   ): Promise<any> => {
-    const temperature = getQwenTemperature(model);
+    // const temperature = getQwenTemperature(model);
     const response = await fetch(`${API_URL}/ollama/chat`, {
       method: "POST",
       headers: { "Content-Type": "application/json" },
@@ -20,7 +21,7 @@ export const OllamaService = {
         messages,
         stream: false,
         options: {
-          temperature, // ← pass to Ollama
+          ...(temperature !== undefined ? { temperature } : {}),
         },
       }),
     });
diff --git a/yarn.lock b/yarn.lock
index b946d6d..8b8ba61 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2962,6 +2962,11 @@
     jest-matcher-utils "^27.0.0"
     pretty-format "^27.0.0"
 
+"@types/js-yaml@^4.0.9":
+  version "4.0.9"
+  resolved "https://registry.yarnpkg.com/@types/js-yaml/-/js-yaml-4.0.9.tgz#cd82382c4f902fed9691a2ed79ec68c5898af4c2"
+  integrity sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==
+
 "@types/json-schema@*", "@types/json-schema@^7.0.15", "@types/json-schema@^7.0.4", "@types/json-schema@^7.0.5", "@types/json-schema@^7.0.8", "@types/json-schema@^7.0.9":
   version "7.0.15"
   resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841"