From 7888402e0f05cb35dea9b0b5140b8721de99c78a Mon Sep 17 00:00:00 2001 From: Anurag Ray Chowdhury Date: Wed, 22 Apr 2026 17:40:06 -0400 Subject: [PATCH 1/2] Add MedLingo dataset + jargon expansion task --- docs/api/datasets.rst | 1 + docs/api/tasks.rst | 1 + pyhealth/datasets/__init__.py | 1 + pyhealth/tasks/__init__.py | 1 + 4 files changed, 4 insertions(+) diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index 8d9a59d21..df06f3831 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -225,6 +225,7 @@ Available Datasets datasets/pyhealth.datasets.MIMIC3Dataset datasets/pyhealth.datasets.MIMIC4Dataset datasets/pyhealth.datasets.MedicalTranscriptionsDataset + datasets/pyhealth.datasets.medlingo datasets/pyhealth.datasets.CardiologyDataset datasets/pyhealth.datasets.eICUDataset datasets/pyhealth.datasets.ISRUCDataset diff --git a/docs/api/tasks.rst b/docs/api/tasks.rst index 23a4e06e5..d84448e34 100644 --- a/docs/api/tasks.rst +++ b/docs/api/tasks.rst @@ -214,6 +214,7 @@ Available Tasks Drug Recommendation Length of Stay Prediction Medical Transcriptions Classification + MedLingo Jargon Expansion Mortality Prediction (Next Visit) Mortality Prediction (StageNet MIMIC-IV) Patient Linkage (MIMIC-III) diff --git a/pyhealth/datasets/__init__.py b/pyhealth/datasets/__init__.py index 50b1b3887..bc3dd5ce7 100644 --- a/pyhealth/datasets/__init__.py +++ b/pyhealth/datasets/__init__.py @@ -57,6 +57,7 @@ def __init__(self, *args, **kwargs): from .eicu import eICUDataset from .isruc import ISRUCDataset from .medical_transcriptions import MedicalTranscriptionsDataset +from .medlingo import MedLingoDataset from .mimic3 import MIMIC3Dataset from .mimic4 import MIMIC4CXRDataset, MIMIC4Dataset, MIMIC4EHRDataset, MIMIC4NoteDataset from .mimicextract import MIMICExtractDataset diff --git a/pyhealth/tasks/__init__.py b/pyhealth/tasks/__init__.py index a32618f9c..72c02edde 100644 --- a/pyhealth/tasks/__init__.py +++ b/pyhealth/tasks/__init__.py @@ -32,6 +32,7 
@@ from .length_of_stay_stagenet_mimic4 import LengthOfStayStageNetMIMIC4 from .medical_coding import MIMIC3ICD9Coding from .medical_transcriptions_classification import MedicalTranscriptionsClassification +from .medlingo_jargon_expansion import MedLingoJargonExpansionTask from .mortality_prediction import ( MortalityPredictionEICU, MortalityPredictionEICU2, From 27e656e03f7effe57f6310cc74a84c1c2aa6cf26 Mon Sep 17 00:00:00 2001 From: Anurag Ray Chowdhury Date: Wed, 22 Apr 2026 17:41:54 -0400 Subject: [PATCH 2/2] Add MedLingo dataset + jargon expansion task --- .../datasets/pyhealth.datasets.medlingo.rst | 7 ++ ...health.tasks.medlingo_jargon_expansion.rst | 7 ++ ...ingo_jargon_expansion_transformersmodel.py | 119 ++++++++++++++++++ pyhealth/datasets/configs/medlingo.yaml | 11 ++ pyhealth/datasets/medlingo.py | 82 ++++++++++++ pyhealth/tasks/medlingo_jargon_expansion.py | 108 ++++++++++++++++ tests/test_medlingo_dataset.py | 102 +++++++++++++++ tests/test_medlingo_jargon_expansion_task.py | 110 ++++++++++++++++ 8 files changed, 546 insertions(+) create mode 100644 docs/api/datasets/pyhealth.datasets.medlingo.rst create mode 100644 docs/api/tasks/pyhealth.tasks.medlingo_jargon_expansion.rst create mode 100644 examples/medlingo_medlingo_jargon_expansion_transformersmodel.py create mode 100644 pyhealth/datasets/configs/medlingo.yaml create mode 100644 pyhealth/datasets/medlingo.py create mode 100644 pyhealth/tasks/medlingo_jargon_expansion.py create mode 100644 tests/test_medlingo_dataset.py create mode 100644 tests/test_medlingo_jargon_expansion_task.py diff --git a/docs/api/datasets/pyhealth.datasets.medlingo.rst b/docs/api/datasets/pyhealth.datasets.medlingo.rst new file mode 100644 index 000000000..b5c5294a0 --- /dev/null +++ b/docs/api/datasets/pyhealth.datasets.medlingo.rst @@ -0,0 +1,7 @@ +pyhealth.datasets.MedLingoDataset +=================================== + +.. 
autoclass:: pyhealth.datasets.medlingo.MedLingoDataset + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/tasks/pyhealth.tasks.medlingo_jargon_expansion.rst b/docs/api/tasks/pyhealth.tasks.medlingo_jargon_expansion.rst new file mode 100644 index 000000000..8c871c08a --- /dev/null +++ b/docs/api/tasks/pyhealth.tasks.medlingo_jargon_expansion.rst @@ -0,0 +1,7 @@ +pyhealth.tasks.medlingo_jargon_expansion +======================================== + +.. autoclass:: pyhealth.tasks.medlingo_jargon_expansion.MedLingoJargonExpansionTask + :members: + :undoc-members: + :show-inheritance: diff --git a/examples/medlingo_medlingo_jargon_expansion_transformersmodel.py b/examples/medlingo_medlingo_jargon_expansion_transformersmodel.py new file mode 100644 index 000000000..2e8234c12 --- /dev/null +++ b/examples/medlingo_medlingo_jargon_expansion_transformersmodel.py @@ -0,0 +1,119 @@ +""" +MedLingo jargon expansion with :class:`~pyhealth.models.TransformersModel`. + +**Paper:** Jia, Sontag & Agrawal — *Diagnosing our datasets* (CHIL 2025), +https://arxiv.org/abs/2505.15024. Public CSV: ``questions.csv`` (columns +``word1``, ``word2``, ``question``, ``answer``) from the MedLingo export in +Flora-jia-jfr/diagnosing_our_datasets — place that file under the directory you +pass as ``root`` below. + +**Ablation (two task configs):** + +- ``MedLingoJargonExpansionTask(shot_mode="one_shot")`` — ``prompt`` is the + released ``question`` string (matches the distributed MedLingo item). +- ``MedLingoJargonExpansionTask(shot_mode="zero_shot")`` — ``prompt`` is rebuilt + from ``word1`` and ``word2`` only; the CSV ``question`` field is not used, so + any one-shot / ICL demo in that column is stripped by construction. + +**Limitation vs the paper:** this PyHealth task uses **multiclass classification +on the string ``answer``** (via ``TransformersModel`` + Hugging Face encoders). 
+The paper evaluates **open-ended** generations with an LLM judge; this script +does not reproduce that protocol. + +**Smoke run (no Hugging Face download):** by default this script only builds the +dataset, runs ``set_task`` for both shot modes, and prints sample counts. To +also run one forward pass with a **tiny** BERT (small one-time download unless +cached), set environment variable ``PYHEALTH_MEDLINGO_RUN_MODEL=1``:: + + PYHEALTH_MEDLINGO_RUN_MODEL=1 python examples/medlingo_medlingo_jargon_expansion_transformersmodel.py + +Optional: ``PYHEALTH_MEDLINGO_MODEL=<model_name>`` overrides the tiny default +(``hf-internal-testing/tiny-random-bert``). + +Run from the repository root after ``pip install -e .``, or set +``PYTHONPATH`` to the repo root so ``import pyhealth`` resolves. +""" + +from __future__ import annotations + +import logging +import os +import tempfile +from pathlib import Path + +import pandas as pd + +logging.basicConfig(level=logging.WARNING) +for _name in ("pyhealth", "pyhealth.datasets", "pyhealth.datasets.base_dataset"): + logging.getLogger(_name).setLevel(logging.WARNING) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +def _write_synthetic_questions_csv(path: Path) -> None: + """Tiny stand-in for ``datasets/MedLingo/questions.csv`` (no secrets).""" + rows = [ + { + "word1": "MI", + "word2": "STEMI", + "question": "ICL_STUB What is MI vs STEMI in one sentence?", + "answer": "types of heart attack", + }, + { + "word1": "HTN", + "word2": "BP", + "question": "ICL_STUB Define HTN.", + "answer": "high blood pressure", + }, + ] + pd.DataFrame(rows).to_csv(path, index=False) + + +def main() -> None: + from pyhealth.datasets import MedLingoDataset, get_dataloader + from pyhealth.tasks import MedLingoJargonExpansionTask + + tmp = Path(tempfile.mkdtemp(prefix="pyhealth_medlingo_")) + root = tmp / "root" + root.mkdir() + cache = tmp / "cache" + _write_synthetic_questions_csv(root / "questions.csv") + + base = MedLingoDataset(root=str(root), 
cache_dir=cache, num_workers=1) + logger.info("Patients: %s", len(base.unique_patient_ids)) + + for shot in ("one_shot", "zero_shot"): + task = MedLingoJargonExpansionTask(shot_mode=shot) + samples = base.set_task(task=task, num_workers=1) + logger.info("shot_mode=%s -> %s samples", shot, len(samples)) + if len(samples): + s0 = samples[0] + logger.info("First keys: %s", sorted(s0.keys())) + + if os.environ.get("PYHEALTH_MEDLINGO_RUN_MODEL") != "1": + logger.info( + "Skipping TransformersModel forward (no download). " + "Set PYHEALTH_MEDLINGO_RUN_MODEL=1 to run a tiny HF model on one batch." + ) + return + + from pyhealth.models import TransformersModel + + model_name = os.environ.get( + "PYHEALTH_MEDLINGO_MODEL", "hf-internal-testing/tiny-random-bert" + ) + task = MedLingoJargonExpansionTask(shot_mode="one_shot") + samples = base.set_task(task=task, num_workers=1) + loader = get_dataloader(samples, batch_size=2, shuffle=False) + model = TransformersModel(dataset=samples, model_name=model_name) + model.eval() + batch = next(iter(loader)) + import torch + + with torch.no_grad(): + out = model(**batch) + logger.info("Forward ok; loss=%s", out.get("loss")) + + +if __name__ == "__main__": + main() diff --git a/pyhealth/datasets/configs/medlingo.yaml b/pyhealth/datasets/configs/medlingo.yaml new file mode 100644 index 000000000..f1d22d0eb --- /dev/null +++ b/pyhealth/datasets/configs/medlingo.yaml @@ -0,0 +1,11 @@ +version: "1.0" +tables: + questions: + file_path: "questions.csv" + patient_id: null + timestamp: null + attributes: + - "word1" + - "word2" + - "question" + - "answer" diff --git a/pyhealth/datasets/medlingo.py b/pyhealth/datasets/medlingo.py new file mode 100644 index 000000000..9256d7144 --- /dev/null +++ b/pyhealth/datasets/medlingo.py @@ -0,0 +1,82 @@ +import logging +from pathlib import Path +from typing import Any + +import narwhals as nw + +from ..tasks.medlingo_jargon_expansion import MedLingoJargonExpansionTask +from .base_dataset import BaseDataset 
+ +logger = logging.getLogger(__name__) + +# Expected public export from Flora-jia-jfr/diagnosing_our_datasets: +# datasets/MedLingo/questions.csv with columns word1, word2, question, answer. +_REQUIRED_QUESTION_COLUMNS = frozenset({"word1", "word2", "question", "answer"}) + + +class MedLingoDataset(BaseDataset): + """MedLingo jargon QA rows from the *Diagnosing our datasets* line of work. + + Public MedLingo data (e.g. ``questions.csv``) is released with the paper + *Diagnosing our datasets* (Jia, Sontag & Agrawal, CHIL 2025, + https://arxiv.org/abs/2505.15024). Place ``questions.csv`` under ``root`` + (same layout as ``datasets/MedLingo/questions.csv`` in the paper's data + repo). Each CSV row becomes one synthetic patient with a single + ``questions`` event; attributes are ``word1``, ``word2``, ``question``, + and ``answer`` (column names are matched case-insensitively after load). + + Args: + root: Directory containing ``questions.csv``. + dataset_name: Optional override for the dataset name. + config_path: YAML config path; defaults to ``configs/medlingo.yaml``. + cache_dir: Optional cache root (see :class:`BaseDataset`). + num_workers: Workers for task/sample transforms. + dev: If True, limits to the first 1000 patients (see ``BaseDataset``). + + Note: + :meth:`default_task` uses ``MedLingoJargonExpansionTask(shot_mode= + \"one_shot\")`` so ``set_task()`` matches the released CSV prompts. + Pass ``MedLingoJargonExpansionTask(shot_mode=\"zero_shot\")`` for the + ablation that rebuilds the prompt from ``word1``/``word2`` only. 
+ """ + + def __init__( + self, + root: str, + dataset_name: str | None = None, + config_path: str | Path | None = None, + cache_dir=None, + num_workers: int = 1, + dev: bool = False, + ) -> None: + if config_path is None: + logger.info("No config path provided, using default MedLingo config") + config_path = Path(__file__).parent / "configs" / "medlingo.yaml" + default_tables = ["questions"] + super().__init__( + root=root, + tables=default_tables, + dataset_name=dataset_name or "medlingo", + config_path=str(config_path), + cache_dir=cache_dir, + num_workers=num_workers, + dev=dev, + ) + + @property + def default_task(self) -> MedLingoJargonExpansionTask: + """Default MedLingo task using the released one-shot ``question`` text.""" + return MedLingoJargonExpansionTask(shot_mode="one_shot") + + def preprocess_questions(self, df: Any) -> Any: + """Ensure required MedLingo columns exist after lowercasing names.""" + lf = nw.from_native(df) + names = set(lf.columns) + missing = _REQUIRED_QUESTION_COLUMNS - names + if missing: + raise ValueError( + "questions.csv is missing required column(s): " + f"{sorted(missing)}. Expected columns: " + f"{sorted(_REQUIRED_QUESTION_COLUMNS)} (case-insensitive)." + ) + return lf diff --git a/pyhealth/tasks/medlingo_jargon_expansion.py b/pyhealth/tasks/medlingo_jargon_expansion.py new file mode 100644 index 000000000..ab67bd207 --- /dev/null +++ b/pyhealth/tasks/medlingo_jargon_expansion.py @@ -0,0 +1,108 @@ +"""MedLingo jargon expansion task (plain-language answer from a prompt). + +Tied to *Diagnosing our datasets* (Jia, Sontag & Agrawal, CHIL 2025; +https://arxiv.org/abs/2505.15024). This task is a **multiclass shortcut** over +the string ``answer`` column; it does not reproduce the paper's open-ended +generation plus LLM-as-judge setup. 
+""" + +from __future__ import annotations + +from typing import Any, Dict, List, Literal, Optional, Tuple + +from ..data import Event, Patient +from .base_task import BaseTask + +ShotMode = Literal["zero_shot", "one_shot"] + + +def _as_str(value: Any) -> Optional[str]: + """Return a clean string or None if the value is unusable.""" + if value is None: + return None + text = str(value).strip() + if not text or text.lower() == "nan": + return None + return text + + +class MedLingoJargonExpansionTask(BaseTask): + """Map each MedLingo row to a text prompt and a plain-language ``answer``. + + Ablation (``shot_mode``), aligned with the course rubric: + + - **one_shot**: Use the ``question`` field verbatim as ``prompt``. This + matches the **released** MedLingo item (including any in-context demo + baked into that string). + - **zero_shot**: Do **not** use ``question``. Rebuild a minimal instruction + from ``word1`` and ``word2`` only so the model never sees the released + one-shot prompt (ICL demonstration stripped by construction). + + Attributes: + task_name: Includes ``shot_mode`` so caches differ per configuration. + shot_mode: Either ``\"zero_shot\"`` or ``\"one_shot\"``. + input_schema: Single ``\"text\"`` field ``prompt`` for encoder models. + output_schema: ``answer`` as ``\"multiclass\"`` over distinct strings. 
+ """ + + input_schema: Dict[str, str] = {"prompt": "text"} + output_schema: Dict[str, str] = {"answer": "multiclass"} + + def __init__( + self, + shot_mode: ShotMode = "one_shot", + code_mapping: Optional[Dict[str, Tuple[str, str]]] = None, + ) -> None: + if shot_mode not in ("zero_shot", "one_shot"): + raise ValueError( + f"shot_mode must be 'zero_shot' or 'one_shot', got {shot_mode!r}" + ) + super().__init__(code_mapping=code_mapping) + self.shot_mode: ShotMode = shot_mode + self.task_name = f"MedLingoJargonExpansionTask/{shot_mode}" + + def _build_prompt(self, event: Event) -> Optional[str]: + """Build model input text for the current ``shot_mode``.""" + word1 = _as_str(event.word1) + word2 = _as_str(event.word2) + question = _as_str(event.question) + + if self.shot_mode == "one_shot": + # Released conditioning: full CSV ``question`` (demo + query as + # distributed). + return question + + # zero_shot: ignore ``question`` entirely; ICL is not present by design. + if word1 is None or word2 is None: + return None + return ( + "In plain language, define the medical jargon that connects " + f'"{word1}" and "{word2}". Respond with the plain-language ' + "definition only." + ) + + def __call__(self, patient: Patient) -> List[Dict[str, Any]]: + """Emit one sample per patient when fields are valid. + + Args: + patient: Synthetic patient with a single ``questions`` event. + + Returns: + A one-element list with ``id``, ``prompt``, and ``answer``, or + empty if required fields are missing. 
+ """ + events = patient.get_events(event_type="questions") + if len(events) != 1: + return [] + event = events[0] + answer = _as_str(event.answer) + prompt = self._build_prompt(event) + if prompt is None or answer is None: + return [] + return [ + { + "id": patient.patient_id, + "prompt": prompt, + "answer": answer, + } + ] diff --git a/tests/test_medlingo_dataset.py b/tests/test_medlingo_dataset.py new file mode 100644 index 000000000..c5e9d5d3c --- /dev/null +++ b/tests/test_medlingo_dataset.py @@ -0,0 +1,102 @@ +"""Synthetic tests for :class:`~pyhealth.datasets.MedLingoDataset` (no real MedLingo).""" + +from __future__ import annotations + +import pandas as pd +import pytest + +from pyhealth.datasets import MedLingoDataset +from pyhealth.tasks import MedLingoJargonExpansionTask + +THREE_ROWS = [ + { + "word1": "MI", + "word2": "STEMI", + "question": "Q0?", + "answer": "heart attack", + }, + { + "word1": "HTN", + "word2": "BP", + "question": "Q1?", + "answer": "high blood pressure", + }, + { + "word1": "DM", + "word2": "A1c", + "question": "Q2?", + "answer": "diabetes", + }, +] + + +def _write_questions_csv(path, rows: list[dict]) -> None: + pd.DataFrame(rows).to_csv(path, index=False) + + +@pytest.fixture(scope="module") +def medlingo_three_patients(tmp_path_factory): + """One parquet build shared by load + default-task tests.""" + base = tmp_path_factory.mktemp("medlingo_mod") + root = base / "data" + root.mkdir() + cache = base / "cache" + cache.mkdir() + _write_questions_csv(root / "questions.csv", THREE_ROWS) + return MedLingoDataset(root=str(root), cache_dir=str(cache), num_workers=1) + + +def test_medlingo_default_task_raw_sample(medlingo_three_patients): + """Raw task output (no ``set_task`` / litdata).""" + ds = medlingo_three_patients + assert isinstance(ds.default_task, MedLingoJargonExpansionTask) + assert ds.default_task.shot_mode == "one_shot" + raw = ds.default_task(ds.get_patient("0")) + assert len(raw) == 1 + assert raw[0]["prompt"] == "Q0?" 
+ assert raw[0]["answer"] == "heart attack" + + +def test_medlingo_loads_rows_as_patients(medlingo_three_patients): + ds = medlingo_three_patients + assert len(ds.unique_patient_ids) == 3 + p0 = ds.get_patient("0") + evs = p0.get_events(event_type="questions") + assert len(evs) == 1 + assert evs[0].word1 == "MI" + assert evs[0].answer == "heart attack" + + +def test_medlingo_missing_column_raises(tmp_path): + root = tmp_path / "data" + root.mkdir() + _write_questions_csv( + root / "questions.csv", + [{"word1": "a", "word2": "b", "question": "q"}], + ) + ds = MedLingoDataset(root=str(root), cache_dir=tmp_path / "c", num_workers=1) + with pytest.raises(ValueError, match="missing required column"): + _ = ds.unique_patient_ids + + +def test_medlingo_z_case_insensitive_columns(tmp_path): + """Runs after module-scoped tests (name) so a second CSV build is isolated.""" + root = tmp_path / "data" + root.mkdir() + cache = tmp_path / "cache" + _write_questions_csv( + root / "questions.csv", + [ + { + "Word1": "a", + "WORD2": "b", + "Question": "Q?", + "ANSWER": "ans", + }, + ], + ) + ds = MedLingoDataset(root=str(root), cache_dir=cache, num_workers=1) + p = ds.get_patient("0") + ev = p.get_events(event_type="questions")[0] + assert ev.word1 == "a" + assert ev.answer == "ans" diff --git a/tests/test_medlingo_jargon_expansion_task.py b/tests/test_medlingo_jargon_expansion_task.py new file mode 100644 index 000000000..3cc202d17 --- /dev/null +++ b/tests/test_medlingo_jargon_expansion_task.py @@ -0,0 +1,110 @@ +"""Tests for :class:`~pyhealth.tasks.MedLingoJargonExpansionTask` (no network).""" + +from __future__ import annotations + +from datetime import datetime + +import polars as pl +import pytest + +from pyhealth.data import Patient +from pyhealth.tasks import MedLingoJargonExpansionTask + + +def _patient_from_row( + pid: str, + word1: str, + word2: str, + question: str, + answer: str, +) -> Patient: + df = pl.DataFrame( + { + "patient_id": [pid], + "event_type": 
["questions"], + "timestamp": [datetime(2020, 1, 1)], + "questions/word1": [word1], + "questions/word2": [word2], + "questions/question": [question], + "questions/answer": [answer], + } + ) + return Patient(pid, df) + + +def test_one_shot_uses_csv_question_verbatim(): + task = MedLingoJargonExpansionTask(shot_mode="one_shot") + q = "ICL_DEMO_ONLY_XYZ What is MI?" + p = _patient_from_row("0", "MI", "STEMI", q, "myocardial infarction") + out = task(p) + assert len(out) == 1 + assert out[0]["prompt"] == q + assert out[0]["answer"] == "myocardial infarction" + assert out[0]["id"] == "0" + + +def test_zero_shot_ignores_question_field(): + task = MedLingoJargonExpansionTask(shot_mode="zero_shot") + p = _patient_from_row( + "1", + "foo", + "bar", + "ICL_DEMO_ONLY_XYZ never use this in zero-shot", + "plain", + ) + out = task(p) + assert len(out) == 1 + assert "ICL_DEMO_ONLY_XYZ" not in out[0]["prompt"] + assert "foo" in out[0]["prompt"] and "bar" in out[0]["prompt"] + + +def test_zero_shot_and_one_shot_differ_on_same_row(): + p = _patient_from_row("2", "a", "b", "full released question", "lbl") + z = MedLingoJargonExpansionTask(shot_mode="zero_shot")(p)[0]["prompt"] + o = MedLingoJargonExpansionTask(shot_mode="one_shot")(p)[0]["prompt"] + assert z != o + assert o == "full released question" + + +def test_invalid_shot_mode(): + with pytest.raises(ValueError, match="shot_mode"): + MedLingoJargonExpansionTask(shot_mode="bad") + + +def test_empty_answer_drops_sample(): + task = MedLingoJargonExpansionTask(shot_mode="one_shot") + p = _patient_from_row("3", "a", "b", "q", "") + assert task(p) == [] + + +def test_zero_shot_requires_both_words(): + task = MedLingoJargonExpansionTask(shot_mode="zero_shot") + p = _patient_from_row("4", "", "b", "q", "ans") + assert task(p) == [] + + +def test_task_name_includes_shot_mode(): + assert ( + MedLingoJargonExpansionTask(shot_mode="zero_shot").task_name + == "MedLingoJargonExpansionTask/zero_shot" + ) + assert ( + 
MedLingoJargonExpansionTask(shot_mode="one_shot").task_name + == "MedLingoJargonExpansionTask/one_shot" + ) + + +def test_wrong_event_count_returns_empty(): + df = pl.DataFrame( + { + "patient_id": ["5", "5"], + "event_type": ["questions", "questions"], + "timestamp": [datetime(2020, 1, 1), datetime(2020, 1, 2)], + "questions/word1": ["a", "b"], + "questions/word2": ["c", "d"], + "questions/question": ["q1", "q2"], + "questions/answer": ["x", "y"], + } + ) + p = Patient("5", df) + assert MedLingoJargonExpansionTask(shot_mode="one_shot")(p) == []