From 2f45a1d7bbdd7f0ee0a49f85de33badf366ea942 Mon Sep 17 00:00:00 2001 From: Chessing234 Date: Sun, 17 May 2026 05:59:38 +0530 Subject: [PATCH] fix(sqlite import): apply --limit subject filter to chunked large-file path The small-file branch passes subjects=subjects to process_dataframe, but the chunked branch for files above THRESHOLD_SIZE omitted it, so passing --limit N produced a database in which small tables were restricted to N subjects while large tables (chartevents, labevents, etc.) were still imported in full, unfiltered. --- mimic-iv/buildmimic/sqlite/import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mimic-iv/buildmimic/sqlite/import.py b/mimic-iv/buildmimic/sqlite/import.py index 1cb8eb0d..68d43647 100644 --- a/mimic-iv/buildmimic/sqlite/import.py +++ b/mimic-iv/buildmimic/sqlite/import.py @@ -165,7 +165,7 @@ def main(): else: # If the file is too large, let's do the work in chunks for chunk in pd.read_csv(f, chunksize=CHUNKSIZE, low_memory=False, dtype=mimic_dtypes): - chunk = process_dataframe(chunk) + chunk = process_dataframe(chunk, subjects=subjects) chunk.to_sql(tablename, connection, if_exists="append", index=False) row_counts[tablename] += len(chunk) print("done!")