vmvarela · vmvarela · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/README.md b/README.md
@@ -160,7 +160,7 @@ $ printf '[{"name":"Alice","score":95},{"name":"Bob","score":72}]' \
 Alice,95
 ```
 
-Columns are auto-detected as `INTEGER`, `REAL`, or `TEXT` based on the first 100 rows. Use `--no-type-inference` to force all columns to `TEXT`:
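+Columns are auto-detected as `INTEGER`, `REAL`, `DATE`, `DATETIME`, or `TEXT` based on the first 100 rows. Date and datetime values are normalized on insert to the ISO 8601 forms SQLite understands (`YYYY-MM-DD` and `YYYY-MM-DD HH:MM:SS`), so SQLite date functions (`date()`, `strftime()`, `julianday()`) work immediately. Slash-separated dates whose day/month order is ambiguous (for example `05/06/2024`) are left as `TEXT`. Use `--no-type-inference` to force all columns to `TEXT`: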
 
 ```sh
 $ cat orders.csv | sql-pipe 'SELECT COUNT(*), AVG(amount) FROM t WHERE status = "paid"'
@@ -315,12 +315,14 @@ $ cat contacts.csv | sql-pipe 'SELECT DISTINCT email FROM t'
 $ cat users.csv | sql-pipe 'SELECT * FROM t WHERE email = "" OR email IS NULL'
 ```
 
-**Date range filter (dates stored as text):**
+**Date range filter:**
 
 ```sh
 $ cat logs.csv | sql-pipe 'SELECT * FROM t WHERE ts >= "2024-01-01" AND ts < "2024-02-01"'
 ```
 
+Date columns are auto-detected and stored as ISO 8601 text, so comparison operators and `strftime()` / `julianday()` work without any preprocessing.
+
 **Compute a derived column:**
 
 ```sh
@@ -464,9 +466,34 @@ $ curl -s "https://api.open-meteo.com/v1/forecast?latitude=40.4168&longitude=-3.
 2026-05-07,19.6,10.7,2.1
 ```
 
+**La Liga: season lengths reveal COVID and the World Cup**
+
+The same [engsoccerdata](https://github.com/jalapic/engsoccerdata) dataset has a
+`Date` column in `YYYY-MM-DD` format. `sql-pipe` auto-detects it as `DATE` and
+stores it as ISO 8601 text, so `julianday()` works directly — no preprocessing:
+
+```sh
+$ curl -s https://raw.githubusercontent.com/jalapic/engsoccerdata/master/data-raw/spain.csv \
+    | sql-pipe 'SELECT Season,
+                       MIN(Date) AS start,
+                       MAX(Date) AS end,
+                       CAST(julianday(MAX(Date)) - julianday(MIN(Date)) AS INTEGER) AS days
+                FROM t WHERE tier=1 AND Season BETWEEN 2018 AND 2022
+                GROUP BY Season ORDER BY Season'
+2018,2018-08-17,2019-05-19,275
+2019,2019-08-16,2020-07-19,338
+2020,2020-09-12,2021-05-23,253
+2021,2021-08-13,2022-05-22,282
+2022,2022-08-12,2023-06-04,296
+```
+
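+The 2019–20 season spans 338 days: COVID suspended play in March 2020 and pushed
+the final round to July. The 2022–23 season runs 296 days because of the mid-season break for the
+November–December 2022 World Cup. A normal season, such as 2018–19, is about 275 days; 2020–21 is shorter (253 days) because it started in mid-September.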
+
 ## How it works
 
-Each run opens a fresh `:memory:` SQLite database. The header row drives a `CREATE TABLE t (...)` with all columns as `TEXT`. Rows are loaded in a single transaction via a prepared `INSERT` statement, then `sqlite3_exec` runs your query and prints rows one by one.
+Each run opens a fresh `:memory:` SQLite database. The header row drives a `CREATE TABLE t (...)` with types inferred from the first 100 rows — `INTEGER`, `REAL`, `DATE`, `DATETIME`, or `TEXT`. `DATE` and `DATETIME` columns use `TEXT` affinity, so values keep their ISO 8601 string form: they compare and sort chronologically, and SQLite date functions accept them directly. Rows are loaded in a single transaction via a prepared `INSERT` statement, then `sqlite3_exec` runs your query and prints rows one by one.
 
 The database never touches disk and vanishes when the process exits. No state, no cleanup.
 

diff --git a/build.zig b/build.zig
@@ -1438,7 +1438,161 @@ pub fn build(b: *std.Build) void {
     test_json_path_format_mismatch.step.dependOn(b.getInstallStep());
     test_step.dependOn(&test_json_path_format_mismatch.step);
 
-    // Unit tests for the RFC 4180 CSV parser (src/csv.zig)
+    // ─── Date / datetime type inference integration tests ────────────────────
+
+    // Integration test 140: ISO date column is stored and queryable as YYYY-MM-DD text
+    const test_date_iso = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,2024-01-15\n2,1999-12-31\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT dob FROM t WHERE id=1")
+        \\[ "$result" = "2024-01-15" ]
+    });
+    test_date_iso.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_iso.step);
+
+    // Integration test 141: ISO date column supports SQLite date() function
+    const test_date_iso_func = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,2024-01-15\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT date(dob) FROM t")
+        \\[ "$result" = "2024-01-15" ]
+    });
+    test_date_iso_func.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_iso_func.step);
+
+    // Integration test 142: EU-dash date (DD-MM-YYYY) normalized to ISO on insert
+    const test_date_eu_dash = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,15-01-2024\n2,31-12-1999\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT dob FROM t ORDER BY dob")
+        \\expected=$(printf '1999-12-31\n2024-01-15')
+        \\[ "$result" = "$expected" ]
+    });
+    test_date_eu_dash.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_eu_dash.step);
+
+    // Integration test 143: EU-slash date (DD/MM/YYYY) detected when d1 > 12
+    const test_date_eu_slash = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,15/01/2024\n2,31/12/1999\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT dob FROM t ORDER BY dob")
+        \\expected=$(printf '1999-12-31\n2024-01-15')
+        \\[ "$result" = "$expected" ]
+    });
+    test_date_eu_slash.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_eu_slash.step);
+
+    // Integration test 144: US-slash date (MM/DD/YYYY) detected when d2 > 12
+    const test_date_us_slash = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,01/15/2024\n2,12/31/1999\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT dob FROM t ORDER BY dob")
+        \\expected=$(printf '1999-12-31\n2024-01-15')
+        \\[ "$result" = "$expected" ]
+    });
+    test_date_us_slash.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_us_slash.step);
+
+    // Integration test 145: ambiguous slash date (both ≤ 12) → TEXT, no normalization
+    const test_date_slash_ambiguous = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,05/06/2024\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT dob FROM t")
+        \\[ "$result" = "05/06/2024" ]
+    });
+    test_date_slash_ambiguous.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_slash_ambiguous.step);
+
+    // Integration test 146: ISO datetime (space separator) stored as ISO and queryable
+    const test_datetime_iso_space = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,ts\n1,2024-01-15 10:30:00\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT ts FROM t")
+        \\[ "$result" = "2024-01-15 10:30:00" ]
+    });
+    test_datetime_iso_space.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_datetime_iso_space.step);
+
+    // Integration test 147: ISO datetime T-separator normalized to space on insert
+    const test_datetime_iso_t = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,ts\n1,2024-01-15T10:30:00\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT ts FROM t")
+        \\[ "$result" = "2024-01-15 10:30:00" ]
+    });
+    test_datetime_iso_t.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_datetime_iso_t.step);
+
+    // Integration test 148: EU-slash datetime (DD/MM/YYYY HH:MM) normalized to ISO
+    const test_datetime_eu_slash = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,ts\n1,15/01/2024 10:30\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT ts FROM t")
+        \\[ "$result" = "2024-01-15 10:30:00" ]
+    });
+    test_datetime_eu_slash.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_datetime_eu_slash.step);
+
+    // Integration test 149: US-slash datetime (MM/DD/YYYY HH:MM) normalized to ISO
+    const test_datetime_us_slash = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,ts\n1,01/15/2024 10:30\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT ts FROM t")
+        \\[ "$result" = "2024-01-15 10:30:00" ]
+    });
+    test_datetime_us_slash.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_datetime_us_slash.step);
+
+    // Integration test 150: --columns --verbose shows DATE for date column
+    const test_columns_date_type = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,2024-01-15\n' \
+        \\    | ./zig-out/bin/sql-pipe --columns --verbose)
+        \\echo "$result" | grep -q "dob DATE"
+    });
+    test_columns_date_type.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_columns_date_type.step);
+
+    // Integration test 151: --columns --verbose shows DATETIME for datetime column
+    const test_columns_datetime_type = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,ts\n1,2024-01-15 10:30:00\n' \
+        \\    | ./zig-out/bin/sql-pipe --columns --verbose)
+        \\echo "$result" | grep -q "ts DATETIME"
+    });
+    test_columns_datetime_type.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_columns_datetime_type.step);
+
+    // Integration test 152: --validate shows DATE in schema summary
+    const test_validate_date_type = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,2024-01-15\n2,1999-12-31\n' \
+        \\    | ./zig-out/bin/sql-pipe --validate)
+        \\echo "$result" | grep -q "dob DATE"
+    });
+    test_validate_date_type.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_validate_date_type.step);
+
+    // Integration test 153: date column supports ORDER BY (ISO sort = chronological)
+    const test_date_order_by = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,dob\nBob,15-01-1990\nAlice,20-03-1985\nCarol,01-07-1992\n' \
+        \\    | ./zig-out/bin/sql-pipe "SELECT name FROM t ORDER BY dob")
+        \\expected=$(printf 'Alice\nBob\nCarol')
+        \\[ "$result" = "$expected" ]
+    });
+    test_date_order_by.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_order_by.step);
+
+    // Integration test 154: --no-type-inference keeps date as TEXT (no normalization)
+    const test_date_no_type_inference = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'id,dob\n1,15/01/2024\n' \
+        \\    | ./zig-out/bin/sql-pipe --no-type-inference "SELECT dob FROM t")
+        \\[ "$result" = "15/01/2024" ]
+    });
+    test_date_no_type_inference.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_date_no_type_inference.step);
     const unit_tests = b.addTest(.{
         .root_module = b.createModule(.{
             .root_source_file = b.path("src/csv.zig"),
@@ -1472,4 +1626,26 @@ pub fn build(b: *std.Build) void {
     const run_xml_unit_tests = b.addRunArtifact(xml_unit_tests);
     test_step.dependOn(&run_xml_unit_tests.step);
     unit_test_step.dependOn(&run_xml_unit_tests.step);
+
+    // Unit tests for the CSV loader (src/loader.zig) — isDate, isDateTime, inferTypes, normalize helpers
+    const loader_unit_tests = b.addTest(.{
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/loader.zig"),
+            .target = target,
+            .optimize = optimize,
+            .link_libc = true,
+        }),
+    });
+    loader_unit_tests.root_module.addImport("c", translate_c.createModule());
+    if (bundle_sqlite) {
+        loader_unit_tests.root_module.addIncludePath(b.path("lib"));
+        loader_unit_tests.root_module.addCSourceFile(.{
+            .file = b.path("lib/sqlite3.c"),
+            .flags = &.{"-DSQLITE_OMIT_LOAD_EXTENSION=1"},
+        });
+    } else {
+        loader_unit_tests.root_module.linkSystemLibrary("sqlite3", .{});
+    }
+    const run_loader_unit_tests = b.addRunArtifact(loader_unit_tests);
+    unit_test_step.dependOn(&run_loader_unit_tests.step);
 }
diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd
@@ -17,9 +17,11 @@ DESCRIPTION
 	aggregations on CSV files without manual SQL database setup.
 
 	All input columns are automatically loaded into the table with names derived from
-	the CSV header row. By default, column types (TEXT, INTEGER, REAL) are inferred
-	from the first 100 rows of data. Use *--no-type-inference* to disable this
-	behavior and treat all columns as TEXT.
+	the CSV header row. By default, column types (TEXT, INTEGER, REAL, DATE, DATETIME)
+	are inferred from the first 100 rows of data. DATE and DATETIME values are
+	normalized to ISO 8601 on insert, enabling SQLite date functions (*date()*,
+	*strftime()*, *julianday()*) to work directly on those columns. Use
+	*--no-type-inference* to disable this behavior and treat all columns as TEXT.
 
 	By default, input fields are parsed as comma-separated values. Use
 	*--delimiter* (or *-d*) to parse other delimiters (1–8 characters), or *--tsv*
@@ -95,7 +97,7 @@ OPTIONS
 		Read the input header, print each column name on its own line to
 		standard output, and exit with code 0. Supported for CSV, TSV,
 		JSON, NDJSON, and XML input. When combined with *-v* / *--verbose*,
-		also shows the inferred type (INTEGER, REAL, or TEXT) for each column
+		also shows the inferred type (INTEGER, REAL, DATE, DATETIME, or TEXT) for each column
 		(CSV/TSV only; other formats always show TEXT), using the first 100
 		data rows for inference. Respects *--delimiter* and *--tsv*.
 		Mutually exclusive with a query argument.
@@ -104,7 +106,7 @@ OPTIONS
 		Print a schema comment block to standard error and the first <n> data
 		rows to standard output as delimited text (default: 10 rows if no value
 		is given). The schema block lists each column name and its inferred
-		SQLite type (INTEGER, REAL, or TEXT), each line prefixed with *#* so it
+		type (INTEGER, REAL, DATE, DATETIME, or TEXT), each line prefixed with *#* so it
 		is ignored by downstream CSV parsers. Column header is always printed as
 		the first output row (implies *--header*). Type inference reads up to
 		100 rows or <n> rows, whichever is larger, before emitting output.