docs: add per-column Parquet encoding and compression documentation #384
---
title: ALTER TABLE ALTER COLUMN SET/DROP PARQUET
sidebar_label: PARQUET ENCODING/COMPRESSION
description: ALTER TABLE ALTER COLUMN SET/DROP PARQUET SQL keyword reference documentation.
---

Sets or removes per-column Parquet encoding and compression configuration on
existing tables. These settings only affect
[Parquet partitions](/docs/query/export-parquet/#in-place-conversion) and are
ignored for native partitions.

## SET

Override the default Parquet encoding, compression, or both for a column.
The syntax is `SET PARQUET(encoding [, compression[(level)]])`. Use `default`
for the encoding when specifying compression only.

```questdb-sql title="Set encoding only"
ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET(rle_dictionary);
```

```questdb-sql title="Set compression only (with optional level)"
ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET(default, zstd(3));
```

```questdb-sql title="Set both encoding and compression"
ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET(rle_dictionary, zstd(3));
```

## DROP

Reset per-column overrides back to the server defaults.

```questdb-sql title="Reset to defaults"
ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET;
```

## Supported encodings and codecs

See the [CREATE TABLE](/docs/query/sql/create-table/#supported-encodings)
reference for the full list of supported encodings, compression codecs, and
their valid column types.
### Per-column Parquet encoding and compression

Column definitions may include an optional
`PARQUET(encoding [, compression[(level)]])` clause. These settings only affect
[Parquet partitions](/docs/query/export-parquet/#in-place-conversion) and are
ignored for native partitions. Both encoding and compression are optional — use
`default` for the encoding when specifying compression only.

```questdb-sql title="CREATE TABLE with per-column Parquet config"
CREATE TABLE sensors (
  ts TIMESTAMP,
  temperature DOUBLE PARQUET(rle_dictionary, zstd(3)),
  humidity FLOAT PARQUET(rle_dictionary),
  device_id VARCHAR PARQUET(default, lz4_raw),
  status INT
) TIMESTAMP(ts) PARTITION BY DAY;
```

When omitted, columns use the global defaults: a type-appropriate encoding and
the server-wide compression codec
(`cairo.partition.encoder.parquet.compression.codec`).
#### Supported encodings

| Encoding                | SQL keyword               | Valid column types           |
| ----------------------- | ------------------------- | ---------------------------- |
| Plain                   | `plain`                   | All                          |
| RLE Dictionary          | `rle_dictionary`          | All except BOOLEAN and ARRAY |
| Delta Length Byte Array | `delta_length_byte_array` | STRING, BINARY, VARCHAR      |
| Delta Binary Packed     | `delta_binary_packed`     | INT, LONG, DATE, TIMESTAMP   |

- **Plain** — stores values as-is with no transformation. The simplest
  encoding, with no overhead. Use it as a fallback when data has high
  cardinality and no exploitable patterns (e.g. random floats or UUIDs).
- **RLE Dictionary** — builds a dictionary of unique values and replaces each
  value with a short integer key. The keys are then encoded with a hybrid of
  run-length encoding (for repeated consecutive keys) and bit-packing (for
  non-repeating sequences). Best for low-to-medium cardinality columns (status
  codes, device IDs, symbols). The lower the cardinality, the greater the
  compression.
- **Delta Length Byte Array** — delta-encodes the lengths of consecutive
  string/binary values, then stores the raw bytes back-to-back. This is the
  Parquet-recommended encoding for byte array columns and is always preferred
  over `plain` for STRING, BINARY, and VARCHAR.
- **Delta Binary Packed** — delta-encodes integer values and packs the deltas
  into a compact binary representation. Effective for monotonically increasing
  or slowly changing integer/timestamp columns (e.g. sequential IDs, event
  timestamps).
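The intuition behind `rle_dictionary` and `delta_binary_packed` can be sketched
in a few lines of Python. This is a conceptual toy only, not QuestDB's or
Parquet's actual bit-level format (real RLE/bit-packing and delta headers are
more involved):

```python
def dictionary_rle_encode(values):
    """Toy RLE-dictionary encoder: map values to small integer keys,
    then run-length encode consecutive repeats of each key."""
    dictionary, keys, index = [], [], {}
    for v in values:
        if v not in index:
            index[v] = len(dictionary)
            dictionary.append(v)
        keys.append(index[v])
    runs = []  # list of [key, run_length] pairs
    for k in keys:
        if runs and runs[-1][0] == k:
            runs[-1][1] += 1
        else:
            runs.append([k, 1])
    return dictionary, runs


def delta_encode(values):
    """Toy delta encoder: store the first value, then only the
    differences between consecutive values. Small deltas pack into
    far fewer bits than the original absolute values."""
    return [values[0]] + [b - a for a, b in zip(values, values[1:])]


# Low-cardinality column: long runs collapse to (key, count) pairs.
dictionary, runs = dictionary_rle_encode(["OK", "OK", "OK", "FAIL", "OK"])

# Monotonic timestamps: large absolute values become tiny deltas.
deltas = delta_encode([1_700_000_000, 1_700_000_010, 1_700_000_020])
```

The same pattern explains the guidance above: repeated status codes shrink to a
handful of runs, while sequential timestamps shrink to small, highly
compressible deltas.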
For the full specification of each encoding, see the
[Apache Parquet encodings documentation](https://parquet.apache.org/docs/file-format/data-pages/encodings/).

When no encoding is specified, QuestDB picks a type-appropriate default:
`rle_dictionary` for SYMBOL and VARCHAR, `delta_length_byte_array` for STRING
and BINARY, and `plain` for everything else.

#### Supported compression codecs

> **Contributor:** Same thing as above. We should probably tell the good/bad
> for each method, or link to somewhere where this is explained.
>
> **Author:** Right, 942dfc3 adds this.
| Codec        | SQL keyword    | Level range |
| ------------ | -------------- | ----------- |
| LZ4 Raw      | `lz4_raw`      | --          |
| Zstd         | `zstd`         | 1-22        |
| Snappy       | `snappy`       | --          |
| Gzip         | `gzip`         | 1-9         |
| Brotli       | `brotli`       | 0-11        |
| Uncompressed | `uncompressed` | --          |

- **LZ4 Raw** — extremely fast compression and decompression with a moderate
  ratio. No tunable level. This is the QuestDB default and a good choice for
  most workloads where query throughput matters.
- **Zstd** — excellent balance of compression ratio and speed across its level
  range. Lower levels (1-3) approach LZ4 speed with better ratios; higher
  levels (up to 22) rival Brotli ratios. A strong general-purpose choice when
  storage savings justify slightly slower decompression.
- **Snappy** — very fast compression and decompression with a moderate ratio.
  No tunable level. Similar trade-offs to LZ4 Raw.
- **Gzip** — widely supported, higher compression ratio than Snappy or LZ4 at
  the cost of slower decompression, which reduces query throughput. Higher
  levels (up to 9) improve the ratio but further increase CPU time.
- **Brotli** — achieves some of the highest compression ratios, especially at
  higher levels, but decompression is significantly slower. Best suited for
  cold/archival data where storage savings outweigh query throughput.
- **Uncompressed** — no compression. Fastest decompression (none needed) but
  largest file size. Useful when data is already incompressible.
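The level-vs-CPU trade-off described above can be observed with Python's
stdlib `zlib` (the DEFLATE algorithm behind gzip). This is only an analogy:
QuestDB's zstd/brotli codecs differ in detail, but the pattern of higher
levels buying a better ratio for more CPU time is the same:

```python
import time
import zlib

# Repetitive, log-like data compresses well at any level.
data = ("device=sensor-7 temp=21.5 status=OK\n" * 20_000).encode()

for level in (1, 6, 9):
    start = time.perf_counter()
    compressed = zlib.compress(data, level=level)
    elapsed_ms = (time.perf_counter() - start) * 1000
    ratio = len(data) / len(compressed)
    print(f"level={level} ratio={ratio:.1f}x time={elapsed_ms:.2f}ms")

# Higher levels yield an equal-or-better ratio at a higher CPU cost;
# level 1 is the throughput-friendly end of the dial, level 9 the
# storage-friendly end.
```

The same reasoning applies when choosing between `lz4_raw` (speed), `zstd` at
a low level (balance), and `gzip`/`brotli` at high levels (storage).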
For more details on Parquet compression, see the
[Apache Parquet compression documentation](https://parquet.apache.org/docs/file-format/data-pages/compression/).

To modify encoding or compression on existing tables, see
[ALTER TABLE ALTER COLUMN SET/DROP PARQUET](/docs/query/sql/alter-table-alter-column-parquet-encoding/).

### Casting types

`castDef` - casts the type of a specific column. `columnRef` must reference
> Should we explain what each encoding is good for? Even if just a link to
> authoritative third-party docs. Without that, I have no idea why I would
> choose `delta_binary_packed`, as it is not used by default for numbers, so
> no idea when it can be convenient.

> Yes, definitely! 942dfc3 adds a reference to the official link and a small
> summary per encoding.