From 2be2bf6ec18f665deb472639233874e0cf028d61 Mon Sep 17 00:00:00 2001 From: Raphael DALMON Date: Fri, 6 Mar 2026 11:12:32 +0100 Subject: [PATCH 1/4] docs: add per-column Parquet encoding and compression documentation --- .../configuration-utils/_cairo.config.json | 4 ++ documentation/query/export-parquet.md | 29 +++++++++++ ...ter-table-alter-column-parquet-encoding.md | 48 +++++++++++++++++ documentation/query/sql/create-table.md | 51 ++++++++++++++++++ documentation/query/sql/show.md | 15 ++++++ documentation/sidebars.js | 1 + static/images/docs/diagrams/.railroad | 3 ++ .../docs/diagrams/parquetEncodingDef.svg | 52 +++++++++++++++++++ 8 files changed, 203 insertions(+) create mode 100644 documentation/query/sql/alter-table-alter-column-parquet-encoding.md create mode 100644 static/images/docs/diagrams/parquetEncodingDef.svg diff --git a/documentation/configuration/configuration-utils/_cairo.config.json b/documentation/configuration/configuration-utils/_cairo.config.json index 20af3db59..e7c70fe92 100644 --- a/documentation/configuration/configuration-utils/_cairo.config.json +++ b/documentation/configuration/configuration-utils/_cairo.config.json @@ -490,5 +490,9 @@ "cairo.partition.encoder.parquet.data.page.size": { "default": "1048576", "description": "Sets the default page size for parquet-encoded partitions." + }, + "cairo.partition.encoder.parquet.min.compression.ratio": { + "default": "1.2", + "description": "Minimum compression ratio (uncompressed_size / compressed_size) for Parquet pages. When a compressed page does not meet this threshold, it is stored uncompressed instead. A value of 0.0 disables the check." 
} } diff --git a/documentation/query/export-parquet.md b/documentation/query/export-parquet.md index 21a22d574..d67c5635e 100644 --- a/documentation/query/export-parquet.md +++ b/documentation/query/export-parquet.md @@ -184,3 +184,32 @@ cairo.partition.encoder.parquet.compression.level=0 When using `ZSTD`, the level ranges from 1 (fastest) to 22, with a default of 9. For COPY exports, you can also override compression per-query. See [Overriding compression](#overriding-compression). + +### Minimum compression ratio + +The `cairo.partition.encoder.parquet.min.compression.ratio` property controls +whether compressed Parquet pages are worth keeping. After compressing a page, +QuestDB checks the ratio of `uncompressed_size / compressed_size`. If the ratio +falls below the threshold, the compressed output is discarded and the page is +stored uncompressed instead. + +```ini +# Default: 1.2 (keep compressed output only if it achieves ~17% size reduction) +cairo.partition.encoder.parquet.min.compression.ratio=1.2 +``` + +A value of `0.0` (or any value <= 1.0) disables the check, always keeping +compressed output. + +The ratio check applies to both data pages and dictionary pages and works with +all compression codecs. It runs after compression, so the CPU cost of +compression is still incurred -- this setting only avoids the read-time +decompression cost of pages that barely compress. + +### Per-column overrides + +Individual columns can override the global encoding and compression settings. +See [CREATE TABLE - Per-column Parquet encoding and compression](/docs/query/sql/create-table/#per-column-parquet-encoding-and-compression) +for defining overrides at table creation, or +[ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING](/docs/query/sql/alter-table-alter-column-parquet-encoding/) +for modifying existing tables.
diff --git a/documentation/query/sql/alter-table-alter-column-parquet-encoding.md b/documentation/query/sql/alter-table-alter-column-parquet-encoding.md new file mode 100644 index 000000000..8387964d4 --- /dev/null +++ b/documentation/query/sql/alter-table-alter-column-parquet-encoding.md @@ -0,0 +1,48 @@ +--- +title: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING +sidebar_label: PARQUET ENCODING +description: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING SQL keyword reference documentation. +--- + +Sets or removes per-column Parquet encoding and compression configuration on +existing tables. These settings only affect +[Parquet partitions](/docs/query/export-parquet/#in-place-conversion) and are +ignored for native partitions. + +## SET + +Override the default Parquet encoding, compression, or both for a column. + +```questdb-sql title="Set encoding only" +ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET ENCODING rle_dictionary; +``` + +```questdb-sql title="Set compression only (with optional level)" +ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET COMPRESSION zstd 3; +``` + +```questdb-sql title="Set both encoding and compression" +ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET ENCODING rle_dictionary COMPRESSION zstd 3; +``` + +## DROP + +Reset per-column overrides back to the server defaults. 
+ +```questdb-sql title="Drop both encoding and compression overrides" +ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET ENCODING COMPRESSION; +``` + +```questdb-sql title="Drop encoding only (keeps compression override)" +ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET ENCODING; +``` + +```questdb-sql title="Drop compression only (keeps encoding override)" +ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET COMPRESSION; +``` + +## Supported encodings and codecs + +See the [CREATE TABLE](/docs/query/sql/create-table/#supported-encodings) +reference for the full list of supported encodings, compression codecs, and +their valid column types. diff --git a/documentation/query/sql/create-table.md b/documentation/query/sql/create-table.md index ad5db8404..af9c4fb0d 100644 --- a/documentation/query/sql/create-table.md +++ b/documentation/query/sql/create-table.md @@ -361,6 +361,57 @@ CREATE TABLE trades ( ) TIMESTAMP(timestamp); ``` +### Per-column Parquet encoding and compression + +![Flow chart showing the syntax of per-column Parquet encoding and compression](/images/docs/diagrams/parquetEncodingDef.svg) + +Column definitions may include an optional `PARQUET` clause followed by +`ENCODING`, `COMPRESSION`, or both. These settings only affect +[Parquet partitions](/docs/query/export-parquet/#in-place-conversion) and are +ignored for native partitions. Both keywords are optional and can be used +independently or together. + +```questdb-sql title="CREATE TABLE with per-column Parquet config" +CREATE TABLE sensors ( + ts TIMESTAMP, + temperature DOUBLE PARQUET ENCODING rle_dictionary COMPRESSION zstd 3, + humidity FLOAT PARQUET ENCODING rle_dictionary, + device_id VARCHAR PARQUET COMPRESSION lz4_raw, + status INT +) TIMESTAMP(ts) PARTITION BY DAY; +``` + +When omitted, columns use the global defaults: a type-appropriate encoding and +the server-wide compression codec +(`cairo.partition.encoder.parquet.compression.codec`). 
+ +#### Supported encodings + +| Encoding | SQL keyword | Valid column types | +| ----------------------- | ------------------------- | ---------------------------- | +| Plain | `plain` | All | +| RLE Dictionary | `rle_dictionary` | All except BOOLEAN and ARRAY | +| Delta Length Byte Array | `delta_length_byte_array` | STRING, BINARY, VARCHAR | +| Delta Binary Packed | `delta_binary_packed` | INT, LONG, DATE, TIMESTAMP | + +When no encoding is specified, QuestDB picks a type-appropriate default: +`rle_dictionary` for SYMBOL and VARCHAR, `delta_length_byte_array` for STRING +and BINARY, and `plain` for everything else. + +#### Supported compression codecs + +| Codec | SQL keyword | Level range | +| ------------ | -------------- | ----------- | +| Uncompressed | `uncompressed` | -- | +| Snappy | `snappy` | -- | +| Gzip | `gzip` | 1-9 | +| Brotli | `brotli` | 0-11 | +| Zstd | `zstd` | 1-22 | +| LZ4 Raw | `lz4_raw` | -- | + +To modify encoding or compression on existing tables, see +[ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING](/docs/query/sql/alter-table-alter-column-parquet-encoding/). + ### Casting types `castDef` - casts the type of a specific column. 
`columnRef` must reference diff --git a/documentation/query/sql/show.md b/documentation/query/sql/show.md index 51b8a2de5..66cc0fbc7 100644 --- a/documentation/query/sql/show.md +++ b/documentation/query/sql/show.md @@ -88,6 +88,21 @@ CREATE TABLE trades ( WITH maxUncommittedRows=500000, o3MaxLag=600000000us; ``` +#### Per-column Parquet encoding + +When columns have per-column Parquet encoding or compression overrides, they +appear in the `SHOW CREATE TABLE` output: + +```questdb-sql +CREATE TABLE sensors ( + ts TIMESTAMP, + temperature DOUBLE PARQUET ENCODING rle_dictionary COMPRESSION zstd 3, + humidity FLOAT PARQUET ENCODING rle_dictionary, + device_id VARCHAR PARQUET COMPRESSION lz4_raw, + status INT +) timestamp(ts) PARTITION BY DAY BYPASS WAL; +``` + #### Enterprise variant [QuestDB Enterprise](/enterprise/) will include an additional `OWNED BY` clause populated with the current user. diff --git a/documentation/sidebars.js b/documentation/sidebars.js index 645e259ef..f96b2e32d 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -264,6 +264,7 @@ module.exports = { "query/sql/alter-table-alter-column-cache", "query/sql/alter-table-change-column-type", "query/sql/alter-table-alter-column-drop-index", + "query/sql/alter-table-alter-column-parquet-encoding", "query/sql/alter-table-change-symbol-capacity", ], }, diff --git a/static/images/docs/diagrams/.railroad b/static/images/docs/diagrams/.railroad index e0536140e..a0258fdcd 100644 --- a/static/images/docs/diagrams/.railroad +++ b/static/images/docs/diagrams/.railroad @@ -379,6 +379,9 @@ refreshMatView dropMatView ::= 'DROP' 'MATERIALIZED' 'VIEW' ('IF' 'EXISTS')? viewName +parquetEncodingDef + ::= 'PARQUET' ( 'ENCODING' encoding )? ( 'COMPRESSION' codec level? )? + pivot ::= ( '(' selectQuery ')' | tableName ) ('WHERE' condition)? 
diff --git a/static/images/docs/diagrams/parquetEncodingDef.svg b/static/images/docs/diagrams/parquetEncodingDef.svg new file mode 100644 index 000000000..4fe4de4d9 --- /dev/null +++ b/static/images/docs/diagrams/parquetEncodingDef.svg @@ -0,0 +1,52 @@ + + + + + + + + + PARQUET + + + ENCODING + + + encoding + + COMPRESSION + + + codec + + + level + + + \ No newline at end of file From 750adc085c2270debd05226edf7d23e388805b4f Mon Sep 17 00:00:00 2001 From: Raphael DALMON Date: Fri, 6 Mar 2026 11:17:25 +0100 Subject: [PATCH 2/4] docs: update title and description for Parquet encoding/compression documentation --- .../query/sql/alter-table-alter-column-parquet-encoding.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/documentation/query/sql/alter-table-alter-column-parquet-encoding.md b/documentation/query/sql/alter-table-alter-column-parquet-encoding.md index 8387964d4..b3a38d110 100644 --- a/documentation/query/sql/alter-table-alter-column-parquet-encoding.md +++ b/documentation/query/sql/alter-table-alter-column-parquet-encoding.md @@ -1,7 +1,7 @@ --- -title: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING -sidebar_label: PARQUET ENCODING -description: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING SQL keyword reference documentation. +title: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING/COMPRESSION +sidebar_label: PARQUET ENCODING/COMPRESSION +description: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING/COMPRESSION SQL keyword reference documentation. 
--- Sets or removes per-column Parquet encoding and compression configuration on From 5891cca290d0d6f15be6ff152e616acb211fc3fc Mon Sep 17 00:00:00 2001 From: Raphael DALMON Date: Wed, 11 Mar 2026 10:15:24 +0100 Subject: [PATCH 3/4] update sql syntax --- documentation/query/export-parquet.md | 2 +- ...ter-table-alter-column-parquet-encoding.md | 24 ++++------- documentation/query/sql/create-table.md | 16 +++---- documentation/query/sql/show.md | 6 +-- static/images/docs/diagrams/.railroad | 2 +- .../docs/diagrams/parquetEncodingDef.svg | 42 +++++++++++-------- 6 files changed, 47 insertions(+), 45 deletions(-) diff --git a/documentation/query/export-parquet.md b/documentation/query/export-parquet.md index d67c5635e..a0b84e174 100644 --- a/documentation/query/export-parquet.md +++ b/documentation/query/export-parquet.md @@ -211,5 +211,5 @@ penalty of keeping pages that barely compress. Individual columns can override the global encoding and compression settings. See [CREATE TABLE - Per-column Parquet encoding and compression](/docs/query/sql/create-table/#per-column-parquet-encoding-and-compression) for defining overrides at table creation, or -[ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING](/docs/query/sql/alter-table-alter-column-parquet-encoding/) +[ALTER TABLE ALTER COLUMN SET/DROP PARQUET](/docs/query/sql/alter-table-alter-column-parquet-encoding/) for modifying existing tables. 
diff --git a/documentation/query/sql/alter-table-alter-column-parquet-encoding.md b/documentation/query/sql/alter-table-alter-column-parquet-encoding.md index b3a38d110..37d0bf73e 100644 --- a/documentation/query/sql/alter-table-alter-column-parquet-encoding.md +++ b/documentation/query/sql/alter-table-alter-column-parquet-encoding.md @@ -1,7 +1,7 @@ --- -title: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING/COMPRESSION +title: ALTER TABLE ALTER COLUMN SET/DROP PARQUET sidebar_label: PARQUET ENCODING/COMPRESSION -description: ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING/COMPRESSION SQL keyword reference documentation. +description: ALTER TABLE ALTER COLUMN SET/DROP PARQUET SQL keyword reference documentation. --- Sets or removes per-column Parquet encoding and compression configuration on @@ -12,33 +12,27 @@ ignored for native partitions. ## SET Override the default Parquet encoding, compression, or both for a column. +The syntax is `SET PARQUET(encoding [, compression[(level)]])`. Use `default` +for the encoding when specifying compression only. ```questdb-sql title="Set encoding only" -ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET ENCODING rle_dictionary; +ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET(rle_dictionary); ``` ```questdb-sql title="Set compression only (with optional level)" -ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET COMPRESSION zstd 3; +ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET(default, zstd(3)); ``` ```questdb-sql title="Set both encoding and compression" -ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET ENCODING rle_dictionary COMPRESSION zstd 3; +ALTER TABLE sensors ALTER COLUMN temperature SET PARQUET(rle_dictionary, zstd(3)); ``` ## DROP Reset per-column overrides back to the server defaults. 
-```questdb-sql title="Drop both encoding and compression overrides" -ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET ENCODING COMPRESSION; -``` - -```questdb-sql title="Drop encoding only (keeps compression override)" -ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET ENCODING; -``` - -```questdb-sql title="Drop compression only (keeps encoding override)" -ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET COMPRESSION; +```questdb-sql title="Reset to defaults" +ALTER TABLE sensors ALTER COLUMN temperature DROP PARQUET; ``` ## Supported encodings and codecs diff --git a/documentation/query/sql/create-table.md b/documentation/query/sql/create-table.md index af9c4fb0d..4664ea9c4 100644 --- a/documentation/query/sql/create-table.md +++ b/documentation/query/sql/create-table.md @@ -365,18 +365,18 @@ CREATE TABLE trades ( ![Flow chart showing the syntax of per-column Parquet encoding and compression](/images/docs/diagrams/parquetEncodingDef.svg) -Column definitions may include an optional `PARQUET` clause followed by -`ENCODING`, `COMPRESSION`, or both. These settings only affect +Column definitions may include an optional `PARQUET(encoding [, compression[(level)]])` +clause. These settings only affect [Parquet partitions](/docs/query/export-parquet/#in-place-conversion) and are -ignored for native partitions. Both keywords are optional and can be used -independently or together. +ignored for native partitions. The compression argument is optional — use
```questdb-sql title="CREATE TABLE with per-column Parquet config" CREATE TABLE sensors ( ts TIMESTAMP, - temperature DOUBLE PARQUET ENCODING rle_dictionary COMPRESSION zstd 3, - humidity FLOAT PARQUET ENCODING rle_dictionary, - device_id VARCHAR PARQUET COMPRESSION lz4_raw, + temperature DOUBLE PARQUET(rle_dictionary, zstd(3)), + humidity FLOAT PARQUET(rle_dictionary), + device_id VARCHAR PARQUET(default, lz4_raw), status INT ) TIMESTAMP(ts) PARTITION BY DAY; ``` @@ -410,7 +410,7 @@ and BINARY, and `plain` for everything else. | LZ4 Raw | `lz4_raw` | -- | To modify encoding or compression on existing tables, see -[ALTER TABLE ALTER COLUMN SET/DROP PARQUET ENCODING](/docs/query/sql/alter-table-alter-column-parquet-encoding/). +[ALTER TABLE ALTER COLUMN SET/DROP PARQUET](/docs/query/sql/alter-table-alter-column-parquet-encoding/). ### Casting types diff --git a/documentation/query/sql/show.md b/documentation/query/sql/show.md index 66cc0fbc7..4669e4d8c 100644 --- a/documentation/query/sql/show.md +++ b/documentation/query/sql/show.md @@ -96,9 +96,9 @@ appear in the `SHOW CREATE TABLE` output: ```questdb-sql CREATE TABLE sensors ( ts TIMESTAMP, - temperature DOUBLE PARQUET ENCODING rle_dictionary COMPRESSION zstd 3, - humidity FLOAT PARQUET ENCODING rle_dictionary, - device_id VARCHAR PARQUET COMPRESSION lz4_raw, + temperature DOUBLE PARQUET(rle_dictionary, zstd(3)), + humidity FLOAT PARQUET(rle_dictionary), + device_id VARCHAR PARQUET(default, lz4_raw), status INT ) timestamp(ts) PARTITION BY DAY BYPASS WAL; ``` diff --git a/static/images/docs/diagrams/.railroad b/static/images/docs/diagrams/.railroad index a0258fdcd..7e1f10612 100644 --- a/static/images/docs/diagrams/.railroad +++ b/static/images/docs/diagrams/.railroad @@ -380,7 +380,7 @@ dropMatView ::= 'DROP' 'MATERIALIZED' 'VIEW' ('IF' 'EXISTS')? viewName parquetEncodingDef - ::= 'PARQUET' ( 'ENCODING' encoding )? ( 'COMPRESSION' codec level? )? 
+ ::= 'PARQUET' '(' encoding ( ',' compression-codec ( '(' level ')' )? )? ')' pivot ::= ( '(' selectQuery ')' | tableName ) diff --git a/static/images/docs/diagrams/parquetEncodingDef.svg b/static/images/docs/diagrams/parquetEncodingDef.svg index 4fe4de4d9..b352e5d6c 100644 --- a/static/images/docs/diagrams/parquetEncodingDef.svg +++ b/static/images/docs/diagrams/parquetEncodingDef.svg @@ -1,4 +1,4 @@ - + \ No newline at end of file From 942dfc3bfdfe2883971fc043beb6d92c6d491962 Mon Sep 17 00:00:00 2001 From: Raphael DALMON Date: Wed, 11 Mar 2026 17:20:08 +0100 Subject: [PATCH 4/4] docs: enhance Parquet encoding and compression documentation with detailed descriptions --- documentation/query/sql/create-table.md | 48 +++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/documentation/query/sql/create-table.md b/documentation/query/sql/create-table.md index 4664ea9c4..e07a8567f 100644 --- a/documentation/query/sql/create-table.md +++ b/documentation/query/sql/create-table.md @@ -394,6 +394,27 @@ the server-wide compression codec | Delta Length Byte Array | `delta_length_byte_array` | STRING, BINARY, VARCHAR | | Delta Binary Packed | `delta_binary_packed` | INT, LONG, DATE, TIMESTAMP | +- **Plain** — stores values as-is with no transformation. Simplest encoding + with no overhead. Use as a fallback when data has high cardinality and no + exploitable patterns (e.g. random floats or UUIDs). +- **RLE Dictionary** — builds a dictionary of unique values and replaces each + value with a short integer key. The keys are then encoded with a hybrid of + run-length encoding (for repeated consecutive keys) and bit-packing (for + non-repeating sequences). Best for low-to-medium cardinality columns (status + codes, device IDs, symbols). The lower the cardinality, the greater the + compression. +- **Delta Length Byte Array** — delta-encodes the lengths of consecutive + string/binary values, then stores the raw bytes back-to-back. 
This is the + Parquet-recommended encoding for byte array columns and is always preferred + over `plain` for STRING, BINARY, and VARCHAR. +- **Delta Binary Packed** — delta-encodes integer values and packs the deltas + into a compact binary representation. Effective for monotonically increasing + or slowly changing integer/timestamp columns (e.g. sequential IDs, event + timestamps). + +For the full specification of each encoding, see the +[Apache Parquet encodings documentation](https://parquet.apache.org/docs/file-format/data-pages/encodings/). + When no encoding is specified, QuestDB picks a type-appropriate default: `rle_dictionary` for SYMBOL and VARCHAR, `delta_length_byte_array` for STRING and BINARY, and `plain` for everything else. @@ -402,12 +423,33 @@ and BINARY, and `plain` for everything else. | Codec | SQL keyword | Level range | | ------------ | -------------- | ----------- | -| Uncompressed | `uncompressed` | -- | +| LZ4 Raw | `lz4_raw` | -- | +| Zstd | `zstd` | 1-22 | | Snappy | `snappy` | -- | | Gzip | `gzip` | 1-9 | | Brotli | `brotli` | 0-11 | -| Zstd | `zstd` | 1-22 | -| LZ4 Raw | `lz4_raw` | -- | +| Uncompressed | `uncompressed` | -- | + +- **LZ4 Raw** — extremely fast compression and decompression with a moderate + ratio. No tunable level. This is the QuestDB default and a good choice for + most workloads where query throughput matters. +- **Zstd** — excellent balance of compression ratio and speed across its level + range. Lower levels (1-3) approach LZ4 speed with better ratios; higher + levels (up to 22) rival Brotli ratios. A strong general-purpose choice when + storage savings justify slightly slower decompression. +- **Snappy** — very fast compression and decompression with moderate ratio. No + tunable level. Similar trade-offs to LZ4 Raw. +- **Gzip** — widely supported, higher compression ratio than Snappy or LZ4 at + the cost of slower decompression, which reduces query throughput. 
Higher + levels (up to 9) improve ratio but further increase CPU time. +- **Brotli** — achieves some of the highest compression ratios, especially at + higher levels, but decompression is significantly slower. Best suited for + cold/archival data where storage savings outweigh query throughput. +- **Uncompressed** — no compression. Fastest decompression (none needed) but + largest file size. Useful when data is already incompressible. + +For more details on Parquet compression, see the +[Apache Parquet compression documentation](https://parquet.apache.org/docs/file-format/data-pages/compression/). To modify encoding or compression on existing tables, see [ALTER TABLE ALTER COLUMN SET/DROP PARQUET](/docs/query/sql/alter-table-alter-column-parquet-encoding/).