From 5f30556f80fb93477264cd2835354fd5d3f31da6 Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 6 Mar 2026 14:00:05 +0100 Subject: [PATCH] Add GROUPING SETS, ROLLUP, and CUBE documentation Documents the new SQL:1999 grouping sets support. Adds a main reference page with syntax, examples, SAMPLE BY integration, and limitations. Includes CUBE and ROLLUP redirect pages, sidebar entries, updated GROUP BY and SAMPLE BY syntax blocks, and the cairo.sql.max.grouping.sets configuration property. --- .../configuration-utils/_cairo.config.json | 4 + .../sql/time-series/fill-from-one-column.md | 2 +- documentation/query/sql/cube.md | 7 + documentation/query/sql/group-by.md | 12 +- documentation/query/sql/grouping-sets.md | 320 ++++++++++++++++++ documentation/query/sql/rollup.md | 7 + documentation/query/sql/sample-by.md | 29 +- documentation/sidebars.js | 3 + 8 files changed, 367 insertions(+), 17 deletions(-) create mode 100644 documentation/query/sql/cube.md create mode 100644 documentation/query/sql/grouping-sets.md create mode 100644 documentation/query/sql/rollup.md diff --git a/documentation/configuration/configuration-utils/_cairo.config.json b/documentation/configuration/configuration-utils/_cairo.config.json index 20af3db59..2ef2b5143 100644 --- a/documentation/configuration/configuration-utils/_cairo.config.json +++ b/documentation/configuration/configuration-utils/_cairo.config.json @@ -419,6 +419,10 @@ "default": "1", "description": "Number of partition expected on average. Initial value for purge allocation job, extended in runtime automatically." }, + "cairo.sql.max.grouping.sets": { + "default": "4096", + "description": "Maximum number of grouping sets allowed in a single query. ROLLUP produces N+1 sets, CUBE produces 2^N sets, and explicit GROUPING SETS produces one set per listed group. Queries exceeding this limit are rejected at parse time." 
+ }, "cairo.sql.parallel.groupby.enabled": { "default": "true", "description": "Enables parallel GROUP BY execution; requires at least 4 shared worker threads." diff --git a/documentation/cookbook/sql/time-series/fill-from-one-column.md b/documentation/cookbook/sql/time-series/fill-from-one-column.md index b7b97a9a6..509248d02 100644 --- a/documentation/cookbook/sql/time-series/fill-from-one-column.md +++ b/documentation/cookbook/sql/time-series/fill-from-one-column.md @@ -69,7 +69,7 @@ FROM with_previous_vals; :::info Related Documentation - [SAMPLE BY](/docs/query/sql/sample-by/) -- [FILL keyword](/docs/query/sql/sample-by/#fill-keywords) +- [FILL keyword](/docs/query/sql/sample-by/#fill-options) - [Window functions](/docs/query/functions/window-functions/syntax/) - [last_value()](/docs/query/functions/window-functions/reference/#last_value) ::: diff --git a/documentation/query/sql/cube.md b/documentation/query/sql/cube.md new file mode 100644 index 000000000..8523522ae --- /dev/null +++ b/documentation/query/sql/cube.md @@ -0,0 +1,7 @@ +--- +title: CUBE keyword +sidebar_label: CUBE +description: CUBE SQL keyword reference for computing all combinations of aggregation levels. +--- + +See [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/#cube). diff --git a/documentation/query/sql/group-by.md b/documentation/query/sql/group-by.md index f2346e756..458a4256c 100644 --- a/documentation/query/sql/group-by.md +++ b/documentation/query/sql/group-by.md @@ -9,7 +9,16 @@ is [optional](/docs/concepts/deep-dive/sql-extensions/#group-by-is-optional). ## Syntax -![Flow chart showing the syntax of the GROUP BY keyword](/images/docs/diagrams/groupBy.svg) +```questdb-sql +SELECT column [, ...], aggregate(column) [, ...] +FROM table +[WHERE condition] +GROUP BY + column [, ...] 
+ | ROLLUP(column [, ...]) + | CUBE(column [, ...]) + | GROUPING SETS ((column [, ...]) [, ...]) +``` :::note @@ -72,6 +81,7 @@ GROUP BY a, b; ## See also +- [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/) - Compute subtotals and grand totals in a single query - [PIVOT](/docs/query/sql/pivot/) - Transform GROUP BY results from rows to columns - [SAMPLE BY](/docs/query/sql/sample-by/) - Time-series aggregation - [Aggregation functions](/docs/query/functions/aggregation/) - Available aggregate functions diff --git a/documentation/query/sql/grouping-sets.md b/documentation/query/sql/grouping-sets.md new file mode 100644 index 000000000..6a8979bd0 --- /dev/null +++ b/documentation/query/sql/grouping-sets.md @@ -0,0 +1,320 @@ +--- +title: GROUPING SETS, ROLLUP, and CUBE +sidebar_label: GROUPING SETS +description: GROUPING SETS, ROLLUP, and CUBE SQL keyword reference for computing multiple levels of aggregation in a single query. +--- + +`GROUPING SETS`, `ROLLUP`, and `CUBE` perform aggregation over multiple +dimensions within a single query. This can be used, for example, to compute +subtotals and grand totals alongside detail-level results, without multiple +passes over the data. + +## Syntax + +Grouping sets can be used with both `GROUP BY` and `SAMPLE BY`. + +With `GROUP BY`: + +```questdb-sql +SELECT column [, ...], aggregate(column) [, ...] +FROM table +[WHERE condition] +GROUP BY + [column [, ...],] + ROLLUP(column [, ...]) + | CUBE(column [, ...]) + | GROUPING SETS ((column [, ...]) [, ...]) +``` + +With `SAMPLE BY`: + +```questdb-sql +SELECT [column [, ...],] aggregate(column) [, ...] +FROM table +[WHERE condition] +SAMPLE BY n{units} + [ROLLUP(column [, ...]) | CUBE(column [, ...]) | GROUPING SETS (...)] + [FILL(...)] + [ALIGN TO ...] +``` + +## GROUPING SETS + +`GROUPING SETS` gives explicit control over which grouping combinations to +compute. Each set in the list produces its own group of aggregated rows. 
+ +```questdb-sql title="Explicit grouping sets" demo +SELECT symbol, side, SUM(amount) AS total_amount, COUNT(*) AS trade_count +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY GROUPING SETS ( + (symbol, side), + (symbol), + () +); +``` + +- `(symbol, side)` groups by both columns (detail rows) +- `(symbol)` groups by symbol only (subtotals per symbol, `side` is `NULL`) +- `()` is the empty set, producing a single grand total row (both columns `NULL`) + +You can specify any combination of column subsets. `ROLLUP` and `CUBE` are +shorthand for common `GROUPING SETS` patterns. + +## ROLLUP + +`ROLLUP` generates hierarchical subtotals, progressively dropping columns from +right to left. With N columns, `ROLLUP` produces N+1 grouping sets. + +```questdb-sql title="Trade volume breakdown with ROLLUP" demo +SELECT symbol, side, + SUM(price * amount) AS volume, + COUNT(*) AS trades +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY ROLLUP(symbol, side) +ORDER BY symbol, side; +``` + +This produces: + +- Per-symbol, per-side detail rows +- Per-symbol subtotals (`side` is `NULL`) +- A single grand total row (both `NULL`) + +`ROLLUP(symbol, side)` is equivalent to: + +```questdb-sql +GROUP BY GROUPING SETS ( + (symbol, side), + (symbol), + () +) +``` + +With three columns, `ROLLUP(a, b, c)` produces four grouping sets: + +```questdb-sql +GROUP BY GROUPING SETS ( + (a, b, c), + (a, b), + (a), + () +) +``` + +## CUBE + +`CUBE` generates all possible combinations of the specified columns. With N +columns, `CUBE` produces 2^N grouping sets. 
+ +```questdb-sql title="Cross-tabulation with CUBE" demo +SELECT symbol, side, + SUM(amount) AS total_amount, + GROUPING_ID(symbol, side) AS grp +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY CUBE(symbol, side) +ORDER BY grp, symbol, side; +``` + +`CUBE(symbol, side)` is equivalent to: + +```questdb-sql +GROUP BY GROUPING SETS ( + (symbol, side), -- both grouped + (symbol), -- symbol only + (side), -- side only + () -- grand total +) +``` + +Ordering by `GROUPING_ID` groups the output by aggregation level: + +- `grp=0`: all detail combinations +- `grp=1`: per-symbol totals (side rolled up) +- `grp=2`: per-side totals (symbol rolled up) +- `grp=3`: grand total + +`CUBE` is limited to 15 columns maximum (2^15 = 32,768 grouping sets). + +## Composite syntax + +Plain `GROUP BY` columns can be combined with `ROLLUP` or `CUBE`. The plain +columns are always included in every grouping set. + +```questdb-sql title="symbol always grouped, side rolled up" demo +SELECT symbol, side, SUM(amount) AS total_amount +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY symbol, ROLLUP(side); +``` + +This is equivalent to: + +```questdb-sql +GROUP BY GROUPING SETS ( + (symbol, side), + (symbol) +) +``` + +There is no empty set `()` here because `symbol` is always present. + +## GROUPING() and GROUPING_ID() functions + +When columns are rolled up, they appear as `NULL` in the result. The data might +also contain genuine `NULL` values. `GROUPING()` and `GROUPING_ID()` distinguish +between the two. + +### GROUPING(column) + +Accepts a single column. 
Returns: + +- `0` if the column is actively grouped (a `NULL` is a real data value) +- `1` if the column is rolled up (the `NULL` is a placeholder) + +```questdb-sql title="Identify rolled-up rows" demo +SELECT symbol, side, SUM(amount) AS total_amount, + GROUPING(symbol) AS gs, + GROUPING(side) AS gsd +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY ROLLUP(symbol, side) +ORDER BY gs, gsd, symbol, side; +``` + +In the results: + +| gs | gsd | Meaning | +| -- | --- | ------- | +| 0 | 0 | Detail row: both columns actively grouped | +| 0 | 1 | Subtotal: grouped by symbol, side rolled up | +| 1 | 1 | Grand total: both columns rolled up | + +### GROUPING_ID(column1, column2, ...) + +Accepts one or more columns. Returns an integer bitmask combining the +`GROUPING()` values of all specified columns. Bit positions are assigned +right-to-left: the rightmost argument occupies bit 0 (least significant bit). + +```questdb-sql title="Bitmask for aggregation levels" demo +SELECT symbol, side, SUM(amount) AS total_amount, + GROUPING_ID(symbol, side) AS grp +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY CUBE(symbol, side) +ORDER BY grp, symbol, side; +``` + +For `GROUPING_ID(symbol, side)`, bit 1 is assigned to `symbol` and bit 0 to +`side`: + +| grp | Binary | Meaning | +| --- | ------ | ------- | +| 0 | 0b00 | Both columns grouped | +| 1 | 0b01 | `side` rolled up | +| 2 | 0b10 | `symbol` rolled up | +| 3 | 0b11 | Both rolled up (grand total) | + +Writing `GROUPING_ID(side, symbol)` would reverse the bit assignments. + +## SAMPLE BY integration + +Grouping sets work with QuestDB's `SAMPLE BY` clause for time-bucketed +aggregation with multiple rollup levels. 
+ +```questdb-sql title="Hourly breakdown with ROLLUP" demo +SELECT timestamp, symbol, SUM(amount) AS total_amount, AVG(price) AS avg_price +FROM trades +WHERE timestamp IN '$now-1d..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +SAMPLE BY 1h ROLLUP(symbol) +ORDER BY timestamp, symbol; +``` + +Each time bucket contains one row per symbol plus one grand total row (where +`symbol` is `NULL`). The timestamp column is never rolled up - it is always +present as the time bucket key. + +### FILL support + +`FILL` works with grouping sets. Missing time buckets are filled per key +combination - each distinct (symbol, grouping level) pair gets its own fill row. + +```questdb-sql title="SAMPLE BY with FILL and ROLLUP" demo +SELECT timestamp, symbol, SUM(amount) AS total_amount, AVG(price) AS avg_price +FROM trades +WHERE timestamp IN '$now-1d..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +SAMPLE BY 1h ROLLUP(symbol) FILL(0) +ORDER BY timestamp, symbol; +``` + +Supported FILL modes: + +| FILL mode | Supported | +| ------------ | --------- | +| `FILL(NONE)` | Yes | +| `FILL(NULL)` | Yes | +| `FILL(value)` | Yes | +| `FILL(PREV)` | No | +| `FILL(LINEAR)` | No | + +`GROUPING()` and `GROUPING_ID()` values are preserved in fill rows. They are not +replaced by the fill value. + +```questdb-sql title="GROUPING values preserved in fill rows" demo +SELECT GROUPING(symbol) AS gs, timestamp, symbol, SUM(amount) AS total_amount +FROM trades +WHERE timestamp IN '$now-1d..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +SAMPLE BY 1h ROLLUP(symbol) FILL(NULL) +ORDER BY timestamp, gs, symbol; +``` + +A fill row for a missing hour shows `gs=0` for detail-level fills and `gs=1` for +grand-total-level fills, just like real data rows. Only aggregate columns get the +fill value. + +## Limitations + +- **Expressions not allowed** in `ROLLUP`, `CUBE`, or `GROUPING SETS` - only + column references are accepted. `ROLLUP(a + b)` is rejected; use a subquery or + alias. 
Plain columns in composite syntax (`GROUP BY expr, ROLLUP(col)`) are not + restricted. + +- **No mixed qualified/unqualified references** to the same column - + `ROLLUP(a, t.a)` is rejected. Use one form consistently. + +- **Not supported with `LATEST ON`** - rejected with an error. + +- **`FILL(PREV)` and `FILL(LINEAR)` not supported** with grouping sets. + +- **`CUBE` limited to 15 columns** (2^15 = 32,768 grouping sets). With the default `cairo.sql.max.grouping.sets` of 4096, a `CUBE` over 13 or more columns (2^13 = 8,192 sets) already exceeds the per-query limit unless that property is raised. + +- **`GROUPING()` / `GROUPING_ID()` limited to 31 `GROUP BY` key columns** - the + bitmask is int-based. + +- **No multiple `ROLLUP`/`CUBE` in the same `GROUP BY`** - + `GROUP BY ROLLUP(a), CUBE(b)` is not supported. + +- **Maximum grouping sets per query** - controlled by the + `cairo.sql.max.grouping.sets` + [configuration property](/docs/configuration/overview/) (default 4096). + `ROLLUP` produces N+1 sets, `CUBE` produces 2^N sets, and explicit + `GROUPING SETS` produces one set per listed group. Queries exceeding this limit + are rejected at parse time. + +## See also + +- [GROUP BY](/docs/query/sql/group-by/) - Standard grouping +- [SAMPLE BY](/docs/query/sql/sample-by/) - Time-series aggregation +- [PIVOT](/docs/query/sql/pivot/) - Transform GROUP BY results from rows to columns +- [Aggregation functions](/docs/query/functions/aggregation/) - Available aggregate functions diff --git a/documentation/query/sql/rollup.md b/documentation/query/sql/rollup.md new file mode 100644 index 000000000..51d0f5850 --- /dev/null +++ b/documentation/query/sql/rollup.md @@ -0,0 +1,7 @@ +--- +title: ROLLUP keyword +sidebar_label: ROLLUP +description: ROLLUP SQL keyword reference for computing hierarchical subtotals. +--- + +See [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/#rollup). 
diff --git a/documentation/query/sql/sample-by.md b/documentation/query/sql/sample-by.md index 954fa2272..6b31ae0db 100644 --- a/documentation/query/sql/sample-by.md +++ b/documentation/query/sql/sample-by.md @@ -16,21 +16,17 @@ use of the [FILL](#fill-options) keyword to specify a fill behavior. ## Syntax -### SAMPLE BY keywords - -![Flow chart showing the syntax of the SAMPLE BY keywords](/images/docs/diagrams/sampleBy.svg) - -### FROM-TO keywords - -![Flow chart showing the syntax of the FROM-TO keywords](/images/docs/diagrams/fromTo.svg) - -### FILL keywords - -![Flow chart showing the syntax of the FILL keyword](/images/docs/diagrams/fill.svg) - -### ALIGN TO keywords - -![Flow chart showing the syntax of the ALIGN TO keywords](/images/docs/diagrams/alignToCalTimeZone.svg) +```questdb-sql +SELECT [column [, ...],] aggregate(column) [, ...] +FROM table +[WHERE condition] +SAMPLE BY n{units} + [ROLLUP(column [, ...]) | CUBE(column [, ...]) | GROUPING SETS (...)] + [FROM timestamp TO timestamp] + [FILL(NONE | NULL | PREV | LINEAR | value [, ...])] + [ALIGN TO CALENDAR [TIME ZONE tz] [WITH OFFSET 'HH:mm'] + | ALIGN TO FIRST OBSERVATION] +``` ## Sample units @@ -136,6 +132,8 @@ restrictions apply: `NONE`, `NULL`, `PREV`, `LINEAR` and constants may be used. - `LINEAR` strategy is not supported for keyed queries, i.e. queries that contain non-aggregated columns other than the timestamp in the SELECT clause. +- `PREV` and `LINEAR` strategies are not supported with + [GROUPING SETS, ROLLUP, or CUBE](/docs/query/sql/grouping-sets/). 
- The `FILL` keyword must precede alignment described in the [sample calculation section](#sample-calculation), i.e.: @@ -593,6 +591,7 @@ SAMPLE BY 1h; This section includes links to additional information such as tutorials: +- [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/) - Compute subtotals and grand totals within time buckets - [PIVOT](/docs/query/sql/pivot/) - Transform SAMPLE BY results from rows to columns - [Materialized Views](/docs/concepts/materialized-views/) - Pre-compute SAMPLE BY queries for better performance - [SQL Extensions for Time-Series Data in QuestDB](/blog/2022/11/23/sql-extensions-time-series-data-questdb-part-ii/) diff --git a/documentation/sidebars.js b/documentation/sidebars.js index 645e259ef..a72ee6468 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -414,16 +414,19 @@ module.exports = { "query/sql/asof-join", "query/sql/case", "query/sql/cast", + "query/sql/cube", "query/sql/declare", "query/sql/distinct", "query/sql/fill", "query/sql/group-by", + "query/sql/grouping-sets", "query/sql/horizon-join", "query/sql/join", "query/sql/latest-on", "query/sql/limit", "query/sql/order-by", "query/sql/pivot", + "query/sql/rollup", "query/sql/sample-by", "query/sql/where", "query/sql/window-join",