diff --git a/documentation/configuration/configuration-utils/_cairo.config.json b/documentation/configuration/configuration-utils/_cairo.config.json index 20af3db59..2ef2b5143 100644 --- a/documentation/configuration/configuration-utils/_cairo.config.json +++ b/documentation/configuration/configuration-utils/_cairo.config.json @@ -419,6 +419,10 @@ "default": "1", "description": "Number of partition expected on average. Initial value for purge allocation job, extended in runtime automatically." }, + "cairo.sql.max.grouping.sets": { + "default": "4096", + "description": "Maximum number of grouping sets allowed in a single query. ROLLUP produces N+1 sets, CUBE produces 2^N sets, and explicit GROUPING SETS produces one set per listed group. Queries exceeding this limit are rejected at parse time." + }, "cairo.sql.parallel.groupby.enabled": { "default": "true", "description": "Enables parallel GROUP BY execution; requires at least 4 shared worker threads." diff --git a/documentation/cookbook/sql/time-series/fill-from-one-column.md b/documentation/cookbook/sql/time-series/fill-from-one-column.md index b7b97a9a6..509248d02 100644 --- a/documentation/cookbook/sql/time-series/fill-from-one-column.md +++ b/documentation/cookbook/sql/time-series/fill-from-one-column.md @@ -69,7 +69,7 @@ FROM with_previous_vals; :::info Related Documentation - [SAMPLE BY](/docs/query/sql/sample-by/) -- [FILL keyword](/docs/query/sql/sample-by/#fill-keywords) +- [FILL keyword](/docs/query/sql/sample-by/#fill-options) - [Window functions](/docs/query/functions/window-functions/syntax/) - [last_value()](/docs/query/functions/window-functions/reference/#last_value) ::: diff --git a/documentation/query/sql/cube.md b/documentation/query/sql/cube.md new file mode 100644 index 000000000..8523522ae --- /dev/null +++ b/documentation/query/sql/cube.md @@ -0,0 +1,7 @@ +--- +title: CUBE keyword +sidebar_label: CUBE +description: CUBE SQL keyword reference for computing all combinations of aggregation 
levels. +--- + +See [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/#cube). diff --git a/documentation/query/sql/group-by.md b/documentation/query/sql/group-by.md index f2346e756..458a4256c 100644 --- a/documentation/query/sql/group-by.md +++ b/documentation/query/sql/group-by.md @@ -9,7 +9,16 @@ is [optional](/docs/concepts/deep-dive/sql-extensions/#group-by-is-optional). ## Syntax -![Flow chart showing the syntax of the GROUP BY keyword](/images/docs/diagrams/groupBy.svg) +```questdb-sql +SELECT column [, ...], aggregate(column) [, ...] +FROM table +[WHERE condition] +GROUP BY + column [, ...] + | ROLLUP(column [, ...]) + | CUBE(column [, ...]) + | GROUPING SETS ((column [, ...]) [, ...]) +``` :::note @@ -72,6 +81,7 @@ GROUP BY a, b; ## See also +- [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/) - Compute subtotals and grand totals in a single query - [PIVOT](/docs/query/sql/pivot/) - Transform GROUP BY results from rows to columns - [SAMPLE BY](/docs/query/sql/sample-by/) - Time-series aggregation - [Aggregation functions](/docs/query/functions/aggregation/) - Available aggregate functions diff --git a/documentation/query/sql/grouping-sets.md b/documentation/query/sql/grouping-sets.md new file mode 100644 index 000000000..6a8979bd0 --- /dev/null +++ b/documentation/query/sql/grouping-sets.md @@ -0,0 +1,320 @@ +--- +title: GROUPING SETS, ROLLUP, and CUBE +sidebar_label: GROUPING SETS +description: GROUPING SETS, ROLLUP, and CUBE SQL keyword reference for computing multiple levels of aggregation in a single query. +--- + +`GROUPING SETS`, `ROLLUP`, and `CUBE` perform aggregation over multiple +dimensions within a single query. This can be used, for example, to compute +subtotals and grand totals alongside detail-level results, without multiple +passes over the data. + +## Syntax + +Grouping sets can be used with both `GROUP BY` and `SAMPLE BY`. + +With `GROUP BY`: + +```questdb-sql +SELECT column [, ...], aggregate(column) [, ...] 
+FROM table +[WHERE condition] +GROUP BY + [column [, ...],] + ROLLUP(column [, ...]) + | CUBE(column [, ...]) + | GROUPING SETS ((column [, ...]) [, ...]) +``` + +With `SAMPLE BY`: + +```questdb-sql +SELECT [column [, ...],] aggregate(column) [, ...] +FROM table +[WHERE condition] +SAMPLE BY n{units} + [ROLLUP(column [, ...]) | CUBE(column [, ...]) | GROUPING SETS (...)] + [FILL(...)] + [ALIGN TO ...] +``` + +## GROUPING SETS + +`GROUPING SETS` gives explicit control over which grouping combinations to +compute. Each set in the list produces its own group of aggregated rows. + +```questdb-sql title="Explicit grouping sets" demo +SELECT symbol, side, SUM(amount) AS total_amount, COUNT(*) AS trade_count +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY GROUPING SETS ( + (symbol, side), + (symbol), + () +); +``` + +- `(symbol, side)` groups by both columns (detail rows) +- `(symbol)` groups by symbol only (subtotals per symbol, `side` is `NULL`) +- `()` is the empty set, producing a single grand total row (both columns `NULL`) + +You can specify any combination of column subsets. `ROLLUP` and `CUBE` are +shorthand for common `GROUPING SETS` patterns. + +## ROLLUP + +`ROLLUP` generates hierarchical subtotals, progressively dropping columns from +right to left. With N columns, `ROLLUP` produces N+1 grouping sets. 
+ +```questdb-sql title="Trade volume breakdown with ROLLUP" demo +SELECT symbol, side, + SUM(price * amount) AS volume, + COUNT(*) AS trades +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY ROLLUP(symbol, side) +ORDER BY symbol, side; +``` + +This produces: + +- Per-symbol, per-side detail rows +- Per-symbol subtotals (`side` is `NULL`) +- A single grand total row (both `NULL`) + +`ROLLUP(symbol, side)` is equivalent to: + +```questdb-sql +GROUP BY GROUPING SETS ( + (symbol, side), + (symbol), + () +) +``` + +With three columns, `ROLLUP(a, b, c)` produces four grouping sets: + +```questdb-sql +GROUP BY GROUPING SETS ( + (a, b, c), + (a, b), + (a), + () +) +``` + +## CUBE + +`CUBE` generates all possible combinations of the specified columns. With N +columns, `CUBE` produces 2^N grouping sets. + +```questdb-sql title="Cross-tabulation with CUBE" demo +SELECT symbol, side, + SUM(amount) AS total_amount, + GROUPING_ID(symbol, side) AS grp +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY CUBE(symbol, side) +ORDER BY grp, symbol, side; +``` + +`CUBE(symbol, side)` is equivalent to: + +```questdb-sql +GROUP BY GROUPING SETS ( + (symbol, side), -- both grouped + (symbol), -- symbol only + (side), -- side only + () -- grand total +) +``` + +Ordering by `GROUPING_ID` groups the output by aggregation level: + +- `grp=0`: all detail combinations +- `grp=1`: per-symbol totals (side rolled up) +- `grp=2`: per-side totals (symbol rolled up) +- `grp=3`: grand total + +`CUBE` is limited to 15 columns maximum (2^15 = 32,768 grouping sets). + +## Composite syntax + +Plain `GROUP BY` columns can be combined with `ROLLUP` or `CUBE`. The plain +columns are always included in every grouping set. 
+ +```questdb-sql title="symbol always grouped, side rolled up" demo +SELECT symbol, side, SUM(amount) AS total_amount +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY symbol, ROLLUP(side); +``` + +This is equivalent to: + +```questdb-sql +GROUP BY GROUPING SETS ( + (symbol, side), + (symbol) +) +``` + +There is no empty set `()` here because `symbol` is always present. + +## GROUPING() and GROUPING_ID() functions + +When columns are rolled up, they appear as `NULL` in the result. The data might +also contain genuine `NULL` values. `GROUPING()` and `GROUPING_ID()` distinguish +between the two. + +### GROUPING(column) + +Accepts a single column. Returns: + +- `0` if the column is actively grouped (a `NULL` is a real data value) +- `1` if the column is rolled up (the `NULL` is a placeholder) + +```questdb-sql title="Identify rolled-up rows" demo +SELECT symbol, side, SUM(amount) AS total_amount, + GROUPING(symbol) AS gs, + GROUPING(side) AS gsd +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY ROLLUP(symbol, side) +ORDER BY gs, gsd, symbol, side; +``` + +In the results: + +| gs | gsd | Meaning | +| -- | --- | ------- | +| 0 | 0 | Detail row: both columns actively grouped | +| 0 | 1 | Subtotal: grouped by symbol, side rolled up | +| 1 | 1 | Grand total: both columns rolled up | + +### GROUPING_ID(column1, column2, ...) + +Accepts one or more columns. Returns an integer bitmask combining the +`GROUPING()` values of all specified columns. Bit positions are assigned +right-to-left: the rightmost argument occupies bit 0 (least significant bit). 
+ +```questdb-sql title="Bitmask for aggregation levels" demo +SELECT symbol, side, SUM(amount) AS total_amount, + GROUPING_ID(symbol, side) AS grp +FROM trades +WHERE timestamp IN '$now-1m..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +GROUP BY CUBE(symbol, side) +ORDER BY grp, symbol, side; +``` + +For `GROUPING_ID(symbol, side)`, bit 1 is assigned to `symbol` and bit 0 to +`side`: + +| grp | Binary | Meaning | +| --- | ------ | ------- | +| 0 | 0b00 | Both columns grouped | +| 1 | 0b01 | `side` rolled up | +| 2 | 0b10 | `symbol` rolled up | +| 3 | 0b11 | Both rolled up (grand total) | + +Writing `GROUPING_ID(side, symbol)` would reverse the bit assignments. + +## SAMPLE BY integration + +Grouping sets work with QuestDB's `SAMPLE BY` clause for time-bucketed +aggregation with multiple rollup levels. + +```questdb-sql title="Hourly breakdown with ROLLUP" demo +SELECT timestamp, symbol, SUM(amount) AS total_amount, AVG(price) AS avg_price +FROM trades +WHERE timestamp IN '$now-1d..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +SAMPLE BY 1h ROLLUP(symbol) +ORDER BY timestamp, symbol; +``` + +Each time bucket contains one row per symbol plus one grand total row (where +`symbol` is `NULL`). The timestamp column is never rolled up - it is always +present as the time bucket key. + +### FILL support + +`FILL` works with grouping sets. Missing time buckets are filled per key +combination - each distinct (symbol, grouping level) pair gets its own fill row. 
+ +```questdb-sql title="SAMPLE BY with FILL and ROLLUP" demo +SELECT timestamp, symbol, SUM(amount) AS total_amount, AVG(price) AS avg_price +FROM trades +WHERE timestamp IN '$now-1d..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +SAMPLE BY 1h ROLLUP(symbol) FILL(0) +ORDER BY timestamp, symbol; +``` + +Supported FILL modes: + +| FILL mode | Supported | +| ------------ | --------- | +| `FILL(NONE)` | Yes | +| `FILL(NULL)` | Yes | +| `FILL(value)` | Yes | +| `FILL(PREV)` | No | +| `FILL(LINEAR)` | No | + +`GROUPING()` and `GROUPING_ID()` values are preserved in fill rows. They are not +replaced by the fill value. + +```questdb-sql title="GROUPING values preserved in fill rows" demo +SELECT GROUPING(symbol) AS gs, timestamp, symbol, SUM(amount) AS total_amount +FROM trades +WHERE timestamp IN '$now-1d..$now' + AND symbol IN ('BTC-USDT', 'ETH-USDT') +SAMPLE BY 1h ROLLUP(symbol) FILL(NULL) +ORDER BY timestamp, gs, symbol; +``` + +A fill row for a missing hour shows `gs=0` for detail-level fills and `gs=1` for +grand-total-level fills, just like real data rows. Only aggregate columns get the +fill value. + +## Limitations + +- **Expressions not allowed** in `ROLLUP`, `CUBE`, or `GROUPING SETS` - only + column references are accepted. `ROLLUP(a + b)` is rejected; use a subquery or + alias. Plain columns in composite syntax (`GROUP BY expr, ROLLUP(col)`) are not + restricted. + +- **No mixed qualified/unqualified references** to the same column - + `ROLLUP(a, t.a)` is rejected. Use one form consistently. + +- **Not supported with `LATEST ON`** - rejected with an error. + +- **`FILL(PREV)` and `FILL(LINEAR)` not supported** with grouping sets. + +- **`CUBE` limited to 15 columns** (2^15 = 32,768 grouping sets). The `cairo.sql.max.grouping.sets` limit (default 4096) also applies, so with default settings a `CUBE` over 13 or more columns (2^13 = 8,192 sets) is rejected unless that limit is raised. + +- **`GROUPING()` / `GROUPING_ID()` limited to 31 `GROUP BY` key columns** - the + bitmask is int-based. + +- **No multiple `ROLLUP`/`CUBE` in the same `GROUP BY`** - + `GROUP BY ROLLUP(a), CUBE(b)` is not supported. 
+ +- **Maximum grouping sets per query** - controlled by the + `cairo.sql.max.grouping.sets` + [configuration property](/docs/configuration/overview/) (default 4096). + `ROLLUP` produces N+1 sets, `CUBE` produces 2^N sets, and explicit + `GROUPING SETS` produces one set per listed group. Queries exceeding this limit + are rejected at parse time. + +## See also + +- [GROUP BY](/docs/query/sql/group-by/) - Standard grouping +- [SAMPLE BY](/docs/query/sql/sample-by/) - Time-series aggregation +- [PIVOT](/docs/query/sql/pivot/) - Transform GROUP BY results from rows to columns +- [Aggregation functions](/docs/query/functions/aggregation/) - Available aggregate functions diff --git a/documentation/query/sql/rollup.md b/documentation/query/sql/rollup.md new file mode 100644 index 000000000..51d0f5850 --- /dev/null +++ b/documentation/query/sql/rollup.md @@ -0,0 +1,7 @@ +--- +title: ROLLUP keyword +sidebar_label: ROLLUP +description: ROLLUP SQL keyword reference for computing hierarchical subtotals. +--- + +See [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/#rollup). diff --git a/documentation/query/sql/sample-by.md b/documentation/query/sql/sample-by.md index 954fa2272..6b31ae0db 100644 --- a/documentation/query/sql/sample-by.md +++ b/documentation/query/sql/sample-by.md @@ -16,21 +16,17 @@ use of the [FILL](#fill-options) keyword to specify a fill behavior. ## Syntax -### SAMPLE BY keywords - -![Flow chart showing the syntax of the SAMPLE BY keywords](/images/docs/diagrams/sampleBy.svg) - -### FROM-TO keywords - -![Flow chart showing the syntax of the FROM-TO keywords](/images/docs/diagrams/fromTo.svg) - -### FILL keywords - -![Flow chart showing the syntax of the FILL keyword](/images/docs/diagrams/fill.svg) - -### ALIGN TO keywords - -![Flow chart showing the syntax of the ALIGN TO keywords](/images/docs/diagrams/alignToCalTimeZone.svg) +```questdb-sql +SELECT [column [, ...],] aggregate(column) [, ...] 
+FROM table +[WHERE condition] +SAMPLE BY n{units} + [ROLLUP(column [, ...]) | CUBE(column [, ...]) | GROUPING SETS (...)] + [FROM timestamp TO timestamp] + [FILL(NONE | NULL | PREV | LINEAR | value [, ...])] + [ALIGN TO CALENDAR [TIME ZONE tz] [WITH OFFSET 'HH:mm'] + | ALIGN TO FIRST OBSERVATION] +``` ## Sample units @@ -136,6 +132,8 @@ restrictions apply: `NONE`, `NULL`, `PREV`, `LINEAR` and constants may be used. - `LINEAR` strategy is not supported for keyed queries, i.e. queries that contain non-aggregated columns other than the timestamp in the SELECT clause. +- `PREV` and `LINEAR` strategies are not supported with + [GROUPING SETS, ROLLUP, or CUBE](/docs/query/sql/grouping-sets/). - The `FILL` keyword must precede alignment described in the [sample calculation section](#sample-calculation), i.e.: @@ -593,6 +591,7 @@ SAMPLE BY 1h; This section includes links to additional information such as tutorials: +- [GROUPING SETS, ROLLUP, and CUBE](/docs/query/sql/grouping-sets/) - Compute subtotals and grand totals within time buckets - [PIVOT](/docs/query/sql/pivot/) - Transform SAMPLE BY results from rows to columns - [Materialized Views](/docs/concepts/materialized-views/) - Pre-compute SAMPLE BY queries for better performance - [SQL Extensions for Time-Series Data in QuestDB](/blog/2022/11/23/sql-extensions-time-series-data-questdb-part-ii/) diff --git a/documentation/sidebars.js b/documentation/sidebars.js index 645e259ef..a72ee6468 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -414,16 +414,19 @@ module.exports = { "query/sql/asof-join", "query/sql/case", "query/sql/cast", + "query/sql/cube", "query/sql/declare", "query/sql/distinct", "query/sql/fill", "query/sql/group-by", + "query/sql/grouping-sets", "query/sql/horizon-join", "query/sql/join", "query/sql/latest-on", "query/sql/limit", "query/sql/order-by", "query/sql/pivot", + "query/sql/rollup", "query/sql/sample-by", "query/sql/where", "query/sql/window-join",