diff --git a/.github/workflows/nightly-bench.yml b/.github/workflows/nightly-bench.yml index b89395516be..6452267990b 100644 --- a/.github/workflows/nightly-bench.yml +++ b/.github/workflows/nightly-bench.yml @@ -64,6 +64,86 @@ jobs: {"engine": "duckdb", "format": "vortex"} ], "scale_factor": "100.0" + }, + { + "id": "sqlstorm-stackoverflow-nvme", + "subcommand": "sqlstorm", + "name": "SQLStorm (stackoverflow) on NVME", + "origin": "stackoverflow", + "data_formats": ["parquet", "vortex"], + "pr_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": "datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "develop_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": "datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "iterations": "3" + }, + { + "id": "sqlstorm-job-nvme", + "subcommand": "sqlstorm", + "name": "SQLStorm (job) on NVME", + "origin": "job", + "data_formats": ["parquet", "vortex"], + "pr_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": "datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "develop_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": "datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "iterations": "3" + }, + { + "id": "sqlstorm-tpch-nvme", + "subcommand": "sqlstorm", + "name": "SQLStorm (tpch) on NVME", + "origin": "tpch", + "data_formats": ["parquet", "vortex"], + "pr_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": "datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "develop_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": 
"datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "iterations": "3" + }, + { + "id": "sqlstorm-tpcds-nvme", + "subcommand": "sqlstorm", + "name": "SQLStorm (tpcds) on NVME", + "origin": "tpcds", + "data_formats": ["parquet", "vortex"], + "pr_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": "datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "develop_targets": [ + {"engine": "datafusion", "format": "parquet"}, + {"engine": "datafusion", "format": "vortex"}, + {"engine": "duckdb", "format": "parquet"}, + {"engine": "duckdb", "format": "vortex"} + ], + "iterations": "3" } ] strategy: diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 8e6ed6088e2..8db27ca5f72 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -358,6 +358,7 @@ jobs: run: | uv run --project bench-orchestrator vx-bench prepare-data "${{ matrix.subcommand }}" \ --formats-json '${{ toJSON(matrix.data_formats) }}' \ + ${{ matrix.origin && format('--opt origin={0}', matrix.origin) || '' }} \ ${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }} - name: Setup AWS CLI @@ -403,6 +404,7 @@ jobs: --no-build \ --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ + ${{ matrix.origin && format('--opt origin={0}', matrix.origin) || '' }} \ ${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }} - name: Run ${{ matrix.name }} benchmark (remote) @@ -424,6 +426,7 @@ jobs: --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ --opt remote-data-dir=${{ matrix.remote_storage }} \ + ${{ matrix.origin && format('--opt origin={0}', matrix.origin) || 
'' }} \ ${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }} - name: Capture file sizes diff --git a/bench-orchestrator/README.md b/bench-orchestrator/README.md index 0b267008a85..dab70666c94 100644 --- a/bench-orchestrator/README.md +++ b/bench-orchestrator/README.md @@ -41,7 +41,7 @@ vx-bench run [options] **Arguments:** -- `benchmark`: Benchmark suite to run (`appian`, `tpch`, `tpcds`, `clickbench`, `fineweb`, `gh-archive`, `polarsignals`, `public-bi`, `statpopgen`) +- `benchmark`: Benchmark suite to run (`appian`, `tpch`, `tpcds`, `clickbench`, `fineweb`, `gh-archive`, `polarsignals`, `public-bi`, `statpopgen`, `sqlstorm`) **Options:** diff --git a/bench-orchestrator/bench_orchestrator/config.py b/bench-orchestrator/bench_orchestrator/config.py index fb90ac8bd6a..1e3226192f7 100644 --- a/bench-orchestrator/bench_orchestrator/config.py +++ b/bench-orchestrator/bench_orchestrator/config.py @@ -51,6 +51,7 @@ class Benchmark(Enum): POLARSIGNALS = "polarsignals" PUBLIC_BI = "public-bi" STATPOPGEN = "statpopgen" + SQLSTORM = "sqlstorm" # Engine to supported formats mapping. diff --git a/vortex-bench/sqlstorm/README.md b/vortex-bench/sqlstorm/README.md new file mode 100644 index 00000000000..b027ec62a5f --- /dev/null +++ b/vortex-bench/sqlstorm/README.md @@ -0,0 +1,86 @@ +# vortex-bench SQLStorm queries + +[SQLStorm] is an LLM-generated SQL stress suite — ~62k queries across four +schemas, broad enough to exercise SQL surface that TPC-H and TPC-DS don't. +This directory holds a small, confirmed-working sample (125 queries per +schema, 500 total) that the nightly bench runs against TPC-H and TPC-DS data +generated at SF10 plus two larger datasets we download for the non-TPC +schemas. Queries are pinned at SHA +[`b3bb0b96794a6afe9bb8f3ff2b243562b779c40d`][pinned-sqlstorm]. 
+ +[SQLStorm]: https://github.com/SQL-Storm/SQLStorm +[pinned-sqlstorm]: https://github.com/SQL-Storm/SQLStorm/tree/b3bb0b96794a6afe9bb8f3ff2b243562b779c40d + +## Layout + +- `<origin>/<id>.sql` — 125 queries per origin, 4 origins, 500 total. + `<id>` is the upstream SQLStorm query id (sparse, non-sequential). + +| Origin | Source data | Upstream SQLStorm dir | +| --- | --- | --- | +| `tpch` | TPC-H generated at SF10 (`data/tpch/10.0/`) | `v1.0/tpch/` | +| `tpcds` | TPC-DS generated at SF10 (`data/tpcds/10.0/`) | `v1.0/tpcds/` | +| `stackoverflow` | `stackoverflow_math.tar.gz` (~12 GB) from `db.in.tum.de` | `v1.0/stackoverflow/` | +| `job` | `imdb.tzst` from `db.in.tum.de` | `v1.0/job/` | + +The benchmark runs strict — a query failure aborts the run rather than +silently dropping a row, so any regression that breaks a query in nightly +is loud. The vendored set was curated to be the intersection of queries +that pass DuckDB and DataFusion against the source data; that is why a +small, confirmed-working sample lives in-tree and the full ~62k SQLStorm +corpus does not. + +## Data size (fixed scale) + +**There is no SQLStorm scale factor.** Each origin runs at a single fixed +size, and `vx-bench run sqlstorm` does **not** read `--opt scale-factor` — +passing one is silently ignored (it is not an error and changes nothing). The +four origins are sized to sit within the same order of magnitude as JOB: + +| Origin | Fixed size | ~Rows (all tables) | ~Parquet | +| --- | --- | --- | --- | +| `stackoverflow` | the `math` tier, ~12 GB | 40 M | 6.1 GB | +| `job` | the full IMDB/JOB snapshot (fixed real dataset) | 74 M | 1.7 GB | +| `tpch` | SF 10 | 87 M | 3.5 GB | +| `tpcds` | SF 10 | 192 M | 3.9 GB | + +This mirrors upstream: SQLStorm has no uniform scale knob either. OLAPBench +(the canonical runner) selects size *per origin* — StackOverflow ships at +0 / 1 GB (`dba`) / 12 GB (`math`) / 222 GB, TPC-H/TPC-DS scale via their own +generators, and JOB is fixed. 
Query *validity* is scale-independent; only row +counts change with size. The fixed points above are set in code — the TPC +scale by `SQLSTORM_TPC_SCALE_FACTOR` (`sqlstorm_benchmark.rs`) and the +StackOverflow tier by the `STACKOVERFLOW` recipe's tarball URL (`data.rs`) — +so changing them means editing those consts (and re-curating, since the +vendored queries are selected to stay short at the configured scale), not +passing a runtime scale factor. + +## Refreshing the vendored set + +Swaps happen by hand against the pinned SHA above: clone the SQLStorm +corpus at that SHA, pick candidates from `v1.0/<origin>/queries/`, and +verify each runs cleanly on both DuckDB and DataFusion **at the configured +scale** (SF10 / `math`) before vendoring. Candidates must also stay short +— the vendored set is curated to keep each query under ~5 s/engine at scale +so the nightly stays bounded; drop anything slower and refill. One gotcha: +verify against the bench's own DataFusion `SessionContext`, **not** +`datafusion-cli` — the cli decorrelates more subqueries than the harness can +physically plan and reports false-positive passes on queries the harness then +can't actually run. + +## Running + +The four origins are nightly-only matrix entries in +`.github/workflows/nightly-bench.yml`. Locally: + +``` +vx-bench run sqlstorm --opt origin=tpch # tpch | tpcds | stackoverflow | job +``` + +TPC-H / TPC-DS generate their own SF10 datasets under +`vortex-bench/data/tpch/10.0/` and `vortex-bench/data/tpcds/10.0/` (no longer +shared with the standalone SF1 benchmarks). StackOverflow / JOB download and +convert their upstream tarballs to Parquet under +`vortex-bench/data/sqlstorm/<origin>/parquet/` on first run (idempotent via a +`.success` marker). The StackOverflow `math` tarball is ~12 GB and needs +~30 GB of scratch to extract and load. 
diff --git a/vortex-bench/sqlstorm/job/10088.sql b/vortex-bench/sqlstorm/job/10088.sql new file mode 100644 index 00000000000..e1605b4ca04 --- /dev/null +++ b/vortex-bench/sqlstorm/job/10088.sql @@ -0,0 +1,24 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.kind AS cast_type, + m.info AS movie_info, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + movie_info m ON t.id = m.movie_id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +JOIN + comp_cast_type c ON ci.role_id = c.id +WHERE + t.production_year = 2020 +ORDER BY + a.name, t.title; diff --git a/vortex-bench/sqlstorm/job/10166.sql b/vortex-bench/sqlstorm/job/10166.sql new file mode 100644 index 00000000000..34b6749a332 --- /dev/null +++ b/vortex-bench/sqlstorm/job/10166.sql @@ -0,0 +1,29 @@ +SELECT + a.name AS aka_name, + t.title AS movie_title, + c.note AS cast_note, + c.nr_order AS cast_order, + n.name AS person_name, + rt.role AS role, + m.info AS movie_info, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + name n ON a.person_id = n.imdb_id +JOIN + role_type rt ON c.role_id = rt.id +JOIN + movie_info m ON t.id = m.movie_id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year = 2020 +ORDER BY + t.title, c.nr_order; diff --git a/vortex-bench/sqlstorm/job/10176.sql b/vortex-bench/sqlstorm/job/10176.sql new file mode 100644 index 00000000000..cb534ff4a01 --- /dev/null +++ b/vortex-bench/sqlstorm/job/10176.sql @@ -0,0 +1,24 @@ +SELECT + a.name AS aka_name, + t.title AS movie_title, + c.note AS cast_note, + co.name AS company_name, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +JOIN + movie_companies mc ON 
t.id = mc.movie_id +JOIN + company_name co ON mc.company_id = co.id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/10194.sql b/vortex-bench/sqlstorm/job/10194.sql new file mode 100644 index 00000000000..d3e7265677e --- /dev/null +++ b/vortex-bench/sqlstorm/job/10194.sql @@ -0,0 +1,25 @@ +SELECT + a.name AS aka_name, + t.title AS movie_title, + c.nr_order AS cast_order, + n.name AS person_name, + p.info AS person_info, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + name n ON c.person_id = n.imdb_id +JOIN + person_info p ON n.id = p.person_id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC, c.nr_order; diff --git a/vortex-bench/sqlstorm/job/10228.sql b/vortex-bench/sqlstorm/job/10228.sql new file mode 100644 index 00000000000..4d9ae56fcf5 --- /dev/null +++ b/vortex-bench/sqlstorm/job/10228.sql @@ -0,0 +1,22 @@ +SELECT + a.name AS actor_name, + m.title AS movie_title, + m.production_year, + c.kind AS cast_type, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + aka_title m ON ci.movie_id = m.id +JOIN + comp_cast_type c ON ci.person_role_id = c.id +JOIN + movie_keyword mk ON m.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +WHERE + m.production_year > 2000 +ORDER BY + m.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/10231.sql b/vortex-bench/sqlstorm/job/10231.sql new file mode 100644 index 00000000000..41050f309f6 --- /dev/null +++ b/vortex-bench/sqlstorm/job/10231.sql @@ -0,0 +1,32 @@ +SELECT + t.title, + a.name AS actor_name, + c.kind AS comp_cast_type, + m.name AS company_name, + k.keyword, + i.info +FROM + title t +JOIN + 
cast_info ci ON t.id = ci.movie_id +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + comp_cast_type c ON ci.role_id = c.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name m ON mc.company_id = m.id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +JOIN + movie_info mi ON t.id = mi.movie_id +JOIN + info_type i ON mi.info_type_id = i.id +WHERE + t.production_year >= 2000 + AND m.country_code = 'USA' +ORDER BY + t.title, a.name; diff --git a/vortex-bench/sqlstorm/job/10489.sql b/vortex-bench/sqlstorm/job/10489.sql new file mode 100644 index 00000000000..bf4a39437de --- /dev/null +++ b/vortex-bench/sqlstorm/job/10489.sql @@ -0,0 +1,22 @@ +SELECT + a.name AS aka_name, + t.title AS movie_title, + c.note AS cast_note, + ri.role AS person_role, + m.name AS company_name +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name m ON mc.company_id = m.id +JOIN + role_type ri ON c.role_id = ri.id +WHERE + t.production_year = 2022 +ORDER BY + t.title, a.name; diff --git a/vortex-bench/sqlstorm/job/10846.sql b/vortex-bench/sqlstorm/job/10846.sql new file mode 100644 index 00000000000..1ac41a5cb86 --- /dev/null +++ b/vortex-bench/sqlstorm/job/10846.sql @@ -0,0 +1,22 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + r.role AS actor_role, + c.kind AS company_type +FROM + cast_info ci +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type c ON mc.company_type_id = c.id +JOIN + role_type r ON ci.role_id = r.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/111.sql b/vortex-bench/sqlstorm/job/111.sql new file mode 100644 index 00000000000..c973a6b6699 --- /dev/null +++ 
b/vortex-bench/sqlstorm/job/111.sql @@ -0,0 +1,70 @@ +WITH RankedTitles AS ( + SELECT + t.id AS title_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.title) AS rank_per_year + FROM + title t + WHERE + t.production_year IS NOT NULL +), +ActorMovies AS ( + SELECT + ci.movie_id, + ak.name AS actor_name, + ROW_NUMBER() OVER (PARTITION BY ci.movie_id ORDER BY ak.name) AS actor_rank + FROM + cast_info ci + JOIN + aka_name ak ON ci.person_id = ak.person_id + WHERE + ci.role_id IN (SELECT id FROM role_type WHERE role = 'actor') +), +MovieKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keyword_list + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +), +CompanyInfo AS ( + SELECT + mc.movie_id, + COALESCE(cn.name, 'Unknown Company') AS company_name, + ct.kind AS company_type + FROM + movie_companies mc + LEFT JOIN + company_name cn ON mc.company_id = cn.id + LEFT JOIN + company_type ct ON mc.company_type_id = ct.id +) +SELECT + rt.title AS Movie_Title, + rt.production_year AS Production_Year, + am.actor_name AS Actor, + mk.keyword_list AS Keywords, + ci.company_name AS Production_Company, + ci.company_type AS Company_Type, + am.actor_rank, + rt.rank_per_year +FROM + RankedTitles rt +LEFT JOIN + ActorMovies am ON rt.title_id = am.movie_id +LEFT JOIN + MovieKeywords mk ON rt.title_id = mk.movie_id +LEFT JOIN + CompanyInfo ci ON rt.title_id = ci.movie_id +WHERE + rt.rank_per_year <= 3 + AND rt.production_year > 2000 +ORDER BY + rt.production_year DESC, + rt.title ASC, + am.actor_rank; diff --git a/vortex-bench/sqlstorm/job/11405.sql b/vortex-bench/sqlstorm/job/11405.sql new file mode 100644 index 00000000000..dc4988d9ab0 --- /dev/null +++ b/vortex-bench/sqlstorm/job/11405.sql @@ -0,0 +1,26 @@ +SELECT + t.title AS movie_title, + a.name AS actor_name, + r.role AS role_name, + c.note AS cast_note, + m.info AS movie_info +FROM + title t +JOIN + movie_companies mc ON 
t.id = mc.movie_id +JOIN + company_name cn ON mc.company_id = cn.id +JOIN + cast_info c ON t.id = c.movie_id +JOIN + aka_name a ON c.person_id = a.person_id +JOIN + role_type r ON c.role_id = r.id +JOIN + movie_info m ON t.id = m.movie_id +WHERE + t.production_year >= 2000 + AND cn.country_code = 'USA' +ORDER BY + t.production_year DESC, + a.name; diff --git a/vortex-bench/sqlstorm/job/11571.sql b/vortex-bench/sqlstorm/job/11571.sql new file mode 100644 index 00000000000..e2d3e8e85c3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/11571.sql @@ -0,0 +1,18 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + r.role AS role_type +FROM + cast_info ci +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + role_type r ON ci.role_id = r.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC, + actor_name; diff --git a/vortex-bench/sqlstorm/job/12132.sql b/vortex-bench/sqlstorm/job/12132.sql new file mode 100644 index 00000000000..ef0357f4dc8 --- /dev/null +++ b/vortex-bench/sqlstorm/job/12132.sql @@ -0,0 +1,26 @@ +SELECT + t.title, + a.name AS actor_name, + c.kind AS company_type, + k.keyword, + m.info +FROM + title t +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type c ON mc.company_type_id = c.id +JOIN + complete_cast cc ON t.id = cc.movie_id +JOIN + aka_name a ON cc.subject_id = a.person_id +JOIN + movie_info m ON t.id = m.movie_id +WHERE + t.production_year > 2000 +ORDER BY + t.title, a.name; diff --git a/vortex-bench/sqlstorm/job/1233.sql b/vortex-bench/sqlstorm/job/1233.sql new file mode 100644 index 00000000000..2da21119a8f --- /dev/null +++ b/vortex-bench/sqlstorm/job/1233.sql @@ -0,0 +1,64 @@ + +WITH ranked_movies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + COUNT(DISTINCT ci.person_id) AS total_cast, + RANK() OVER (PARTITION BY 
t.production_year ORDER BY COUNT(DISTINCT ci.person_id) DESC) AS rank_within_year + FROM + aka_title t + LEFT JOIN + cast_info ci ON t.id = ci.movie_id + GROUP BY + t.id, t.title, t.production_year +), +high_cast_movies AS ( + SELECT + rm.movie_id, + rm.title, + rm.production_year, + rm.total_cast + FROM + ranked_movies rm + WHERE + rm.rank_within_year <= 5 +), +movie_keywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +), +final_result AS ( + SELECT + hcm.movie_id, + hcm.title, + hcm.production_year, + hcm.total_cast, + COALESCE(mk.keywords, 'No Keywords') AS keywords + FROM + high_cast_movies hcm + LEFT JOIN + movie_keywords mk ON hcm.movie_id = mk.movie_id +) +SELECT + f.movie_id, + f.title, + f.production_year, + f.total_cast, + f.keywords, + CASE + WHEN f.total_cast IS NULL THEN 'Unknown' + ELSE CAST(f.total_cast AS VARCHAR) || ' Cast Members' + END AS cast_info +FROM + final_result f +ORDER BY + f.production_year DESC, + f.total_cast DESC; diff --git a/vortex-bench/sqlstorm/job/12424.sql b/vortex-bench/sqlstorm/job/12424.sql new file mode 100644 index 00000000000..0bb2a577521 --- /dev/null +++ b/vortex-bench/sqlstorm/job/12424.sql @@ -0,0 +1,23 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + r.role AS role_name, + c.kind AS comp_cast_type +FROM + cast_info ci +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + role_type r ON ci.role_id = r.id +JOIN + complete_cast cc ON ci.movie_id = cc.movie_id AND ci.person_id = cc.subject_id +JOIN + comp_cast_type c ON ci.person_role_id = c.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC, + a.name; diff --git a/vortex-bench/sqlstorm/job/13057.sql b/vortex-bench/sqlstorm/job/13057.sql new file mode 100644 index 00000000000..9dea40d32f5 --- /dev/null +++ b/vortex-bench/sqlstorm/job/13057.sql 
@@ -0,0 +1,23 @@ +SELECT + a.name AS aka_name, + t.title AS movie_title, + c.note AS cast_note, + n.name AS person_name, + rt.role AS role, + ci.kind AS comp_cast_type +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + name n ON a.person_id = n.imdb_id +JOIN + role_type rt ON c.role_id = rt.id +JOIN + comp_cast_type ci ON c.person_role_id = ci.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/1333.sql b/vortex-bench/sqlstorm/job/1333.sql new file mode 100644 index 00000000000..dd5b2d73d8b --- /dev/null +++ b/vortex-bench/sqlstorm/job/1333.sql @@ -0,0 +1,54 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.title) AS title_rank + FROM + aka_title t + WHERE + t.kind_id = (SELECT id FROM kind_type WHERE kind = 'movie') +), FilteredCast AS ( + SELECT + c.movie_id, + COUNT(c.person_id) AS cast_count, + STRING_AGG(a.name, ', ') AS cast_names + FROM + cast_info c + JOIN + aka_name a ON c.person_id = a.person_id + WHERE + c.nr_order < 10 + GROUP BY + c.movie_id +), MovieCompanies AS ( + SELECT + mc.movie_id, + COUNT(DISTINCT mc.company_id) AS company_count + FROM + movie_companies mc + JOIN + company_name cn ON mc.company_id = cn.id + WHERE + cn.country_code IS NOT NULL + GROUP BY + mc.movie_id +) +SELECT + rm.movie_id, + rm.title, + rm.production_year, + coalesce(fc.cast_count, 0) AS total_cast, + coalesce(fc.cast_names, 'No cast information available') AS cast_members, + coalesce(mc.company_count, 0) AS total_companies +FROM + RankedMovies rm +LEFT JOIN + FilteredCast fc ON rm.movie_id = fc.movie_id +LEFT JOIN + MovieCompanies mc ON rm.movie_id = mc.movie_id +WHERE + rm.production_year BETWEEN 2000 AND 2020 +ORDER BY + rm.production_year DESC, + rm.title_rank; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/13822.sql 
b/vortex-bench/sqlstorm/job/13822.sql new file mode 100644 index 00000000000..cda630a6a59 --- /dev/null +++ b/vortex-bench/sqlstorm/job/13822.sql @@ -0,0 +1,18 @@ +SELECT + t.title AS movie_title, + t.production_year, + a.name AS actor_name, + r.role AS actor_role, + c.name AS company_name, + k.keyword AS movie_keyword +FROM title t +JOIN movie_companies mc ON t.id = mc.movie_id +JOIN company_name c ON mc.company_id = c.id +JOIN complete_cast cc ON t.id = cc.movie_id +JOIN cast_info ci ON cc.subject_id = ci.id +JOIN aka_name a ON ci.person_id = a.person_id +JOIN role_type r ON ci.role_id = r.id +JOIN movie_keyword mk ON t.id = mk.movie_id +JOIN keyword k ON mk.keyword_id = k.id +WHERE t.production_year >= 2000 +ORDER BY t.production_year, t.title; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/1413.sql b/vortex-bench/sqlstorm/job/1413.sql new file mode 100644 index 00000000000..1d02343bfaa --- /dev/null +++ b/vortex-bench/sqlstorm/job/1413.sql @@ -0,0 +1,53 @@ +WITH RankedMovies AS ( + SELECT + t.title, + t.production_year, + COUNT(DISTINCT ci.person_id) AS total_cast, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY COUNT(DISTINCT ci.person_id) DESC) AS rank + FROM + aka_title t + LEFT JOIN + cast_info ci ON t.id = ci.movie_id + GROUP BY + t.id, t.title, t.production_year +), +TopMovies AS ( + SELECT + rm.title, + rm.production_year, + rm.total_cast + FROM + RankedMovies rm + WHERE + rm.rank <= 5 +), +FilteredKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keyword_list + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +) +SELECT + tm.title, + tm.production_year, + tm.total_cast, + COALESCE(fk.keyword_list, 'No Keywords') AS keywords, + COALESCE(cn.name, 'Undisclosed Company') AS production_company +FROM + TopMovies tm +LEFT JOIN + movie_companies mc ON tm.production_year = mc.movie_id +LEFT JOIN + company_name cn ON mc.company_id = cn.id +LEFT JOIN + FilteredKeywords 
fk ON tm.production_year = fk.movie_id +WHERE + tm.total_cast > 0 +ORDER BY + tm.production_year DESC, + tm.total_cast DESC; diff --git a/vortex-bench/sqlstorm/job/14152.sql b/vortex-bench/sqlstorm/job/14152.sql new file mode 100644 index 00000000000..7220e2fd5cb --- /dev/null +++ b/vortex-bench/sqlstorm/job/14152.sql @@ -0,0 +1,20 @@ +SELECT + a.name AS actor_name, + m.title AS movie_title, + c.role_id AS role_id, + t.production_year AS production_year, + p.info AS person_info +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title m ON c.movie_id = m.movie_id +JOIN + title t ON m.id = t.id +JOIN + person_info p ON a.person_id = p.person_id +WHERE + p.info_type_id = (SELECT id FROM info_type WHERE info = 'Biography') +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/14839.sql b/vortex-bench/sqlstorm/job/14839.sql new file mode 100644 index 00000000000..135bc57f10b --- /dev/null +++ b/vortex-bench/sqlstorm/job/14839.sql @@ -0,0 +1,25 @@ + +SELECT + t.title AS movie_title, + a.name AS actor_name, + ci.nr_order AS actor_order, + ct.kind AS company_type, + COUNT(mk.keyword_id) AS keyword_count +FROM + title t +JOIN + complete_cast cc ON t.id = cc.movie_id +JOIN + cast_info ci ON ci.id = cc.subject_id +JOIN + aka_name a ON a.person_id = ci.person_id +JOIN + movie_companies mc ON mc.movie_id = t.id +JOIN + company_type ct ON ct.id = mc.company_type_id +JOIN + movie_keyword mk ON mk.movie_id = t.id +GROUP BY + t.title, a.name, ci.nr_order, ct.kind +ORDER BY + t.title, actor_order; diff --git a/vortex-bench/sqlstorm/job/15050.sql b/vortex-bench/sqlstorm/job/15050.sql new file mode 100644 index 00000000000..bd40bf7c080 --- /dev/null +++ b/vortex-bench/sqlstorm/job/15050.sql @@ -0,0 +1,17 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.nr_order AS cast_order, + p.info AS person_info +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +LEFT JOIN + 
person_info p ON a.person_id = p.person_id +WHERE + t.production_year = 2023 +ORDER BY + a.name, c.nr_order; diff --git a/vortex-bench/sqlstorm/job/15183.sql b/vortex-bench/sqlstorm/job/15183.sql new file mode 100644 index 00000000000..3852e7b32b3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/15183.sql @@ -0,0 +1,14 @@ +SELECT + t.title, + a.name AS actor_name, + c.note AS character_role +FROM + title t +JOIN + cast_info c ON t.id = c.movie_id +JOIN + aka_name a ON c.person_id = a.person_id +WHERE + t.production_year > 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/15362.sql b/vortex-bench/sqlstorm/job/15362.sql new file mode 100644 index 00000000000..9016d23a2ae --- /dev/null +++ b/vortex-bench/sqlstorm/job/15362.sql @@ -0,0 +1,5 @@ +SELECT a.name AS actor_name, t.title AS movie_title, c.note AS role_note +FROM aka_name a +JOIN cast_info c ON a.person_id = c.person_id +JOIN aka_title t ON c.movie_id = t.movie_id +WHERE t.production_year = 2023; diff --git a/vortex-bench/sqlstorm/job/15382.sql b/vortex-bench/sqlstorm/job/15382.sql new file mode 100644 index 00000000000..08257aa7daa --- /dev/null +++ b/vortex-bench/sqlstorm/job/15382.sql @@ -0,0 +1,17 @@ +SELECT + a.id AS aka_id, + a.name AS aka_name, + t.id AS title_id, + t.title AS movie_title, + c.person_role_id, + p.info AS person_info +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + person_info p ON a.person_id = p.person_id +WHERE + t.production_year = 2023; diff --git a/vortex-bench/sqlstorm/job/15404.sql b/vortex-bench/sqlstorm/job/15404.sql new file mode 100644 index 00000000000..571e40f4d09 --- /dev/null +++ b/vortex-bench/sqlstorm/job/15404.sql @@ -0,0 +1,17 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + r.role AS role +FROM + cast_info c +JOIN + aka_name a ON c.person_id = a.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + role_type r ON c.role_id = r.id +WHERE + 
t.production_year >= 2000 +ORDER BY + t.production_year ASC; diff --git a/vortex-bench/sqlstorm/job/15493.sql b/vortex-bench/sqlstorm/job/15493.sql new file mode 100644 index 00000000000..9798fa342e1 --- /dev/null +++ b/vortex-bench/sqlstorm/job/15493.sql @@ -0,0 +1,17 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + r.role AS role_type +FROM + cast_info ci +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + role_type r ON ci.role_id = r.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/15729.sql b/vortex-bench/sqlstorm/job/15729.sql new file mode 100644 index 00000000000..832f7f6894f --- /dev/null +++ b/vortex-bench/sqlstorm/job/15729.sql @@ -0,0 +1,12 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.nr_order AS cast_order +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +WHERE + t.production_year = 2020; diff --git a/vortex-bench/sqlstorm/job/15732.sql b/vortex-bench/sqlstorm/job/15732.sql new file mode 100644 index 00000000000..09789de7aa3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/15732.sql @@ -0,0 +1,16 @@ +SELECT + t.title, + a.name AS actor_name, + r.role +FROM + title t +JOIN + cast_info c ON t.id = c.movie_id +JOIN + aka_name a ON c.person_id = a.person_id +JOIN + role_type r ON c.role_id = r.id +WHERE + t.production_year = 2020 +ORDER BY + t.title; diff --git a/vortex-bench/sqlstorm/job/16132.sql b/vortex-bench/sqlstorm/job/16132.sql new file mode 100644 index 00000000000..be9736f3cf3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/16132.sql @@ -0,0 +1,18 @@ +SELECT + t.title, + a.name AS actor_name, + ci.note AS role_note +FROM + title t +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name cn ON mc.company_id = cn.id +JOIN + cast_info ci ON t.id = ci.movie_id +JOIN + aka_name a ON ci.person_id = a.person_id 
+WHERE + cn.country_code = 'USA' +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/16223.sql b/vortex-bench/sqlstorm/job/16223.sql new file mode 100644 index 00000000000..0027fdefe66 --- /dev/null +++ b/vortex-bench/sqlstorm/job/16223.sql @@ -0,0 +1,17 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + r.role AS role_name +FROM + cast_info ci +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + role_type r ON ci.role_id = r.id +WHERE + t.production_year >= 2020 +ORDER BY + t.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/1644.sql b/vortex-bench/sqlstorm/job/1644.sql new file mode 100644 index 00000000000..d94381c47b3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/1644.sql @@ -0,0 +1,70 @@ + +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.id) AS rn + FROM + aka_title t + WHERE + t.production_year IS NOT NULL +), +ActorRoles AS ( + SELECT + c.movie_id, + r.role, + COUNT(*) AS actor_count + FROM + cast_info c + JOIN + role_type r ON c.role_id = r.id + GROUP BY + c.movie_id, r.role +), +MovieCompanyDetails AS ( + SELECT + mc.movie_id, + cn.name AS company_name, + ct.kind AS company_type, + COUNT(*) OVER (PARTITION BY mc.movie_id) AS num_companies + FROM + movie_companies mc + JOIN + company_name cn ON mc.company_id = cn.id + JOIN + company_type ct ON mc.company_type_id = ct.id +), +TitlesWithKeyword AS ( + SELECT + m.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword m + JOIN + keyword k ON m.keyword_id = k.id + GROUP BY + m.movie_id +) +SELECT + r.movie_id, + r.title, + r.production_year, + COALESCE(ar.role, 'Unknown Role') AS actor_role, + COALESCE(ar.actor_count, 0) AS number_of_actors, + COALESCE(mcd.company_name, 'No Company') AS company_name, + COALESCE(mcd.company_type, 'Unknown Type') AS company_type, + 
COALESCE(mcd.num_companies, 0) AS total_companies, + COALESCE(t.keywords, 'No Keywords') AS keywords +FROM + RankedMovies r +LEFT JOIN + ActorRoles ar ON r.movie_id = ar.movie_id +LEFT JOIN + MovieCompanyDetails mcd ON r.movie_id = mcd.movie_id +LEFT JOIN + TitlesWithKeyword t ON r.movie_id = t.movie_id +WHERE + r.rn <= 10 +ORDER BY + r.production_year DESC, r.movie_id; diff --git a/vortex-bench/sqlstorm/job/16563.sql b/vortex-bench/sqlstorm/job/16563.sql new file mode 100644 index 00000000000..98044fdd49d --- /dev/null +++ b/vortex-bench/sqlstorm/job/16563.sql @@ -0,0 +1,6 @@ +SELECT a.name AS actor_name, t.title AS movie_title, c.note AS role_note +FROM aka_name a +JOIN cast_info c ON a.person_id = c.person_id +JOIN aka_title t ON c.movie_id = t.movie_id +WHERE t.production_year >= 2000 +ORDER BY t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/16875.sql b/vortex-bench/sqlstorm/job/16875.sql new file mode 100644 index 00000000000..86ca7a7f154 --- /dev/null +++ b/vortex-bench/sqlstorm/job/16875.sql @@ -0,0 +1,9 @@ +SELECT a.name AS actor_name, + t.title AS movie_title, + c.role_id, + c.nr_order +FROM cast_info c +JOIN aka_name a ON c.person_id = a.person_id +JOIN aka_title t ON c.movie_id = t.movie_id +WHERE t.production_year = 2020 +ORDER BY a.name, t.title; diff --git a/vortex-bench/sqlstorm/job/16910.sql b/vortex-bench/sqlstorm/job/16910.sql new file mode 100644 index 00000000000..7780420d304 --- /dev/null +++ b/vortex-bench/sqlstorm/job/16910.sql @@ -0,0 +1,14 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +WHERE + t.production_year > 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/16957.sql b/vortex-bench/sqlstorm/job/16957.sql new file mode 100644 index 00000000000..63098399f89 --- /dev/null +++ b/vortex-bench/sqlstorm/job/16957.sql @@ -0,0 +1,14 @@ +SELECT + a.name 
AS actor_name, + t.title AS movie_title, + t.production_year +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +WHERE + t.production_year > 2000 +ORDER BY + a.name; diff --git a/vortex-bench/sqlstorm/job/17009.sql b/vortex-bench/sqlstorm/job/17009.sql new file mode 100644 index 00000000000..1cab5865bbd --- /dev/null +++ b/vortex-bench/sqlstorm/job/17009.sql @@ -0,0 +1,15 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.nr_order AS role_order +FROM + cast_info AS c +JOIN + aka_name AS a ON c.person_id = a.person_id +JOIN + aka_title AS t ON c.movie_id = t.movie_id +WHERE + t.production_year > 2000 +ORDER BY + t.production_year DESC, + c.nr_order; diff --git a/vortex-bench/sqlstorm/job/17248.sql b/vortex-bench/sqlstorm/job/17248.sql new file mode 100644 index 00000000000..9ae5598cdd6 --- /dev/null +++ b/vortex-bench/sqlstorm/job/17248.sql @@ -0,0 +1,18 @@ +SELECT + t.title, + a.name, + c.note +FROM + title t +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name cn ON mc.company_id = cn.id +JOIN + cast_info c ON t.id = c.movie_id +JOIN + aka_name a ON c.person_id = a.person_id +WHERE + t.production_year >= 2020 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/17492.sql b/vortex-bench/sqlstorm/job/17492.sql new file mode 100644 index 00000000000..5361e81b7d6 --- /dev/null +++ b/vortex-bench/sqlstorm/job/17492.sql @@ -0,0 +1,14 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/17556.sql b/vortex-bench/sqlstorm/job/17556.sql new file mode 100644 index 00000000000..dc4020bd906 --- /dev/null +++ b/vortex-bench/sqlstorm/job/17556.sql @@ -0,0 +1,5 @@ +SELECT a.name AS aka_name, t.title AS movie_title, 
c.note AS role_note +FROM aka_name a +JOIN cast_info c ON a.person_id = c.person_id +JOIN aka_title t ON c.movie_id = t.movie_id +WHERE t.production_year = 2020; diff --git a/vortex-bench/sqlstorm/job/17600.sql b/vortex-bench/sqlstorm/job/17600.sql new file mode 100644 index 00000000000..1be07196e29 --- /dev/null +++ b/vortex-bench/sqlstorm/job/17600.sql @@ -0,0 +1,16 @@ +SELECT + a.name AS alias_name, + t.title AS movie_title, + r.role AS role_name +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + role_type r ON c.role_id = r.id +WHERE + t.production_year > 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/17641.sql b/vortex-bench/sqlstorm/job/17641.sql new file mode 100644 index 00000000000..7012db34b0d --- /dev/null +++ b/vortex-bench/sqlstorm/job/17641.sql @@ -0,0 +1,19 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + c.kind AS company_type +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + aka_title t ON ci.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type c ON mc.company_type_id = c.id +WHERE + t.production_year > 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/17995.sql b/vortex-bench/sqlstorm/job/17995.sql new file mode 100644 index 00000000000..073759d1f73 --- /dev/null +++ b/vortex-bench/sqlstorm/job/17995.sql @@ -0,0 +1,16 @@ +SELECT + t.title, + a.name AS actor_name, + r.role AS role_type +FROM + title t +JOIN + cast_info ci ON t.id = ci.movie_id +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + role_type r ON ci.role_id = r.id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/18253.sql b/vortex-bench/sqlstorm/job/18253.sql new file mode 100644 index 00000000000..9f932c718c9 --- /dev/null +++ b/vortex-bench/sqlstorm/job/18253.sql @@ -0,0 +1,17 @@ +SELECT + a.name AS 
actor_name, + t.title AS movie_title, + c.nr_order AS role_order, + r.role AS role_name +FROM + cast_info c +JOIN + aka_name a ON c.person_id = a.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + role_type r ON c.role_id = r.id +WHERE + t.production_year = 2020 +ORDER BY + a.name, c.nr_order; diff --git a/vortex-bench/sqlstorm/job/18699.sql b/vortex-bench/sqlstorm/job/18699.sql new file mode 100644 index 00000000000..fcdac2ec860 --- /dev/null +++ b/vortex-bench/sqlstorm/job/18699.sql @@ -0,0 +1,17 @@ +SELECT + t.title, + a.name AS actor_name, + c.role_id, + m.info +FROM + title AS t +JOIN + cast_info AS c ON t.id = c.movie_id +JOIN + aka_name AS a ON c.person_id = a.person_id +JOIN + movie_info AS m ON t.id = m.movie_id +WHERE + t.production_year = 2023 +ORDER BY + t.title; diff --git a/vortex-bench/sqlstorm/job/18820.sql b/vortex-bench/sqlstorm/job/18820.sql new file mode 100644 index 00000000000..268ad16db66 --- /dev/null +++ b/vortex-bench/sqlstorm/job/18820.sql @@ -0,0 +1,7 @@ +SELECT a.name AS actor_name, t.title AS movie_title, c.kind AS company_type +FROM aka_name a +JOIN cast_info ci ON a.person_id = ci.person_id +JOIN aka_title t ON ci.movie_id = t.movie_id +JOIN movie_companies mc ON t.id = mc.movie_id +JOIN company_type c ON mc.company_type_id = c.id +WHERE t.production_year = 2020; diff --git a/vortex-bench/sqlstorm/job/18983.sql b/vortex-bench/sqlstorm/job/18983.sql new file mode 100644 index 00000000000..d818d6c688a --- /dev/null +++ b/vortex-bench/sqlstorm/job/18983.sql @@ -0,0 +1,16 @@ +SELECT + t.title, + a.name AS actor_name, + r.role AS role_name +FROM + title t +JOIN + cast_info ci ON t.id = ci.movie_id +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + role_type r ON ci.role_id = r.id +WHERE + t.production_year > 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/19007.sql b/vortex-bench/sqlstorm/job/19007.sql new file mode 100644 index 00000000000..0a841af3864 --- /dev/null +++ 
b/vortex-bench/sqlstorm/job/19007.sql @@ -0,0 +1,16 @@ +SELECT + t.title AS movie_title, + a.name AS actor_name, + r.role AS role +FROM + title t +JOIN + cast_info c ON t.id = c.movie_id +JOIN + aka_name a ON c.person_id = a.person_id +JOIN + role_type r ON c.role_id = r.id +WHERE + t.production_year = 2023 +ORDER BY + t.title; diff --git a/vortex-bench/sqlstorm/job/19236.sql b/vortex-bench/sqlstorm/job/19236.sql new file mode 100644 index 00000000000..f0814568155 --- /dev/null +++ b/vortex-bench/sqlstorm/job/19236.sql @@ -0,0 +1,15 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year AS release_year, + c.role_id AS character_id +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +WHERE + t.production_year > 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/19413.sql b/vortex-bench/sqlstorm/job/19413.sql new file mode 100644 index 00000000000..f3a852f7d5d --- /dev/null +++ b/vortex-bench/sqlstorm/job/19413.sql @@ -0,0 +1,15 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year AS production_year, + c.role_id AS role_id +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +WHERE + t.production_year >= 2000 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/19811.sql b/vortex-bench/sqlstorm/job/19811.sql new file mode 100644 index 00000000000..324e1e583f5 --- /dev/null +++ b/vortex-bench/sqlstorm/job/19811.sql @@ -0,0 +1,17 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + r.role AS role_name +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + role_type r ON ci.role_id = r.id +WHERE + r.role = 'Actor' +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/19828.sql b/vortex-bench/sqlstorm/job/19828.sql new file mode 
100644 index 00000000000..6034182158f --- /dev/null +++ b/vortex-bench/sqlstorm/job/19828.sql @@ -0,0 +1,9 @@ +SELECT a.name, t.title, c.note, ci.kind +FROM aka_name a +JOIN cast_info c ON a.person_id = c.person_id +JOIN aka_title t ON c.movie_id = t.movie_id +JOIN movie_companies mc ON t.id = mc.movie_id +JOIN company_name cn ON mc.company_id = cn.id +JOIN comp_cast_type ci ON c.person_role_id = ci.id +WHERE t.production_year > 2000 +ORDER BY t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/19997.sql b/vortex-bench/sqlstorm/job/19997.sql new file mode 100644 index 00000000000..9242c14ac32 --- /dev/null +++ b/vortex-bench/sqlstorm/job/19997.sql @@ -0,0 +1,14 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.note AS role_note +FROM + cast_info c +JOIN + aka_name a ON c.person_id = a.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +WHERE + t.production_year >= 2020 +ORDER BY + t.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/2014.sql b/vortex-bench/sqlstorm/job/2014.sql new file mode 100644 index 00000000000..ceb50978a74 --- /dev/null +++ b/vortex-bench/sqlstorm/job/2014.sql @@ -0,0 +1,72 @@ +WITH RankedTitles AS ( + SELECT + a.title, + a.production_year, + ROW_NUMBER() OVER (PARTITION BY a.production_year ORDER BY a.production_year DESC) AS rank + FROM + aka_title AS a + WHERE + a.production_year IS NOT NULL +), +ActorMovieCounts AS ( + SELECT + ci.person_id, + COUNT(DISTINCT ci.movie_id) AS movie_count + FROM + cast_info AS ci + GROUP BY + ci.person_id +), +RecentMovies AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + COALESCE(k.keyword, 'No Keyword') AS keyword, + CASE + WHEN m.production_year >= 2000 THEN 'Modern' + ELSE 'Classic' + END AS era + FROM + aka_title AS m + LEFT JOIN + movie_keyword AS mk ON m.id = mk.movie_id + LEFT JOIN + keyword AS k ON mk.keyword_id = k.id + WHERE + m.production_year IS NOT NULL AND + m.production_year > (SELECT AVG(production_year) FROM aka_title) +), 
+SelectedActors AS ( + SELECT + p.id AS person_id, + p.name, + a.movie_count + FROM + aka_name AS p + JOIN + ActorMovieCounts AS a ON p.person_id = a.person_id + WHERE + a.movie_count > 5 +) +SELECT + rm.title, + rm.production_year, + rm.keyword, + sa.name AS actor_name, + sa.movie_count, + rt.rank +FROM + RecentMovies AS rm +JOIN + complete_cast AS cc ON rm.movie_id = cc.movie_id +JOIN + SelectedActors AS sa ON cc.subject_id = sa.person_id +JOIN + RankedTitles AS rt ON rm.title = rt.title +WHERE + rm.era = 'Modern' AND + sa.name IS NOT NULL +ORDER BY + rm.production_year DESC, + sa.name; diff --git a/vortex-bench/sqlstorm/job/20839.sql b/vortex-bench/sqlstorm/job/20839.sql new file mode 100644 index 00000000000..558333bad7b --- /dev/null +++ b/vortex-bench/sqlstorm/job/20839.sql @@ -0,0 +1,67 @@ +WITH RankedMovies AS ( + SELECT + t.id AS title_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY SUM(COALESCE(mk.id, 0)) DESC) AS rank_by_keywords, + COUNT(DISTINCT ci.person_id) AS total_cast_members, + SUM(CASE WHEN ci.note IS NOT NULL THEN 1 ELSE 0 END) AS has_notes + FROM + aka_title t + LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id + LEFT JOIN + complete_cast cc ON t.id = cc.movie_id + LEFT JOIN + cast_info ci ON cc.subject_id = ci.id + GROUP BY + t.id, t.title, t.production_year +), +MoviesWithNotes AS ( + SELECT + rm.title_id, + rm.title, + rm.production_year, + rm.rank_by_keywords, + rm.total_cast_members, + rm.has_notes, + CASE + WHEN rm.total_cast_members > 5 AND rm.has_notes > 0 THEN 'Highly Casted with Notes' + WHEN rm.total_cast_members > 5 THEN 'Highly Casted' + WHEN rm.has_notes > 0 THEN 'Few Casted with Notes' + ELSE 'Few Casted' + END AS cast_category + FROM + RankedMovies rm +), +DistinctCompanies AS ( + SELECT + mc.movie_id, + COUNT(DISTINCT c.name) AS unique_company_count + FROM + movie_companies mc + JOIN + company_name c ON mc.company_id = c.id + GROUP BY + mc.movie_id +) +SELECT + mw.title, + 
mw.production_year, + mw.cast_category, + COALESCE(dc.unique_company_count, 0) AS number_of_companies, + CASE + WHEN mw.rank_by_keywords < 5 THEN 'Consider Watching' + WHEN mw.rank_by_keywords BETWEEN 5 AND 10 THEN 'Mainstream Pick' + ELSE 'Top Choice!' + END AS recommendation +FROM + MoviesWithNotes mw +LEFT JOIN + DistinctCompanies dc ON mw.title_id = dc.movie_id +WHERE + mw.production_year IS NOT NULL + AND mw.cast_category IN ('Highly Casted with Notes', 'Highly Casted') +ORDER BY + mw.production_year DESC, + mw.rank_by_keywords; diff --git a/vortex-bench/sqlstorm/job/21138.sql b/vortex-bench/sqlstorm/job/21138.sql new file mode 100644 index 00000000000..1e721620f9f --- /dev/null +++ b/vortex-bench/sqlstorm/job/21138.sql @@ -0,0 +1,78 @@ + +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.title) AS rank_within_year + FROM + aka_title t + WHERE + t.production_year >= 2000 + AND t.production_year <= 2023 +), +CastDetails AS ( + SELECT + ci.movie_id, + c.name AS actor_name, + COUNT(ci.id) AS role_count, + SUM(CASE WHEN ci.note IS NULL THEN 1 ELSE 0 END) AS null_notes_count + FROM + cast_info ci + JOIN + aka_name c ON ci.person_id = c.person_id + GROUP BY + ci.movie_id, c.name +), +CompanyInfo AS ( + SELECT + mc.movie_id, + cc.name AS company_name, + ct.kind AS company_type, + COALESCE(NULLIF(mc.note, ''), 'No Note') AS company_note + FROM + movie_companies mc + JOIN + company_name cc ON mc.company_id = cc.id + JOIN + company_type ct ON mc.company_type_id = ct.id +) +SELECT + rm.title, + rm.production_year, + cd.actor_name, + cd.role_count, + cd.null_notes_count, + ci.company_name, + ci.company_type, + ci.company_note +FROM + RankedMovies rm +LEFT JOIN + CastDetails cd ON rm.movie_id = cd.movie_id +LEFT JOIN + CompanyInfo ci ON rm.movie_id = ci.movie_id +WHERE + rm.rank_within_year <= 5 + +UNION ALL + +SELECT + title.title, + title.production_year, + 'Cameo Appearance' 
AS actor_name, + 1 AS role_count, + 0 AS null_notes_count, + 'Unknown Company' AS company_name, + 'Cameo' AS company_type, + 'Not Specified' AS company_note +FROM + title +WHERE + title.kind_id IN (SELECT id FROM kind_type WHERE kind LIKE '%Cameo%') + AND title.production_year < 2000 + AND title.title IS NOT NULL + +ORDER BY + production_year, + role_count DESC; diff --git a/vortex-bench/sqlstorm/job/21613.sql b/vortex-bench/sqlstorm/job/21613.sql new file mode 100644 index 00000000000..cc462752d83 --- /dev/null +++ b/vortex-bench/sqlstorm/job/21613.sql @@ -0,0 +1,59 @@ +WITH ranked_movies AS ( + SELECT + mt.id AS movie_id, + mt.title AS movie_title, + mt.production_year, + ROW_NUMBER() OVER (PARTITION BY mt.production_year ORDER BY mt.title) AS title_rank, + COUNT(*) OVER (PARTITION BY mt.production_year) AS total_movies + FROM aka_title mt + WHERE mt.production_year IS NOT NULL +), +cast_details AS ( + SELECT + c.movie_id, + c.person_id, + ak.name AS actor_name, + rc.role AS role_type, + COALESCE(CAST(COUNT(DISTINCT c.id) AS TEXT), '0') AS role_count + FROM cast_info c + INNER JOIN aka_name ak ON c.person_id = ak.person_id + LEFT JOIN role_type rc ON c.role_id = rc.id + GROUP BY c.movie_id, c.person_id, ak.name, rc.role +), +movie_keywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM movie_keyword mk + JOIN keyword k ON mk.keyword_id = k.id + GROUP BY mk.movie_id +), +selected_movies AS ( + SELECT + rm.movie_id, + rm.movie_title, + rm.production_year, + cd.actor_name, + cd.role_type, + mk.keywords, + CASE + WHEN rm.title_rank <= 10 THEN 'Top Ten' + WHEN rm.total_movies > 0 AND rm.title_rank > 10 THEN 'Non-Top Ten' + ELSE 'Unknown Rank' + END AS movie_rank_category + FROM ranked_movies rm + LEFT JOIN cast_details cd ON rm.movie_id = cd.movie_id + LEFT JOIN movie_keywords mk ON rm.movie_id = mk.movie_id +) +SELECT + sm.movie_title, + sm.production_year, + sm.actor_name, + sm.role_type, + sm.keywords, + sm.movie_rank_category +FROM 
selected_movies sm +WHERE (sm.production_year BETWEEN 2000 AND 2023) + AND (sm.movie_rank_category = 'Top Ten' OR sm.keywords LIKE '%Action%') +ORDER BY sm.production_year DESC, sm.movie_title ASC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/22014.sql b/vortex-bench/sqlstorm/job/22014.sql new file mode 100644 index 00000000000..675a3cba40f --- /dev/null +++ b/vortex-bench/sqlstorm/job/22014.sql @@ -0,0 +1,75 @@ + +WITH RECURSIVE MoviesCTE AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + t.kind_id, + COALESCE(SUM(CASE WHEN c.nr_order IS NOT NULL THEN 1 ELSE 0 END), 0) AS cast_count + FROM + aka_title t + LEFT JOIN + cast_info c ON t.id = c.movie_id + WHERE + t.production_year IS NOT NULL + GROUP BY + t.id, t.title, t.production_year, t.kind_id + HAVING + t.production_year > 2000 +), +RankedMovies AS ( + SELECT + movie_id, + title, + production_year, + cast_count, + ROW_NUMBER() OVER (PARTITION BY production_year ORDER BY cast_count DESC) AS rank + FROM + MoviesCTE +), +TitleKeyword AS ( + SELECT + mt.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mt + JOIN + keyword k ON mt.keyword_id = k.id + GROUP BY + mt.movie_id +), +CompanyTitles AS ( + SELECT + m.title, + c.name AS company_name, + c.country_code + FROM + aka_title m + JOIN + movie_companies mc ON m.id = mc.movie_id + JOIN + company_name c ON mc.company_id = c.id + WHERE + c.country_code IS NOT NULL +) +SELECT + rm.title, + rm.production_year, + rm.cast_count, + tk.keywords, + ct.company_name, + ct.country_code +FROM + RankedMovies rm +LEFT JOIN + TitleKeyword tk ON rm.movie_id = tk.movie_id +LEFT JOIN + CompanyTitles ct ON rm.title = ct.title +WHERE + (rm.rank <= 5 OR rm.cast_count >= 10) + AND COALESCE(ct.country_code, '') <> 'USA' + AND rm.production_year BETWEEN 2000 AND 2023 +ORDER BY + rm.production_year DESC, + rm.cast_count DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/job/22414.sql 
b/vortex-bench/sqlstorm/job/22414.sql new file mode 100644 index 00000000000..aa434b2843a --- /dev/null +++ b/vortex-bench/sqlstorm/job/22414.sql @@ -0,0 +1,60 @@ +WITH RECURSIVE top_movie_titles AS ( + SELECT + t.id AS title_id, + t.title, + COALESCE(t.production_year, 0) AS production_year, + 'N/A' AS cast_member + FROM title t + WHERE t.production_year IS NOT NULL + UNION ALL + SELECT + t.id, + t.title, + t.production_year, + ak.name AS cast_member + FROM title t + JOIN cast_info c ON c.movie_id = t.id + JOIN aka_name ak ON ak.person_id = c.person_id + WHERE ak.name IS NOT NULL +), +movie_keywords AS ( + SELECT + m.id AS movie_id, + k.keyword, + ROW_NUMBER() OVER (PARTITION BY m.id ORDER BY k.keyword) AS keyword_rank + FROM movie_keyword mk + JOIN keyword k ON k.id = mk.keyword_id + JOIN aka_title m ON m.id = mk.movie_id +), +movie_company_data AS ( + SELECT + c.movie_id, + cn.name AS company_name, + ct.kind AS company_type, + ROW_NUMBER() OVER (PARTITION BY c.movie_id ORDER BY cn.name) AS company_rank + FROM movie_companies c + JOIN company_name cn ON cn.id = c.company_id + JOIN company_type ct ON ct.id = c.company_type_id +), +filtered_titles AS ( + SELECT + title_id, + title, + production_year, + STRING_AGG(cast_member, ', ') AS cast_list + FROM top_movie_titles + GROUP BY title_id, title, production_year + HAVING COUNT(*) > 1 +) +SELECT + ft.title_id, + ft.title, + ft.production_year, + COALESCE(mk.keyword, 'No Keywords') AS keyword, + COALESCE(mcd.company_name, 'Unknown Company') AS company_name, + COALESCE(mcd.company_type, 'Unknown Type') AS company_type +FROM filtered_titles ft +LEFT JOIN movie_keywords mk ON mk.movie_id = ft.title_id AND mk.keyword_rank = 1 +LEFT JOIN movie_company_data mcd ON mcd.movie_id = ft.title_id AND mcd.company_rank = 1 +WHERE ft.production_year > 2000 +ORDER BY ft.production_year DESC, ft.title; diff --git a/vortex-bench/sqlstorm/job/2320.sql b/vortex-bench/sqlstorm/job/2320.sql new file mode 100644 index 
00000000000..69edb897df6 --- /dev/null +++ b/vortex-bench/sqlstorm/job/2320.sql @@ -0,0 +1,62 @@ + +WITH RankedMovies AS ( + SELECT + t.id AS title_id, + t.title, + t.production_year, + COUNT(DISTINCT c.person_id) AS actor_count, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY COUNT(DISTINCT c.person_id) DESC) AS rank + FROM + aka_title t + JOIN + cast_info c ON t.id = c.movie_id + WHERE + t.production_year IS NOT NULL + GROUP BY + t.id, t.title, t.production_year +), +TopMovies AS ( + SELECT + title_id, + title, + production_year + FROM + RankedMovies + WHERE + rank <= 5 +), +MovieDetails AS ( + SELECT + tm.title_id, + tm.title, + tm.production_year, + STRING_AGG(DISTINCT co.name, ', ') AS companies, + STRING_AGG(DISTINCT k.keyword, ', ') AS keywords + FROM + TopMovies tm + LEFT JOIN + movie_companies mc ON tm.title_id = mc.movie_id + LEFT JOIN + company_name co ON mc.company_id = co.id + LEFT JOIN + movie_keyword mk ON tm.title_id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + tm.title_id, tm.title, tm.production_year +) +SELECT + md.title, + md.production_year, + md.companies, + md.keywords, + COALESCE(NULLIF(md.companies, ''), 'No companies listed') AS company_info, + CASE + WHEN md.production_year < 2000 THEN 'Classic' + WHEN md.production_year BETWEEN 2000 AND 2010 THEN 'Modern' + ELSE 'Recent' + END AS era +FROM + MovieDetails md +ORDER BY + md.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/2347.sql b/vortex-bench/sqlstorm/job/2347.sql new file mode 100644 index 00000000000..9a2a7e36996 --- /dev/null +++ b/vortex-bench/sqlstorm/job/2347.sql @@ -0,0 +1,67 @@ +WITH RankedMovies AS ( + SELECT + t.title, + t.production_year, + COUNT(ci.person_id) AS total_cast, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY COUNT(ci.person_id) DESC) AS rank + FROM + title t + LEFT JOIN + complete_cast cc ON t.id = cc.movie_id + LEFT JOIN + cast_info ci ON cc.subject_id = ci.movie_id + GROUP BY + t.title, 
t.production_year +), +FilteredMovies AS ( + SELECT + title, + production_year, + total_cast + FROM + RankedMovies + WHERE + rank <= 10 +), +MovieKeywords AS ( + SELECT + t.title, + k.keyword + FROM + title t + JOIN + movie_keyword mk ON t.id = mk.movie_id + JOIN + keyword k ON mk.keyword_id = k.id +), +MoviesWithKeywords AS ( + SELECT + fm.title, + fm.production_year, + fm.total_cast, + STRING_AGG(mk.keyword, ', ') AS keywords + FROM + FilteredMovies fm + LEFT JOIN + MovieKeywords mk ON fm.title = mk.title + GROUP BY + fm.title, fm.production_year, fm.total_cast +) +SELECT + mwk.title, + mwk.production_year, + mwk.total_cast, + COALESCE(mwk.keywords, 'No Keywords') AS keywords, + CASE + WHEN mwk.total_cast > 100 THEN 'Blockbuster' + WHEN mwk.total_cast BETWEEN 50 AND 100 THEN 'Moderate Hit' + WHEN mwk.total_cast < 50 THEN 'Flop' + ELSE 'Unknown' + END AS movie_performance +FROM + MoviesWithKeywords mwk +WHERE + mwk.production_year >= 2000 +ORDER BY + mwk.total_cast DESC +LIMIT 20; diff --git a/vortex-bench/sqlstorm/job/23823.sql b/vortex-bench/sqlstorm/job/23823.sql new file mode 100644 index 00000000000..5e396881421 --- /dev/null +++ b/vortex-bench/sqlstorm/job/23823.sql @@ -0,0 +1,71 @@ + +WITH RECURSIVE MovieHierarchy AS ( + SELECT + mt.id AS movie_id, + mt.title, + mt.production_year, + CAST(NULL AS integer) AS parent_id, + 1 AS level + FROM aka_title mt + WHERE mt.production_year > 1990 + + UNION ALL + + SELECT + m.id, + m.title, + m.production_year, + mh.movie_id, + mh.level + 1 + FROM aka_title m + JOIN MovieHierarchy mh ON m.episode_of_id = mh.movie_id +), + +RoleCounts AS ( + SELECT + c.movie_id, + rc.role, + COUNT(c.person_id) AS role_count + FROM cast_info c + JOIN role_type rc ON c.role_id = rc.id + GROUP BY c.movie_id, rc.role +), + +TopDirectors AS ( + SELECT + ci.movie_id, + ak.name, + COUNT(*) AS director_count + FROM cast_info ci + JOIN aka_name ak ON ci.person_id = ak.person_id + WHERE ci.person_role_id = (SELECT id FROM role_type WHERE role = 
'Director') + GROUP BY ci.movie_id, ak.name + HAVING COUNT(*) > 1 +), + +MovieDetails AS ( + SELECT + th.movie_id, + th.title, + th.production_year, + COALESCE(SUM(rc.role_count), 0) AS total_roles, + COALESCE(MAX(td.director_count), 0) AS max_directors + FROM MovieHierarchy th + LEFT JOIN RoleCounts rc ON th.movie_id = rc.movie_id + LEFT JOIN TopDirectors td ON th.movie_id = td.movie_id + GROUP BY th.movie_id, th.title, th.production_year +) + +SELECT + md.title, + md.production_year, + md.total_roles, + md.max_directors, + CASE + WHEN md.max_directors > 0 THEN 'Multi-Directed' + ELSE 'Single-Directed or Not Available' + END AS director_status +FROM MovieDetails md +WHERE md.total_roles > 5 +ORDER BY md.production_year DESC, md.total_roles DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/job/25030.sql b/vortex-bench/sqlstorm/job/25030.sql new file mode 100644 index 00000000000..e32fd79d80f --- /dev/null +++ b/vortex-bench/sqlstorm/job/25030.sql @@ -0,0 +1,64 @@ +WITH MovieTitleInfo AS ( + SELECT + t.title AS movie_title, + t.production_year, + k.keyword AS movie_keyword, + r.role AS cast_role, + a.name AS actor_name, + c.note AS cast_note + FROM + aka_title t + JOIN + movie_keyword mk ON t.id = mk.movie_id + JOIN + keyword k ON mk.keyword_id = k.id + JOIN + cast_info c ON t.id = c.movie_id + JOIN + aka_name a ON c.person_id = a.person_id + JOIN + role_type r ON c.role_id = r.id + WHERE + t.production_year BETWEEN 2000 AND 2023 +), +AggregateKeywordCount AS ( + SELECT + movie_title, + production_year, + STRING_AGG(movie_keyword, ', ') AS keywords, + COUNT(movie_keyword) AS keyword_count + FROM + MovieTitleInfo + GROUP BY + movie_title, production_year +), +DetailedMovieInfo AS ( + SELECT + m.movie_title, + m.production_year, + m.keywords, + m.keyword_count, + COUNT(DISTINCT c.id) AS cast_count, + STRING_AGG(DISTINCT a.name, ', ') AS all_actors + FROM + AggregateKeywordCount m + JOIN + aka_title t ON m.movie_title = t.title AND m.production_year = 
t.production_year + JOIN + cast_info c ON t.id = c.movie_id + JOIN + aka_name a ON c.person_id = a.person_id + GROUP BY + m.movie_title, m.production_year, m.keywords, m.keyword_count +) +SELECT + d.movie_title, + d.production_year, + d.keywords, + d.keyword_count, + d.cast_count, + d.all_actors +FROM + DetailedMovieInfo d +ORDER BY + d.production_year DESC, d.keyword_count DESC; diff --git a/vortex-bench/sqlstorm/job/25111.sql b/vortex-bench/sqlstorm/job/25111.sql new file mode 100644 index 00000000000..8c1add23423 --- /dev/null +++ b/vortex-bench/sqlstorm/job/25111.sql @@ -0,0 +1,50 @@ +WITH RankedMovies AS ( + SELECT + mt.id AS movie_id, + mt.title, + mt.production_year, + st.kind AS movie_kind, + STRING_AGG(DISTINCT cn.name, ', ') AS production_companies, + COUNT(DISTINCT c.person_id) AS cast_count + FROM + aka_title mt + JOIN + movie_info mi ON mt.id = mi.movie_id + JOIN + movie_companies mc ON mt.id = mc.movie_id + JOIN + company_name cn ON mc.company_id = cn.id + JOIN + cast_info c ON mt.id = c.movie_id + JOIN + kind_type st ON mt.kind_id = st.id + WHERE + mi.info_type_id = (SELECT id FROM info_type WHERE info = 'summary') + GROUP BY + mt.id, mt.title, mt.production_year, st.kind +), +TopRatedMovies AS ( + SELECT + movie_id, + title, + production_year, + movie_kind, + production_companies, + cast_count, + RANK() OVER (ORDER BY cast_count DESC) AS rank + FROM + RankedMovies +) +SELECT + tr.movie_id, + tr.title, + tr.production_year, + tr.movie_kind, + tr.production_companies, + tr.cast_count +FROM + TopRatedMovies tr +WHERE + tr.rank <= 10 +ORDER BY + tr.cast_count DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/25148.sql b/vortex-bench/sqlstorm/job/25148.sql new file mode 100644 index 00000000000..2b820977ee7 --- /dev/null +++ b/vortex-bench/sqlstorm/job/25148.sql @@ -0,0 +1,57 @@ + +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + t.kind_id, + ROW_NUMBER() OVER (PARTITION BY t.production_year 
ORDER BY t.title) AS rank_per_year + FROM + aka_title t + WHERE + t.production_year IS NOT NULL +), +MovieCast AS ( + SELECT + m.movie_id, + COUNT(c.person_id) AS cast_count, + STRING_AGG(CONCAT(a.name, ' (', r.role, ')'), ', ') AS full_cast + FROM + RankedMovies m + JOIN + cast_info c ON m.movie_id = c.movie_id + JOIN + aka_name a ON c.person_id = a.person_id + JOIN + role_type r ON c.role_id = r.id + GROUP BY + m.movie_id +), +TopMovies AS ( + SELECT + rm.movie_id, + rm.title, + rm.production_year, + mc.cast_count, + mc.full_cast, + rm.kind_id + FROM + RankedMovies rm + JOIN + MovieCast mc ON rm.movie_id = mc.movie_id + WHERE + rm.rank_per_year <= 3 +) +SELECT + tm.title, + tm.production_year, + tm.cast_count, + tm.full_cast, + ki.kind +FROM + TopMovies tm +JOIN + kind_type ki ON tm.kind_id = ki.id +ORDER BY + tm.production_year DESC, + tm.title; diff --git a/vortex-bench/sqlstorm/job/25347.sql b/vortex-bench/sqlstorm/job/25347.sql new file mode 100644 index 00000000000..9c9ac30c6a6 --- /dev/null +++ b/vortex-bench/sqlstorm/job/25347.sql @@ -0,0 +1,60 @@ +WITH RankedMovies AS ( + SELECT + a.title AS movie_title, + a.production_year, + p.name AS person_name, + rk.rnk, + ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY cc.nr_order) AS cast_order + FROM + aka_title a + JOIN + cast_info cc ON a.id = cc.movie_id + JOIN + aka_name p ON cc.person_id = p.person_id + JOIN + role_type rt ON cc.role_id = rt.id + JOIN + movie_info mi ON a.id = mi.movie_id + JOIN + info_type it ON mi.info_type_id = it.id + LEFT JOIN + (SELECT + mi.movie_id, + COUNT(*) AS rnk + FROM + movie_info mi + JOIN + info_type it ON mi.info_type_id = it.id + WHERE + it.info LIKE '%Award%' + GROUP BY + mi.movie_id) rk ON a.id = rk.movie_id + WHERE + a.production_year >= 2000 + AND a.kind_id IN (SELECT id FROM kind_type WHERE kind IN ('feature', 'short')) +), + +FinalOutput AS ( + SELECT + rm.movie_title, + rm.production_year, + rm.person_name, + rm.cast_order, + COALESCE(rm.rnk, 0) AS rank_award_count + 
FROM + RankedMovies rm +) + +SELECT + movie_title, + production_year, + person_name, + cast_order, + rank_award_count +FROM + FinalOutput +ORDER BY + production_year DESC, + rank_award_count DESC, + cast_order ASC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/job/25497.sql b/vortex-bench/sqlstorm/job/25497.sql new file mode 100644 index 00000000000..8f83534f6c9 --- /dev/null +++ b/vortex-bench/sqlstorm/job/25497.sql @@ -0,0 +1,45 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + k.keyword, + COUNT(DISTINCT c.person_id) AS actor_count + FROM + aka_title t + JOIN + movie_keyword mk ON t.id = mk.movie_id + JOIN + keyword k ON mk.keyword_id = k.id + LEFT JOIN + cast_info c ON t.id = c.movie_id + WHERE + t.production_year >= 2000 + GROUP BY + t.id, t.title, t.production_year, k.keyword +), + +TopMovies AS ( + SELECT + movie_id, + title, + production_year, + keyword, + actor_count, + RANK() OVER (PARTITION BY keyword ORDER BY actor_count DESC) AS rank + FROM + RankedMovies +) + +SELECT + tm.title, + tm.production_year, + tm.keyword, + tm.actor_count +FROM + TopMovies tm +WHERE + tm.rank <= 10 +ORDER BY + tm.keyword, + tm.actor_count DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/25854.sql b/vortex-bench/sqlstorm/job/25854.sql new file mode 100644 index 00000000000..6cf5d8c234d --- /dev/null +++ b/vortex-bench/sqlstorm/job/25854.sql @@ -0,0 +1,55 @@ + +WITH movie_details AS ( + SELECT + t.title AS movie_title, + t.production_year, + k.keyword AS movie_keyword, + c.name AS company_name, + r.role AS cast_role, + n.name AS actor_name + FROM + title t + JOIN + movie_keyword mk ON t.id = mk.movie_id + JOIN + keyword k ON mk.keyword_id = k.id + JOIN + movie_companies mc ON t.id = mc.movie_id + JOIN + company_name c ON mc.company_id = c.id + JOIN + complete_cast cc ON t.id = cc.movie_id + JOIN + cast_info ci ON cc.subject_id = ci.id + JOIN + role_type r ON ci.role_id = r.id + JOIN + aka_name n ON ci.person_id = 
n.person_id + WHERE + t.production_year BETWEEN 2000 AND 2020 + AND k.keyword IS NOT NULL + AND c.country_code = 'USA' +), +keyword_stats AS ( + SELECT + movie_keyword, + COUNT(*) AS keyword_count, + STRING_AGG(DISTINCT movie_title, ', ') AS related_movies + FROM + movie_details + GROUP BY + movie_keyword +) +SELECT + ks.movie_keyword, + ks.keyword_count, + ks.related_movies, + AVG(EXTRACT(YEAR FROM CURRENT_DATE) - m.production_year) AS average_age +FROM + keyword_stats ks +JOIN + movie_details m ON ks.movie_keyword = m.movie_keyword +GROUP BY + ks.movie_keyword, ks.keyword_count, ks.related_movies +ORDER BY + ks.keyword_count DESC, ks.movie_keyword; diff --git a/vortex-bench/sqlstorm/job/26228.sql b/vortex-bench/sqlstorm/job/26228.sql new file mode 100644 index 00000000000..8a1b6a6a86e --- /dev/null +++ b/vortex-bench/sqlstorm/job/26228.sql @@ -0,0 +1,61 @@ +WITH ranked_titles AS ( + SELECT + a.id AS aka_id, + a.name AS aka_name, + t.id AS title_id, + t.title AS movie_title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY a.person_id ORDER BY t.production_year DESC) AS rn + FROM + aka_name a + JOIN + cast_info ci ON a.person_id = ci.person_id + JOIN + aka_title t ON ci.movie_id = t.movie_id + WHERE + a.name ILIKE '%John%' + AND t.production_year >= 2000 +), +top_titles AS ( + SELECT + aka_id, + aka_name, + title_id, + movie_title, + production_year + FROM + ranked_titles + WHERE + rn <= 5 +), +movie_details AS ( + SELECT + tt.aka_id, + tt.aka_name, + tt.movie_title, + tt.production_year, + array_agg(DISTINCT k.keyword) AS keywords, + array_agg(DISTINCT c.kind) AS company_types + FROM + top_titles tt + LEFT JOIN + movie_keyword mk ON tt.title_id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + LEFT JOIN + movie_companies mc ON tt.title_id = mc.movie_id + LEFT JOIN + company_type c ON mc.company_type_id = c.id + GROUP BY + tt.aka_id, tt.aka_name, tt.movie_title, tt.production_year +) +SELECT + md.aka_name, + md.movie_title, + 
md.production_year, + md.keywords, + md.company_types +FROM + movie_details md +ORDER BY + md.production_year DESC, md.aka_name; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/26273.sql b/vortex-bench/sqlstorm/job/26273.sql new file mode 100644 index 00000000000..e33f6d1a75d --- /dev/null +++ b/vortex-bench/sqlstorm/job/26273.sql @@ -0,0 +1,59 @@ +WITH movie_details AS ( + SELECT + mt.title AS movie_title, + mt.production_year, + ak.name AS actor_name, + ak.id AS actor_id, + ct.kind AS company_type, + cn.name AS company_name, + mi.info AS movie_info, + ko.keyword AS movie_keyword + FROM + aka_name ak + JOIN + cast_info ci ON ak.person_id = ci.person_id + JOIN + title mt ON ci.movie_id = mt.id + JOIN + movie_companies mc ON mt.id = mc.movie_id + JOIN + company_name cn ON mc.company_id = cn.id + JOIN + company_type ct ON mc.company_type_id = ct.id + LEFT JOIN + movie_info mi ON mt.id = mi.movie_id + LEFT JOIN + movie_keyword mk ON mt.id = mk.movie_id + LEFT JOIN + keyword ko ON mk.keyword_id = ko.id + WHERE + mt.production_year >= 2000 + AND ak.name IS NOT NULL + AND cn.country_code = 'USA' +), +aggregated_details AS ( + SELECT + movie_title, + production_year, + actor_name, + COUNT(DISTINCT actor_id) AS actor_count, + STRING_AGG(DISTINCT company_name, ', ') AS production_companies, + STRING_AGG(DISTINCT movie_info, ', ') AS additional_info, + STRING_AGG(DISTINCT movie_keyword, ', ') AS keywords + FROM + movie_details + GROUP BY + movie_title, production_year, actor_name +) +SELECT + actor_name, + movie_title, + production_year, + actor_count, + production_companies, + additional_info, + keywords +FROM + aggregated_details +ORDER BY + production_year DESC, actor_count DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/26787.sql b/vortex-bench/sqlstorm/job/26787.sql new file mode 100644 index 00000000000..c025ba9078a --- /dev/null +++ b/vortex-bench/sqlstorm/job/26787.sql @@ -0,0 +1,45 @@ +WITH RankedMovies AS ( + SELECT + 
t.id AS movie_id, + t.title, + t.production_year, + COUNT(DISTINCT ci.person_id) AS cast_count, + STRING_AGG(DISTINCT ak.name, ', ') AS actor_names + FROM + aka_title t + JOIN + cast_info ci ON ci.movie_id = t.id + JOIN + aka_name ak ON ak.person_id = ci.person_id + WHERE + t.production_year >= 2000 + GROUP BY + t.id, t.title, t.production_year +), +HighCastMovies AS ( + SELECT + rm.movie_id, + rm.title, + rm.production_year, + rm.cast_count, + rm.actor_names, + ROW_NUMBER() OVER (ORDER BY rm.cast_count DESC) AS rank + FROM + RankedMovies rm + WHERE + rm.cast_count > 5 +) +SELECT + hcm.title, + hcm.production_year, + hcm.cast_count, + hcm.actor_names, + k.keyword AS genre +FROM + HighCastMovies hcm +LEFT JOIN + movie_keyword mk ON mk.movie_id = hcm.movie_id +LEFT JOIN + keyword k ON k.id = mk.keyword_id +ORDER BY + hcm.rank, hcm.title; diff --git a/vortex-bench/sqlstorm/job/26788.sql b/vortex-bench/sqlstorm/job/26788.sql new file mode 100644 index 00000000000..643345ca6a3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/26788.sql @@ -0,0 +1,30 @@ +WITH RankedMovies AS ( + SELECT + a.title AS movie_title, + a.production_year, + p.name AS person_name, + r.role AS role_name, + ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY a.production_year DESC) AS movie_rank + FROM + aka_title a + JOIN + cast_info ci ON a.id = ci.movie_id + JOIN + aka_name p ON p.person_id = ci.person_id + JOIN + role_type r ON r.id = ci.role_id + WHERE + a.kind_id = (SELECT id FROM kind_type WHERE kind = 'movie') +) +SELECT + rm.movie_title, + rm.production_year, + STRING_AGG(rm.person_name || ' (' || rm.role_name || ')', ', ') AS cast_details +FROM + RankedMovies rm +WHERE + rm.movie_rank <= 5 +GROUP BY + rm.movie_title, rm.production_year +ORDER BY + rm.production_year DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/27218.sql b/vortex-bench/sqlstorm/job/27218.sql new file mode 100644 index 00000000000..9de914d9f3c --- /dev/null +++ b/vortex-bench/sqlstorm/job/27218.sql @@ 
-0,0 +1,57 @@ + +WITH MovieDetails AS ( + SELECT + t.title AS movie_title, + t.production_year, + STRING_AGG(DISTINCT a.name, ', ') AS actors, + STRING_AGG(DISTINCT k.keyword, ', ') AS keywords, + ct.kind AS company_type, + STRING_AGG(DISTINCT cn.name, ', ') AS companies + FROM + aka_title t + JOIN + complete_cast cc ON t.id = cc.movie_id + JOIN + cast_info ci ON cc.subject_id = ci.person_id + JOIN + aka_name a ON ci.person_id = a.person_id + LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + JOIN + movie_companies mc ON t.id = mc.movie_id + JOIN + company_name cn ON mc.company_id = cn.id + JOIN + company_type ct ON mc.company_type_id = ct.id + GROUP BY + t.id, t.title, t.production_year, ct.kind +), +PopularMovies AS ( + SELECT + movie_title, + production_year, + actors, + keywords, + company_type, + companies, + ROW_NUMBER() OVER (PARTITION BY production_year ORDER BY COUNT(DISTINCT actors) DESC) AS rank + FROM + MovieDetails + GROUP BY + movie_title, production_year, actors, keywords, company_type, companies +) +SELECT + movie_title, + production_year, + actors, + keywords, + company_type, + companies +FROM + PopularMovies +WHERE + rank <= 5 +ORDER BY + production_year DESC, rank; diff --git a/vortex-bench/sqlstorm/job/27305.sql b/vortex-bench/sqlstorm/job/27305.sql new file mode 100644 index 00000000000..adb5e3781a4 --- /dev/null +++ b/vortex-bench/sqlstorm/job/27305.sql @@ -0,0 +1,51 @@ + +WITH RankedMovies AS ( + SELECT + t.title, + t.production_year, + t.kind_id, + STRING_AGG(DISTINCT ak.name, ', ') AS aka_names, + STRING_AGG(DISTINCT k.keyword, ', ') AS keywords, + ROW_NUMBER() OVER (PARTITION BY t.kind_id ORDER BY t.production_year DESC) AS rank + FROM + aka_title AS t + LEFT JOIN + movie_keyword AS mk ON t.id = mk.movie_id + LEFT JOIN + keyword AS k ON mk.keyword_id = k.id + LEFT JOIN + movie_companies AS mc ON t.id = mc.movie_id + LEFT JOIN + company_name AS cn ON mc.company_id = cn.id + LEFT JOIN + 
aka_name AS ak ON ak.person_id = mc.company_id + WHERE + t.production_year >= 2000 + GROUP BY + t.title, t.production_year, t.kind_id +), +TopRankedMovies AS ( + SELECT + title, + production_year, + aka_names, + keywords, + kind_id + FROM + RankedMovies + WHERE + rank <= 5 +) +SELECT + tr.title, + tr.production_year, + ct.kind AS company_type, + tr.aka_names, + tr.keywords +FROM + TopRankedMovies AS tr +JOIN + company_type AS ct ON tr.kind_id = ct.id +ORDER BY + tr.production_year DESC, + tr.title ASC; diff --git a/vortex-bench/sqlstorm/job/2758.sql b/vortex-bench/sqlstorm/job/2758.sql new file mode 100644 index 00000000000..7253e36c9be --- /dev/null +++ b/vortex-bench/sqlstorm/job/2758.sql @@ -0,0 +1,56 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + RANK() OVER (PARTITION BY t.production_year ORDER BY COUNT(c.person_id) DESC) AS rank + FROM + aka_title t + JOIN + complete_cast cc ON t.id = cc.movie_id + LEFT JOIN + cast_info c ON cc.subject_id = c.person_id + GROUP BY + t.id, t.title, t.production_year +), +MovieKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +), +DirectorInfo AS ( + SELECT + ci.movie_id, + STRING_AGG(DISTINCT a.name, ', ') AS directors + FROM + cast_info ci + JOIN + aka_name a ON ci.person_id = a.person_id + WHERE + ci.person_role_id = (SELECT id FROM role_type WHERE role = 'Director') + GROUP BY + ci.movie_id +) +SELECT + rm.movie_id, + rm.title, + rm.production_year, + rm.rank, + COALESCE(mk.keywords, 'No Keywords') AS keywords, + COALESCE(di.directors, 'Unknown Directors') AS directors +FROM + RankedMovies rm +LEFT JOIN + MovieKeywords mk ON rm.movie_id = mk.movie_id +LEFT JOIN + DirectorInfo di ON rm.movie_id = di.movie_id +WHERE + rm.rank <= 10 AND rm.production_year > 2000 +ORDER BY + rm.production_year DESC, rm.rank; diff --git a/vortex-bench/sqlstorm/job/27690.sql 
b/vortex-bench/sqlstorm/job/27690.sql new file mode 100644 index 00000000000..52bca2522fc --- /dev/null +++ b/vortex-bench/sqlstorm/job/27690.sql @@ -0,0 +1,50 @@ +WITH MovieDetails AS ( + SELECT + t.id AS movie_id, + t.title AS movie_title, + t.production_year, + ak.name AS actor_name, + ct.kind AS company_type, + k.keyword AS movie_keyword + FROM + aka_title t + JOIN + cast_info c ON t.id = c.movie_id + JOIN + aka_name ak ON c.person_id = ak.person_id + JOIN + movie_companies mc ON t.id = mc.movie_id + JOIN + company_type ct ON mc.company_type_id = ct.id + LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + WHERE + t.production_year >= 2000 + AND ak.name IS NOT NULL +), +AggregatedData AS ( + SELECT + movie_id, + movie_title, + production_year, + STRING_AGG(DISTINCT actor_name, ', ') AS actors, + STRING_AGG(DISTINCT company_type, ', ') AS companies, + STRING_AGG(DISTINCT movie_keyword, ', ') AS keywords + FROM + MovieDetails + GROUP BY + movie_id, movie_title, production_year +) +SELECT + movie_id, + movie_title, + production_year, + actors, + companies, + keywords +FROM + AggregatedData +ORDER BY + production_year DESC, movie_title; diff --git a/vortex-bench/sqlstorm/job/27878.sql b/vortex-bench/sqlstorm/job/27878.sql new file mode 100644 index 00000000000..b596c51984e --- /dev/null +++ b/vortex-bench/sqlstorm/job/27878.sql @@ -0,0 +1,52 @@ + +WITH RankedTitles AS ( + SELECT + a.title, + a.production_year, + a.imdb_index, + COUNT(DISTINCT c.person_id) AS actor_count + FROM + aka_title a + JOIN + complete_cast cc ON a.id = cc.movie_id + JOIN + cast_info c ON cc.subject_id = c.id + WHERE + a.production_year >= 2000 + GROUP BY + a.title, a.production_year, a.imdb_index + ORDER BY + actor_count DESC + LIMIT 10 +), +MovieKeywords AS ( + SELECT + m.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword m + JOIN + keyword k ON m.keyword_id = k.id + GROUP BY + m.movie_id +), +DetailedInfo AS ( + SELECT 
+ r.title, + r.production_year, + r.actor_count, + mk.keywords + FROM + RankedTitles r + LEFT JOIN + MovieKeywords mk ON r.imdb_index = CAST(mk.movie_id AS VARCHAR) +) +SELECT + d.title, + d.production_year, + d.actor_count, + d.keywords +FROM + DetailedInfo d +ORDER BY + d.actor_count DESC, d.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/27886.sql b/vortex-bench/sqlstorm/job/27886.sql new file mode 100644 index 00000000000..8c9af1a829a --- /dev/null +++ b/vortex-bench/sqlstorm/job/27886.sql @@ -0,0 +1,47 @@ +WITH RankedMovies AS ( + SELECT + a.title AS movie_title, + a.production_year, + c.name AS company_name, + COUNT(DISTINCT ci.person_id) AS cast_count, + ROW_NUMBER() OVER (PARTITION BY a.production_year ORDER BY COUNT(DISTINCT ci.person_id) DESC) AS rnk + FROM + aka_title a + JOIN + movie_companies mc ON a.id = mc.movie_id + JOIN + company_name c ON mc.company_id = c.id + JOIN + complete_cast cc ON a.id = cc.movie_id + JOIN + cast_info ci ON cc.subject_id = ci.id + GROUP BY + a.title, a.production_year, c.name +), +TopMovies AS ( + SELECT + movie_title, + production_year, + company_name, + cast_count + FROM + RankedMovies + WHERE + rnk <= 5 +) + +SELECT + tm.movie_title, + tm.production_year, + tm.company_name, + tm.cast_count, + mi.info AS movie_info +FROM + TopMovies tm +LEFT JOIN + movie_info mi ON tm.movie_title = mi.info +WHERE + tm.production_year >= 2000 +ORDER BY + tm.production_year DESC, + tm.cast_count DESC; diff --git a/vortex-bench/sqlstorm/job/27941.sql b/vortex-bench/sqlstorm/job/27941.sql new file mode 100644 index 00000000000..c10e9db5435 --- /dev/null +++ b/vortex-bench/sqlstorm/job/27941.sql @@ -0,0 +1,46 @@ +WITH movie_details AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + a.name AS actor_name, + r.role AS role_type, + k.keyword AS movie_keyword + FROM + aka_title t + JOIN + cast_info ci ON t.id = ci.movie_id + JOIN + aka_name a ON ci.person_id = a.person_id + JOIN + role_type r ON ci.role_id = r.id + 
LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + WHERE + t.production_year BETWEEN 2000 AND 2023 + AND LENGTH(a.name) > 5 +), +aggregated_data AS ( + SELECT + production_year, + COUNT(DISTINCT movie_id) AS total_movies, + COUNT(DISTINCT actor_name) AS total_actors, + STRING_AGG(DISTINCT movie_keyword, ', ') AS keywords + FROM + movie_details + GROUP BY + production_year +) +SELECT + ad.production_year, + ad.total_movies, + ad.total_actors, + ad.keywords +FROM + aggregated_data ad +WHERE + ad.total_movies > 5 +ORDER BY + ad.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/28301.sql b/vortex-bench/sqlstorm/job/28301.sql new file mode 100644 index 00000000000..f9bb3622613 --- /dev/null +++ b/vortex-bench/sqlstorm/job/28301.sql @@ -0,0 +1,46 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + COUNT(DISTINCT ci.person_id) AS cast_count, + STRING_AGG(DISTINCT ak.name, ', ') AS aliases, + STRING_AGG(DISTINCT kw.keyword, ', ') AS keywords + FROM + aka_title t + JOIN + complete_cast cc ON t.id = cc.movie_id + JOIN + cast_info ci ON cc.subject_id = ci.id + JOIN + aka_name ak ON ci.person_id = ak.person_id + LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id + LEFT JOIN + keyword kw ON mk.keyword_id = kw.id + WHERE + t.production_year BETWEEN 2000 AND 2023 + GROUP BY + t.id, t.title, t.production_year + ORDER BY + cast_count DESC + LIMIT 10 +) + +SELECT + rm.movie_id, + rm.title, + rm.production_year, + rm.cast_count, + rm.aliases, + STRING_AGG( DISTINCT it.info, ', ') AS additional_info +FROM + RankedMovies rm +LEFT JOIN + movie_info mi ON rm.movie_id = mi.movie_id +LEFT JOIN + info_type it ON mi.info_type_id = it.id +GROUP BY + rm.movie_id, rm.title, rm.production_year, rm.cast_count, rm.aliases +ORDER BY + rm.cast_count DESC; diff --git a/vortex-bench/sqlstorm/job/28354.sql b/vortex-bench/sqlstorm/job/28354.sql new file mode 100644 index 00000000000..f9d82660642 --- 
/dev/null +++ b/vortex-bench/sqlstorm/job/28354.sql @@ -0,0 +1,49 @@ +WITH filtered_actors AS ( + SELECT + a.id AS actor_id, + a.name AS actor_name, + p.gender, + COUNT(ci.movie_id) AS movies_count + FROM + aka_name a + JOIN + cast_info ci ON a.person_id = ci.person_id + JOIN + name p ON a.person_id = p.imdb_id + GROUP BY + a.id, a.name, p.gender + HAVING + COUNT(ci.movie_id) > 3 +), + +top_movies AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + COUNT(ci.person_id) AS cast_count + FROM + aka_title m + JOIN + cast_info ci ON m.id = ci.movie_id + GROUP BY + m.id, m.title, m.production_year + ORDER BY + cast_count DESC + LIMIT 10 +) + +SELECT + a.actor_name, + a.gender, + tm.title AS movie_title, + tm.production_year, + tm.cast_count +FROM + filtered_actors a +JOIN + cast_info ci ON a.actor_id = ci.person_id +JOIN + top_movies tm ON ci.movie_id = tm.movie_id +ORDER BY + a.actor_name, tm.production_year DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/28423.sql b/vortex-bench/sqlstorm/job/28423.sql new file mode 100644 index 00000000000..a9e52dcffa6 --- /dev/null +++ b/vortex-bench/sqlstorm/job/28423.sql @@ -0,0 +1,65 @@ +WITH RankedMovies AS ( + SELECT + m.id AS movie_id, + m.title AS movie_title, + m.production_year, + COUNT(ci.person_id) AS cast_count, + STRING_AGG(DISTINCT a.name, ', ') AS actor_names + FROM + aka_title AS m + JOIN + cast_info AS ci ON m.id = ci.movie_id + JOIN + aka_name AS a ON ci.person_id = a.person_id + WHERE + m.production_year BETWEEN 2000 AND 2023 + GROUP BY + m.id, m.title, m.production_year +), +MovieInfo AS ( + SELECT + ri.movie_id, + ri.movie_title, + ri.production_year, + ri.cast_count, + ri.actor_names, + COUNT(DISTINCT mi.info_type_id) AS info_count, + STRING_AGG(DISTINCT k.keyword, ', ') AS keywords + FROM + RankedMovies AS ri + LEFT JOIN + movie_info AS mi ON ri.movie_id = mi.movie_id + LEFT JOIN + movie_keyword AS mk ON ri.movie_id = mk.movie_id + LEFT JOIN + keyword AS k ON mk.keyword_id 
= k.id + GROUP BY + ri.movie_id, ri.movie_title, ri.production_year, ri.cast_count, ri.actor_names +), +TopMovies AS ( + SELECT + movie_id, + movie_title, + production_year, + cast_count, + actor_names, + info_count, + keywords, + RANK() OVER (ORDER BY cast_count DESC) AS rank_by_cast_count + FROM + MovieInfo +) +SELECT + tm.movie_id, + tm.movie_title, + tm.production_year, + tm.cast_count, + tm.actor_names, + tm.info_count, + tm.keywords +FROM + TopMovies AS tm +WHERE + tm.rank_by_cast_count <= 10 +ORDER BY + tm.production_year DESC, tm.cast_count DESC; diff --git a/vortex-bench/sqlstorm/job/28481.sql b/vortex-bench/sqlstorm/job/28481.sql new file mode 100644 index 00000000000..36f4555eb85 --- /dev/null +++ b/vortex-bench/sqlstorm/job/28481.sql @@ -0,0 +1,58 @@ +WITH ActorRoleInfo AS ( + SELECT + a.id AS actor_id, + a.name AS actor_name, + c.movie_id, + t.title, + t.production_year, + r.role AS actor_role, + k.keyword AS movie_keyword + FROM + aka_name a + JOIN + cast_info c ON a.person_id = c.person_id + JOIN + title t ON c.movie_id = t.id + JOIN + role_type r ON c.role_id = r.id + JOIN + movie_keyword mk ON c.movie_id = mk.movie_id + JOIN + keyword k ON mk.keyword_id = k.id + WHERE + a.name ILIKE '%Smith%' +), +ActorProductionCount AS ( + SELECT + actor_id, + actor_name, + COUNT(DISTINCT movie_id) AS total_movies + FROM + ActorRoleInfo + GROUP BY + actor_id, actor_name +), +TopActors AS ( + SELECT + actor_id, + actor_name, + total_movies + FROM + ActorProductionCount + ORDER BY + total_movies DESC + LIMIT 10 +) +SELECT + ta.actor_name, + ta.total_movies, + ARRAY_AGG(DISTINCT ari.title) AS movie_titles, + ARRAY_AGG(DISTINCT ari.movie_keyword) AS keywords +FROM + TopActors ta +JOIN + ActorRoleInfo ari ON ta.actor_id = ari.actor_id +GROUP BY + ta.actor_id, ta.actor_name, ta.total_movies +ORDER BY + ta.total_movies DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/job/28607.sql b/vortex-bench/sqlstorm/job/28607.sql new file mode 100644 index 
00000000000..2345cdd158d --- /dev/null +++ b/vortex-bench/sqlstorm/job/28607.sql @@ -0,0 +1,64 @@ +WITH RankedTitles AS ( + SELECT + t.id AS title_id, + t.title, + t.production_year, + a.name AS actor_name, + ROW_NUMBER() OVER (PARTITION BY t.id ORDER BY a.name) AS actor_rank + FROM + aka_title t + JOIN + complete_cast cc ON t.id = cc.movie_id + JOIN + cast_info ci ON cc.subject_id = ci.person_id + JOIN + aka_name a ON ci.person_id = a.person_id + WHERE + t.production_year >= 2000 +), + +RecentCompanyDetails AS ( + SELECT + mc.movie_id, + c.name AS company_name, + ct.kind AS company_type, + ROW_NUMBER() OVER (PARTITION BY mc.movie_id ORDER BY c.name) AS company_rank + FROM + movie_companies mc + JOIN + company_name c ON mc.company_id = c.id + JOIN + company_type ct ON mc.company_type_id = ct.id + WHERE + c.country_code = 'USA' +), + +KeywordCount AS ( + SELECT + mk.movie_id, + COUNT(k.keyword) AS keyword_count + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +) + +SELECT + rt.title, + rt.production_year, + rt.actor_name, + rc.company_name, + rc.company_type, + kc.keyword_count +FROM + RankedTitles rt +LEFT JOIN + RecentCompanyDetails rc ON rt.title_id = rc.movie_id +LEFT JOIN + KeywordCount kc ON rt.title_id = kc.movie_id +WHERE + rt.actor_rank <= 3 +ORDER BY + rt.production_year DESC, rt.title; diff --git a/vortex-bench/sqlstorm/job/28757.sql b/vortex-bench/sqlstorm/job/28757.sql new file mode 100644 index 00000000000..aabd78d31ae --- /dev/null +++ b/vortex-bench/sqlstorm/job/28757.sql @@ -0,0 +1,47 @@ + +WITH MovieStats AS ( + SELECT + a.title AS MovieTitle, + a.production_year AS ProductionYear, + COUNT(DISTINCT c.person_id) AS CastCount, + STRING_AGG(DISTINCT ak.name, ', ') AS Actors, + STRING_AGG(DISTINCT kw.keyword, ', ') AS Keywords + FROM + aka_title a + JOIN + complete_cast cc ON a.id = cc.movie_id + JOIN + cast_info c ON cc.subject_id = c.id + JOIN + aka_name ak ON c.person_id = ak.person_id + LEFT JOIN + 
movie_keyword mw ON a.id = mw.movie_id + LEFT JOIN + keyword kw ON mw.keyword_id = kw.id + WHERE + a.production_year >= 2000 + GROUP BY + a.title, a.production_year +), TopMovies AS ( + SELECT + MovieTitle, + ProductionYear, + CastCount, + Actors, + Keywords, + RANK() OVER (ORDER BY CastCount DESC) AS Rank + FROM + MovieStats +) +SELECT + MovieTitle, + ProductionYear, + CastCount, + Actors, + Keywords +FROM + TopMovies +WHERE + Rank <= 10 +ORDER BY + ProductionYear DESC, CastCount DESC; diff --git a/vortex-bench/sqlstorm/job/29718.sql b/vortex-bench/sqlstorm/job/29718.sql new file mode 100644 index 00000000000..0214cb465d3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/29718.sql @@ -0,0 +1,52 @@ +WITH ranked_titles AS ( + SELECT + a.title AS movie_title, + t.production_year, + r.role, + a.id AS title_id, + ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY t.production_year DESC) AS year_rank + FROM + aka_title a + JOIN + title t ON a.movie_id = t.id + JOIN + cast_info ci ON a.movie_id = ci.movie_id + JOIN + role_type r ON ci.role_id = r.id + WHERE + t.production_year IS NOT NULL +), +top_movies AS ( + SELECT + movie_title, + production_year, + role, + title_id + FROM + ranked_titles + WHERE + year_rank = 1 +), +keyword_summary AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +) +SELECT + t.movie_title, + t.production_year, + t.role, + k.keywords +FROM + top_movies t +LEFT JOIN + keyword_summary k ON t.title_id = k.movie_id +ORDER BY + t.production_year DESC, + t.movie_title; diff --git a/vortex-bench/sqlstorm/job/29753.sql b/vortex-bench/sqlstorm/job/29753.sql new file mode 100644 index 00000000000..6c49313173d --- /dev/null +++ b/vortex-bench/sqlstorm/job/29753.sql @@ -0,0 +1,54 @@ + +WITH RankedMovies AS ( + SELECT + m.id AS movie_id, + m.title AS movie_title, + m.production_year, + COUNT(c.id) AS cast_count + FROM + aka_title m + JOIN + cast_info c ON 
m.id = c.movie_id + WHERE + m.production_year >= 2000 + GROUP BY + m.id, m.title, m.production_year +), +TopMovies AS ( + SELECT + movie_id, + movie_title, + production_year, + cast_count, + ROW_NUMBER() OVER (ORDER BY cast_count DESC) AS rank + FROM + RankedMovies + WHERE + cast_count > 5 +) +SELECT + tm.movie_title, + tm.production_year, + a.name AS actor_name, + a.name_pcode_nf, + a.name_pcode_cf, + r.role, + STRING_AGG(DISTINCT k.keyword, ',' ORDER BY k.keyword) AS keywords +FROM + TopMovies tm +JOIN + cast_info ci ON tm.movie_id = ci.movie_id +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + role_type r ON ci.role_id = r.id +JOIN + movie_keyword mk ON tm.movie_id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +WHERE + tm.rank <= 10 +GROUP BY + tm.movie_title, tm.production_year, a.name, a.name_pcode_nf, a.name_pcode_cf, r.role, tm.rank +ORDER BY + tm.rank, tm.movie_title; diff --git a/vortex-bench/sqlstorm/job/30692.sql b/vortex-bench/sqlstorm/job/30692.sql new file mode 100644 index 00000000000..d92a1c53db3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/30692.sql @@ -0,0 +1,73 @@ + +WITH RECURSIVE movie_hierarchy AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + 1 AS level + FROM + aka_title m + WHERE + m.episode_of_id IS NULL + UNION ALL + SELECT + ep.id AS movie_id, + ep.title, + ep.production_year, + mh.level + 1 + FROM + aka_title ep + INNER JOIN + movie_hierarchy mh ON ep.episode_of_id = mh.movie_id +), +ranked_cast AS ( + SELECT + ci.movie_id, + a.name AS actor_name, + ROW_NUMBER() OVER (PARTITION BY ci.movie_id ORDER BY ci.nr_order) AS actor_rank + FROM + cast_info ci + JOIN + aka_name a ON ci.person_id = a.person_id +), +company_info AS ( + SELECT + mc.movie_id, + STRING_AGG(cn.name, ', ') AS company_names, + STRING_AGG(ct.kind, ', ') AS company_types + FROM + movie_companies mc + JOIN + company_name cn ON mc.company_id = cn.id + JOIN + company_type ct ON mc.company_type_id = ct.id + GROUP BY + mc.movie_id +) +SELECT + 
mh.movie_id, + mh.title, + mh.production_year, + COALESCE(rc.actor_count, 0) AS total_actors, + COALESCE(ci.company_names, 'No Companies') AS companies, + COALESCE(ci.company_types, 'N/A') AS company_types, + RANK() OVER (ORDER BY mh.production_year DESC) AS production_rank +FROM + movie_hierarchy mh +LEFT JOIN ( + SELECT + movie_id, COUNT(*) AS actor_count + FROM + ranked_cast + GROUP BY + movie_id +) rc ON mh.movie_id = rc.movie_id +LEFT JOIN + company_info ci ON mh.movie_id = ci.movie_id +WHERE + mh.production_year >= 2000 +GROUP BY + mh.movie_id, mh.title, mh.production_year, rc.actor_count, ci.company_names, ci.company_types +ORDER BY + mh.title ASC, + production_rank DESC; diff --git a/vortex-bench/sqlstorm/job/31671.sql b/vortex-bench/sqlstorm/job/31671.sql new file mode 100644 index 00000000000..95a60060365 --- /dev/null +++ b/vortex-bench/sqlstorm/job/31671.sql @@ -0,0 +1,56 @@ + +WITH RECURSIVE MovieHierarchy AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + 1 AS level + FROM + aka_title m + WHERE + m.production_year > 2000 + UNION ALL + SELECT + mk.linked_movie_id AS movie_id, + m.title, + m.production_year, + mh.level + 1 AS level + FROM + movie_link mk + JOIN + aka_title m ON mk.linked_movie_id = m.id + JOIN + MovieHierarchy mh ON mk.movie_id = mh.movie_id +) +SELECT + m.id AS movie_id, + m.title, + m.production_year, + (SELECT COUNT(DISTINCT c.person_id) + FROM cast_info c + WHERE c.movie_id = m.id) AS total_cast, + cct.kind AS casting_type, + ROW_NUMBER() OVER (PARTITION BY m.production_year ORDER BY m.title) AS row_num, + COALESCE(NULLIF(m.note, ''), 'No note available') AS movie_note, + STRING_AGG(DISTINCT kw.keyword, ', ') AS keywords +FROM + aka_title m +LEFT JOIN + movie_companies mc ON m.id = mc.movie_id +LEFT JOIN + company_name cn ON mc.company_id = cn.id +LEFT JOIN + comp_cast_type cct ON mc.company_type_id = cct.id +LEFT JOIN + movie_keyword mk ON m.id = mk.movie_id +LEFT JOIN + keyword kw ON mk.keyword_id = kw.id +WHERE + 
m.production_year > 2000 + AND m.kind_id IN (SELECT id FROM kind_type WHERE kind LIKE '%Drama%') +GROUP BY + m.id, m.title, m.production_year, cct.kind, m.note +HAVING + COUNT(DISTINCT cct.kind) > 1 +ORDER BY + m.production_year DESC, m.title; diff --git a/vortex-bench/sqlstorm/job/34217.sql b/vortex-bench/sqlstorm/job/34217.sql new file mode 100644 index 00000000000..bf2f1a96530 --- /dev/null +++ b/vortex-bench/sqlstorm/job/34217.sql @@ -0,0 +1,73 @@ + +WITH RECURSIVE movie_hierarchy AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + 1 AS level + FROM + aka_title m + WHERE + m.episode_of_id IS NULL + + UNION ALL + + SELECT + m.id AS movie_id, + m.title, + m.production_year, + mh.level + 1 + FROM + aka_title m + JOIN + movie_hierarchy mh ON m.episode_of_id = mh.movie_id +), +cast_details AS ( + SELECT + c.movie_id, + a.name AS actor_name, + a.surname_pcode + FROM + cast_info c + JOIN + aka_name a ON a.person_id = c.person_id +), +movie_info_summary AS ( + SELECT + m.id AS movie_id, + COUNT(DISTINCT k.keyword) AS total_keywords, + MAX(m.production_year) AS latest_info_year + FROM + aka_title m + LEFT JOIN + movie_keyword mk ON mk.movie_id = m.id + LEFT JOIN + keyword k ON k.id = mk.keyword_id + GROUP BY + m.id +) +SELECT + mh.movie_id, + mh.title, + mh.production_year, + cd.actor_name, + cd.surname_pcode, + mi.total_keywords, + CASE + WHEN mi.latest_info_year IS NULL THEN 'No info available' + ELSE CAST(mi.latest_info_year AS VARCHAR) + END AS latest_info_year, + ROW_NUMBER() OVER (PARTITION BY mh.level ORDER BY mh.production_year DESC) AS rank_level +FROM + movie_hierarchy mh +LEFT JOIN + cast_details cd ON cd.movie_id = mh.movie_id +LEFT JOIN + movie_info_summary mi ON mi.movie_id = mh.movie_id +WHERE + mh.production_year >= 2000 + AND + cd.surname_pcode IS NOT NULL +ORDER BY + mh.level, + mh.production_year DESC; diff --git a/vortex-bench/sqlstorm/job/34816.sql b/vortex-bench/sqlstorm/job/34816.sql new file mode 100644 index 
00000000000..cb885668e5a --- /dev/null +++ b/vortex-bench/sqlstorm/job/34816.sql @@ -0,0 +1,79 @@ +WITH RECURSIVE MovieHierarchy AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + CAST(m.title AS text) AS full_title, + 1 AS depth + FROM + aka_title m + WHERE + m.production_year IS NOT NULL + UNION ALL + SELECT + m.id AS movie_id, + m.title, + m.production_year, + CONCAT(mh.full_title, ' -> ', m.title) AS full_title, + mh.depth + 1 + FROM + MovieHierarchy mh + JOIN + aka_title m ON m.episode_of_id = mh.movie_id +), +CollatedCast AS ( + SELECT + ci.movie_id, + COUNT(ci.person_id) AS cast_count, + STRING_AGG(a.name, ', ') AS actors, + MAX(CASE WHEN a.name IS NOT NULL THEN 1 ELSE 0 END) AS has_actors + FROM + cast_info ci + JOIN + aka_name a ON ci.person_id = a.person_id + GROUP BY + ci.movie_id +), +MovieInfo AS ( + SELECT + m.id AS movie_id, + COALESCE(k.keyword, 'No Keyword') AS keyword, + COALESCE(mi.info, 'No Info') AS additional_info, + CASE + WHEN c.cast_count > 0 THEN ('This movie has ' || c.cast_count || ' total cast members.') + ELSE 'This movie has no cast members.' 
+ END AS cast_description, + mh.title AS movie_title, + mh.production_year, + mh.depth + FROM + aka_title m + LEFT JOIN + CollatedCast c ON m.id = c.movie_id + LEFT JOIN + movie_keyword mk ON m.id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + LEFT JOIN + MovieHierarchy mh ON mh.movie_id = m.id + LEFT JOIN + movie_info mi ON m.id = mi.movie_id + WHERE + m.production_year BETWEEN 2000 AND 2020 +) +SELECT + mi.movie_title, + mi.production_year, + mi.keyword, + mi.additional_info, + mi.cast_description, + mh.depth AS hierarchy_level, + ROW_NUMBER() OVER (PARTITION BY mi.keyword ORDER BY mi.production_year DESC) AS keyword_rank +FROM + MovieInfo mi +JOIN + MovieHierarchy mh ON mi.movie_id = mh.movie_id +WHERE + mh.depth < 5 +ORDER BY + mi.production_year DESC, mi.keyword; diff --git a/vortex-bench/sqlstorm/job/3712.sql b/vortex-bench/sqlstorm/job/3712.sql new file mode 100644 index 00000000000..da2be3b41cd --- /dev/null +++ b/vortex-bench/sqlstorm/job/3712.sql @@ -0,0 +1,46 @@ +WITH MovieDetails AS ( + SELECT + a.title, + a.production_year, + COUNT(DISTINCT c.person_id) AS cast_count, + STRING_AGG(DISTINCT k.keyword, ', ') AS keywords + FROM + aka_title a + LEFT JOIN + complete_cast cc ON a.id = cc.movie_id + LEFT JOIN + cast_info c ON cc.subject_id = c.id + LEFT JOIN + movie_keyword mk ON a.id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + a.title, a.production_year +), +TopMovies AS ( + SELECT + title, + production_year, + cast_count, + keywords, + RANK() OVER (ORDER BY cast_count DESC) AS rank + FROM + MovieDetails +) +SELECT + tm.title, + tm.production_year, + tm.cast_count, + COALESCE(tm.keywords, 'No Keywords') AS keywords, + (SELECT AVG(cast_count) FROM TopMovies) AS avg_cast_count, + CASE + WHEN tm.cast_count > (SELECT AVG(cast_count) FROM TopMovies) + THEN 'Above Average' + ELSE 'Below Average' + END AS performance_category +FROM + TopMovies tm +WHERE + tm.rank <= 10 +ORDER BY + tm.rank; diff --git 
a/vortex-bench/sqlstorm/job/3777.sql b/vortex-bench/sqlstorm/job/3777.sql new file mode 100644 index 00000000000..c9e7f76294e --- /dev/null +++ b/vortex-bench/sqlstorm/job/3777.sql @@ -0,0 +1,64 @@ +WITH RankedMovies AS ( + SELECT + t.title, + t.production_year, + COUNT(DISTINCT mc.company_id) AS company_count, + RANK() OVER (PARTITION BY t.production_year ORDER BY COUNT(DISTINCT mc.company_id) DESC) AS rank_in_year + FROM + aka_title t + LEFT JOIN + movie_companies mc ON t.id = mc.movie_id + GROUP BY + t.id, t.title, t.production_year +), +TopMovies AS ( + SELECT + title, + production_year + FROM + RankedMovies + WHERE + rank_in_year <= 3 +), +ActorInfo AS ( + SELECT + ak.name AS actor_name, + t.title AS movie_title, + t.production_year, + ci.note AS role_note, + ROW_NUMBER() OVER (PARTITION BY t.id ORDER BY ci.nr_order) AS role_order + FROM + cast_info ci + JOIN + aka_name ak ON ci.person_id = ak.person_id + JOIN + aka_title t ON ci.movie_id = t.id + WHERE + ci.note IS NOT NULL +), +CombinedResults AS ( + SELECT + tm.production_year, + tm.title, + ai.actor_name, + ai.role_note, + CASE + WHEN ai.role_note IS NOT NULL THEN 'Role: ' || ai.role_note + ELSE 'Unknown Role' + END AS role_description + FROM + TopMovies tm + LEFT JOIN + ActorInfo ai ON tm.title = ai.movie_title AND tm.production_year = ai.production_year +) +SELECT + production_year, + title, + STRING_AGG(actor_name, ', ') AS actors, + MAX(role_description) AS sample_role_description +FROM + CombinedResults +GROUP BY + production_year, title +ORDER BY + production_year DESC, title; diff --git a/vortex-bench/sqlstorm/job/3802.sql b/vortex-bench/sqlstorm/job/3802.sql new file mode 100644 index 00000000000..237d2753564 --- /dev/null +++ b/vortex-bench/sqlstorm/job/3802.sql @@ -0,0 +1,64 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.title) AS rank + FROM + aka_title t + WHERE + t.production_year IS NOT 
NULL +), +CastDetails AS ( + SELECT + c.movie_id, + COUNT(*) AS cast_count, + MIN(a.name) AS first_actor_name, + MAX(a.name) AS last_actor_name + FROM + cast_info c + JOIN + aka_name a ON c.person_id = a.person_id + GROUP BY + c.movie_id +), +MovieKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS all_keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +), +MoviesWithDetails AS ( + SELECT + rm.movie_id, + rm.title, + rm.production_year, + cd.cast_count, + cd.first_actor_name, + cd.last_actor_name, + COALESCE(mk.all_keywords, 'No Keywords') AS keywords + FROM + RankedMovies rm + LEFT JOIN + CastDetails cd ON rm.movie_id = cd.movie_id + LEFT JOIN + MovieKeywords mk ON rm.movie_id = mk.movie_id +) +SELECT + m.title, + m.production_year, + m.cast_count, + m.first_actor_name, + m.last_actor_name, + m.keywords +FROM + MoviesWithDetails m +WHERE + m.production_year = (SELECT MAX(production_year) FROM RankedMovies) +ORDER BY + m.cast_count DESC, m.title; diff --git a/vortex-bench/sqlstorm/job/4129.sql b/vortex-bench/sqlstorm/job/4129.sql new file mode 100644 index 00000000000..38e87c9384f --- /dev/null +++ b/vortex-bench/sqlstorm/job/4129.sql @@ -0,0 +1,40 @@ +WITH RankedMovies AS ( + SELECT + mt.title, + mt.production_year, + COUNT(DISTINCT cc.subject_id) AS total_cast, + ROW_NUMBER() OVER (PARTITION BY mt.production_year ORDER BY COUNT(DISTINCT cc.subject_id) DESC) AS rank + FROM + title mt + LEFT JOIN + complete_cast cc ON mt.id = cc.movie_id + GROUP BY + mt.title, mt.production_year +), +MovieKeywords AS ( + SELECT + mt.title, + mk.keyword, + ROW_NUMBER() OVER (PARTITION BY mt.id ORDER BY mk.id) AS keyword_rank + FROM + title mt + JOIN + movie_keyword mvk ON mt.id = mvk.movie_id + JOIN + keyword mk ON mvk.keyword_id = mk.id +) +SELECT + rm.title, + rm.production_year, + rm.total_cast, + STRING_AGG(mk.keyword, ', ') AS keywords +FROM + RankedMovies rm +LEFT JOIN + MovieKeywords mk ON mk.title = 
rm.title +WHERE + rm.rank <= 5 +GROUP BY + rm.title, rm.production_year, rm.total_cast +ORDER BY + rm.production_year DESC, rm.total_cast DESC; diff --git a/vortex-bench/sqlstorm/job/4399.sql b/vortex-bench/sqlstorm/job/4399.sql new file mode 100644 index 00000000000..3525360b725 --- /dev/null +++ b/vortex-bench/sqlstorm/job/4399.sql @@ -0,0 +1,47 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.title) AS rank + FROM + aka_title t + WHERE + t.production_year IS NOT NULL +), +ActorCount AS ( + SELECT + c.movie_id, + COUNT(DISTINCT c.person_id) AS actor_count + FROM + cast_info c + GROUP BY + c.movie_id +), +MoviesWithActors AS ( + SELECT + rm.movie_id, + rm.title, + rm.production_year, + COALESCE(ac.actor_count, 0) AS actor_count + FROM + RankedMovies rm + LEFT JOIN + ActorCount ac ON rm.movie_id = ac.movie_id +) +SELECT + mwa.movie_id, + mwa.title, + mwa.production_year, + mwa.actor_count, + CASE + WHEN mwa.actor_count > 10 THEN 'Ensemble Cast' + WHEN mwa.actor_count BETWEEN 5 AND 10 THEN 'Moderate Cast' + ELSE 'Minimal Cast' + END AS cast_size_description +FROM + MoviesWithActors mwa +WHERE + mwa.actor_count > (SELECT AVG(actor_count) FROM ActorCount) +ORDER BY + mwa.production_year DESC, mwa.actor_count DESC; diff --git a/vortex-bench/sqlstorm/job/4482.sql b/vortex-bench/sqlstorm/job/4482.sql new file mode 100644 index 00000000000..5778a889ec7 --- /dev/null +++ b/vortex-bench/sqlstorm/job/4482.sql @@ -0,0 +1,54 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + COUNT(c.person_id) AS cast_count, + RANK() OVER (PARTITION BY t.production_year ORDER BY COUNT(c.person_id) DESC) AS rank_within_year + FROM + aka_title t + LEFT JOIN + cast_info c ON t.id = c.movie_id + WHERE + t.production_year IS NOT NULL + GROUP BY + t.id, t.title, t.production_year +), +FilteredMovies AS ( + SELECT + rm.movie_id, + rm.title, + rm.production_year, 
+ rm.cast_count + FROM + RankedMovies rm + WHERE + rm.rank_within_year <= 5 +), +MovieKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +) +SELECT + f.title, + f.production_year, + f.cast_count, + COALESCE(mk.keywords, 'No Keywords') AS keywords, + CASE + WHEN f.cast_count IS NULL OR f.cast_count = 0 THEN 'No cast information.' + ELSE NULL + END AS cast_info_note +FROM + FilteredMovies f +LEFT JOIN + MovieKeywords mk ON f.movie_id = mk.movie_id +ORDER BY + f.production_year DESC, + f.cast_count DESC; diff --git a/vortex-bench/sqlstorm/job/5243.sql b/vortex-bench/sqlstorm/job/5243.sql new file mode 100644 index 00000000000..71be2022c05 --- /dev/null +++ b/vortex-bench/sqlstorm/job/5243.sql @@ -0,0 +1,26 @@ +SELECT + akn.name AS aka_name, + tit.title AS movie_title, + cnt.name AS company_name, + rt.role AS person_role, + pi.info AS person_info +FROM + aka_name akn +JOIN + cast_info ci ON akn.person_id = ci.person_id +JOIN + title tit ON ci.movie_id = tit.id +JOIN + movie_companies mc ON tit.id = mc.movie_id +JOIN + company_name cnt ON mc.company_id = cnt.id +JOIN + role_type rt ON ci.role_id = rt.id +JOIN + person_info pi ON akn.person_id = pi.person_id +WHERE + tit.production_year >= 2000 + AND cnt.country_code = 'USA' + AND pi.info_type_id IN (SELECT id FROM info_type WHERE info = 'Biography') +ORDER BY + tit.production_year DESC, akn.name; diff --git a/vortex-bench/sqlstorm/job/5278.sql b/vortex-bench/sqlstorm/job/5278.sql new file mode 100644 index 00000000000..b59c25b4823 --- /dev/null +++ b/vortex-bench/sqlstorm/job/5278.sql @@ -0,0 +1,32 @@ + +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.kind AS company_type, + ti.info AS movie_info, + STRING_AGG(DISTINCT k.keyword, ', ') AS keywords +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + movie_companies mc ON 
t.id = mc.movie_id +JOIN + company_type c ON mc.company_type_id = c.id +JOIN + movie_info mi ON t.id = mi.movie_id +JOIN + info_type ti ON mi.info_type_id = ti.id +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year > 2000 + AND c.kind LIKE 'Production%' +GROUP BY + a.name, t.title, c.kind, ti.info +ORDER BY + actor_name, movie_title; diff --git a/vortex-bench/sqlstorm/job/5307.sql b/vortex-bench/sqlstorm/job/5307.sql new file mode 100644 index 00000000000..7c6dcf3fdfb --- /dev/null +++ b/vortex-bench/sqlstorm/job/5307.sql @@ -0,0 +1,30 @@ +SELECT + a.name AS aka_name, + t.title AS movie_title, + c.nr_order AS cast_order, + ci.kind AS company_type, + mi.info AS movie_info, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + title t ON c.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type ci ON mc.company_type_id = ci.id +LEFT JOIN + movie_info mi ON t.id = mi.movie_id AND mi.info_type_id = (SELECT id FROM info_type WHERE info = 'budget' LIMIT 1) +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year > 2000 +AND + ci.kind = 'Production' +ORDER BY + t.production_year DESC, + c.nr_order ASC; diff --git a/vortex-bench/sqlstorm/job/539.sql b/vortex-bench/sqlstorm/job/539.sql new file mode 100644 index 00000000000..82971e8f89f --- /dev/null +++ b/vortex-bench/sqlstorm/job/539.sql @@ -0,0 +1,62 @@ +WITH RECURSIVE MovieHierarchy AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + COALESCE(a.name, 'Unknown') AS actor_name, + CAST(COALESCE(c.role_id, 0) AS INTEGER) AS role_id, + ROW_NUMBER() OVER (PARTITION BY m.id ORDER BY CASE WHEN a.name IS NOT NULL THEN 1 ELSE 0 END DESC) AS role_order + FROM + aka_title m + LEFT JOIN + cast_info c ON m.id = c.movie_id + LEFT JOIN + aka_name a ON c.person_id = a.person_id +), RankedMovies AS ( + 
SELECT + movie_id, + title, + production_year, + actor_name, + role_id, + role_order, + COUNT(*) OVER (PARTITION BY production_year) AS movies_in_year + FROM + MovieHierarchy +), FilteredMovies AS ( + SELECT + movie_id, + title, + production_year, + actor_name, + role_id, + role_order, + movies_in_year, + CASE + WHEN role_order = 1 THEN 'Lead' + WHEN role_order > 1 AND role_order <= 3 THEN 'Supporting' + ELSE 'Minor' + END AS role_type + FROM + RankedMovies + WHERE + production_year > 2000 AND + (actor_name IS NOT NULL AND actor_name != 'Unknown') +) +SELECT + f.movie_id, + f.title, + f.production_year, + f.actor_name, + f.role_type, + f.movies_in_year, + COALESCE(SUM(mk.id), 0) AS keyword_count +FROM + FilteredMovies f +LEFT JOIN + movie_keyword mk ON f.movie_id = mk.movie_id +GROUP BY + f.movie_id, f.title, f.production_year, f.actor_name, f.role_type, f.movies_in_year +ORDER BY + f.production_year DESC, f.role_type DESC, keyword_count DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/job/5534.sql b/vortex-bench/sqlstorm/job/5534.sql new file mode 100644 index 00000000000..501311a365b --- /dev/null +++ b/vortex-bench/sqlstorm/job/5534.sql @@ -0,0 +1,56 @@ + +WITH RankedMovies AS ( + SELECT + m.id AS movie_id, + m.title, + m.production_year, + COUNT(DISTINCT mc.company_id) AS company_count + FROM + title m + JOIN + movie_companies mc ON m.id = mc.movie_id + GROUP BY + m.id, m.title, m.production_year + HAVING + COUNT(DISTINCT mc.company_id) > 1 +), +ActorDetails AS ( + SELECT + a.id AS actor_id, + a.name, + a.md5sum, + ci.movie_id, + ci.role_id + FROM + aka_name a + JOIN + cast_info ci ON a.person_id = ci.person_id + WHERE + ci.nr_order = 1 +), +MovieKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +) +SELECT + rm.title, + rm.production_year, + ad.name AS leading_actor, + mk.keywords, + rm.company_count +FROM + RankedMovies rm +JOIN + 
ActorDetails ad ON rm.movie_id = ad.movie_id +JOIN + MovieKeywords mk ON rm.movie_id = mk.movie_id +ORDER BY + rm.production_year DESC, + rm.company_count DESC; diff --git a/vortex-bench/sqlstorm/job/5580.sql b/vortex-bench/sqlstorm/job/5580.sql new file mode 100644 index 00000000000..9bba6415294 --- /dev/null +++ b/vortex-bench/sqlstorm/job/5580.sql @@ -0,0 +1,25 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + c.kind AS cast_type, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + comp_cast_type c ON ci.person_role_id = c.id +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year BETWEEN 2000 AND 2023 +AND + c.kind = 'actor' +ORDER BY + t.production_year DESC, + a.name; diff --git a/vortex-bench/sqlstorm/job/5729.sql b/vortex-bench/sqlstorm/job/5729.sql new file mode 100644 index 00000000000..bba3e184429 --- /dev/null +++ b/vortex-bench/sqlstorm/job/5729.sql @@ -0,0 +1,30 @@ + +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.kind AS company_type, + COUNT(DISTINCT mc.company_id) AS company_count, + STRING_AGG(DISTINCT kw.keyword, ', ') AS keywords, + MIN(t.production_year) AS first_year, + MAX(t.production_year) AS last_year +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type c ON mc.company_type_id = c.id +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword kw ON mk.keyword_id = kw.id +WHERE + t.production_year BETWEEN 2000 AND 2020 + AND c.kind = 'Distributor' +GROUP BY + a.name, t.title, c.kind +ORDER BY + first_year DESC; diff --git a/vortex-bench/sqlstorm/job/5749.sql b/vortex-bench/sqlstorm/job/5749.sql new file mode 100644 index 00000000000..d416b8bb7a7 --- /dev/null +++ 
b/vortex-bench/sqlstorm/job/5749.sql @@ -0,0 +1,43 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + COUNT(DISTINCT c.person_id) AS num_actors, + STRING_AGG(DISTINCT a.name, ', ') AS actor_names + FROM + aka_title t + JOIN + complete_cast cc ON t.id = cc.movie_id + JOIN + cast_info c ON cc.subject_id = c.id + JOIN + aka_name a ON c.person_id = a.person_id + WHERE + t.production_year >= 2000 + GROUP BY + t.id, t.title, t.production_year +), +TopMovies AS ( + SELECT + rm.movie_id, + rm.title, + rm.production_year, + rm.num_actors, + rm.actor_names, + RANK() OVER (ORDER BY rm.num_actors DESC) AS actor_rank + FROM + RankedMovies rm +) +SELECT + tm.movie_id, + tm.title, + tm.production_year, + tm.num_actors, + tm.actor_names +FROM + TopMovies tm +WHERE + tm.actor_rank <= 10 +ORDER BY + tm.num_actors DESC; diff --git a/vortex-bench/sqlstorm/job/5768.sql b/vortex-bench/sqlstorm/job/5768.sql new file mode 100644 index 00000000000..600d888f39e --- /dev/null +++ b/vortex-bench/sqlstorm/job/5768.sql @@ -0,0 +1,28 @@ + +SELECT + a.name AS actor_name, + m.title AS movie_title, + m.production_year, + STRING_AGG(k.keyword, ', ') AS keywords, + c.kind AS company_type +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + aka_title m ON ci.movie_id = m.id +JOIN + movie_companies mc ON m.id = mc.movie_id +JOIN + company_type c ON mc.company_type_id = c.id +LEFT JOIN + movie_keyword mk ON m.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +WHERE + m.production_year >= 2000 + AND c.kind IS NOT NULL +GROUP BY + a.name, m.title, m.production_year, c.kind +ORDER BY + m.production_year DESC, a.name ASC; diff --git a/vortex-bench/sqlstorm/job/6095.sql b/vortex-bench/sqlstorm/job/6095.sql new file mode 100644 index 00000000000..c3464c8ffdc --- /dev/null +++ b/vortex-bench/sqlstorm/job/6095.sql @@ -0,0 +1,60 @@ + +WITH RankedTitles AS ( + SELECT + t.id AS title_id, + t.title, + t.production_year, + ROW_NUMBER() 
OVER (PARTITION BY t.production_year ORDER BY t.title) AS title_rank + FROM + title t + WHERE + t.production_year IS NOT NULL +), +ActorRoles AS ( + SELECT + a.id AS aka_id, + c.movie_id, + c.role_id, + r.role, + COUNT(*) AS role_count + FROM + cast_info c + JOIN + aka_name a ON c.person_id = a.person_id + JOIN + role_type r ON c.role_id = r.id + GROUP BY + a.id, c.movie_id, c.role_id, r.role +), +MovieCompanyDetails AS ( + SELECT + mc.movie_id, + STRING_AGG(CASE WHEN ct.kind = 'Producer' THEN cn.name ELSE NULL END, ', ') AS producers, + STRING_AGG(CASE WHEN ct.kind = 'Distributor' THEN cn.name ELSE NULL END, ', ') AS distributors + FROM + movie_companies mc + JOIN + company_name cn ON mc.company_id = cn.id + JOIN + company_type ct ON mc.company_type_id = ct.id + GROUP BY + mc.movie_id +) +SELECT + rt.title AS movie_title, + rt.production_year, + ar.role, + ar.role_count, + mcd.producers, + mcd.distributors +FROM + RankedTitles rt +JOIN + ActorRoles ar ON rt.title_id = ar.movie_id +JOIN + MovieCompanyDetails mcd ON rt.title_id = mcd.movie_id +WHERE + rt.title_rank <= 10 +ORDER BY + rt.production_year DESC, + ar.role_count DESC; diff --git a/vortex-bench/sqlstorm/job/6121.sql b/vortex-bench/sqlstorm/job/6121.sql new file mode 100644 index 00000000000..1529e5be7ea --- /dev/null +++ b/vortex-bench/sqlstorm/job/6121.sql @@ -0,0 +1,35 @@ + +SELECT + a.name AS actor_name, + t.title AS movie_title, + t.production_year, + STRING_AGG(DISTINCT k.keyword, ',' ORDER BY k.keyword) AS keywords, + c.kind AS company_type, + ci.role_id +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + movie_keyword mk ON mk.movie_id = t.id +JOIN + keyword k ON mk.keyword_id = k.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type c ON mc.company_type_id = c.id +WHERE + t.production_year >= 2000 +AND + c.kind LIKE 'Production%' +GROUP BY + a.name, + t.title, + t.production_year, + c.kind, + ci.role_id 
+ORDER BY + t.production_year DESC, + a.name; diff --git a/vortex-bench/sqlstorm/job/6441.sql b/vortex-bench/sqlstorm/job/6441.sql new file mode 100644 index 00000000000..14e79999a24 --- /dev/null +++ b/vortex-bench/sqlstorm/job/6441.sql @@ -0,0 +1,30 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + ct.kind AS company_type, + ki.keyword AS movie_keyword, + pi.info AS person_info, + COUNT(DISTINCT c.id) AS cast_count +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type ct ON mc.company_type_id = ct.id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword ki ON mk.keyword_id = ki.id +JOIN + person_info pi ON a.person_id = pi.person_id +WHERE + t.production_year BETWEEN 2000 AND 2020 + AND ct.kind LIKE 'Production%' +GROUP BY + a.name, t.title, ct.kind, ki.keyword, pi.info +ORDER BY + cast_count DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/6751.sql b/vortex-bench/sqlstorm/job/6751.sql new file mode 100644 index 00000000000..60d10554ac3 --- /dev/null +++ b/vortex-bench/sqlstorm/job/6751.sql @@ -0,0 +1,31 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.kind AS company_type, + k.keyword AS movie_keyword, + pi.info AS person_info +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name cn ON mc.company_id = cn.id +JOIN + company_type c ON mc.company_type_id = c.id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +JOIN + person_info pi ON a.person_id = pi.person_id +WHERE + t.production_year > 2000 + AND c.kind LIKE 'Film%' + AND pi.info_type_id IN (SELECT id FROM info_type WHERE info LIKE '%actor%') +ORDER BY + a.name, t.title +LIMIT 100; diff --git a/vortex-bench/sqlstorm/job/6939.sql b/vortex-bench/sqlstorm/job/6939.sql new file mode 100644 
index 00000000000..f401a38631d --- /dev/null +++ b/vortex-bench/sqlstorm/job/6939.sql @@ -0,0 +1,30 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.note AS cast_note, + comp.name AS company_name, + kt.keyword AS movie_keyword, + it.info AS movie_info +FROM + aka_name a +JOIN + cast_info c ON a.person_id = c.person_id +JOIN + aka_title t ON c.movie_id = t.movie_id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name comp ON mc.company_id = comp.id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword kt ON mk.keyword_id = kt.id +JOIN + movie_info mi ON t.id = mi.movie_id +JOIN + info_type it ON mi.info_type_id = it.id +WHERE + t.production_year > 2000 + AND comp.country_code = 'USA' +ORDER BY + a.name, t.title; diff --git a/vortex-bench/sqlstorm/job/7330.sql b/vortex-bench/sqlstorm/job/7330.sql new file mode 100644 index 00000000000..68a4e22440e --- /dev/null +++ b/vortex-bench/sqlstorm/job/7330.sql @@ -0,0 +1,34 @@ +SELECT + a.id AS aka_id, + a.name AS aka_name, + t.title AS movie_title, + t.production_year, + c.name AS company_name, + r.role AS person_role, + pi.info AS person_info, + k.keyword AS movie_keyword, + COUNT(*) OVER (PARTITION BY t.id) AS total_cast +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name c ON mc.company_id = c.id +JOIN + role_type r ON ci.role_id = r.id +JOIN + person_info pi ON ci.person_id = pi.person_id +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year >= 2000 + AND c.country_code = 'USA' + AND pi.info_type_id IN (SELECT id FROM info_type WHERE info = 'Biography') +ORDER BY + t.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/7606.sql b/vortex-bench/sqlstorm/job/7606.sql new file mode 100644 index 00000000000..ee61a10d08b --- /dev/null +++ b/vortex-bench/sqlstorm/job/7606.sql 
@@ -0,0 +1,54 @@ +WITH RankedTitles AS ( + SELECT + t.id AS title_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.title) AS title_rank + FROM + title t + WHERE + t.production_year IS NOT NULL +), +CastDetails AS ( + SELECT + ci.movie_id, + a.name AS actor_name, + ci.note AS role_note, + r.role AS role_type + FROM + cast_info ci + JOIN + aka_name a ON ci.person_id = a.person_id + JOIN + role_type r ON ci.role_id = r.id +), +MovieKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +) +SELECT + rt.title, + rt.production_year, + cd.actor_name, + cd.role_note, + cd.role_type, + mk.keywords +FROM + RankedTitles rt +JOIN + complete_cast cc ON rt.title_id = cc.movie_id +JOIN + CastDetails cd ON cc.movie_id = cd.movie_id +LEFT JOIN + MovieKeywords mk ON cc.movie_id = mk.movie_id +WHERE + rt.title_rank <= 5 +ORDER BY + rt.production_year DESC, rt.title; diff --git a/vortex-bench/sqlstorm/job/7654.sql b/vortex-bench/sqlstorm/job/7654.sql new file mode 100644 index 00000000000..900d07f6902 --- /dev/null +++ b/vortex-bench/sqlstorm/job/7654.sql @@ -0,0 +1,31 @@ + +SELECT + a.name AS actor_name, + t.title AS movie_title, + c1.kind AS company_type, + c2.name AS company_name, + t.production_year, + COUNT(DISTINCT k.keyword) AS keyword_count +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type c1 ON mc.company_type_id = c1.id +JOIN + company_name c2 ON mc.company_id = c2.id +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year BETWEEN 2000 AND 2020 + AND c2.country_code = 'USA' +GROUP BY + a.name, t.title, c1.kind, c2.name, t.production_year +ORDER BY + keyword_count DESC, actor_name ASC; diff --git 
a/vortex-bench/sqlstorm/job/7850.sql b/vortex-bench/sqlstorm/job/7850.sql new file mode 100644 index 00000000000..da6a54ea40b --- /dev/null +++ b/vortex-bench/sqlstorm/job/7850.sql @@ -0,0 +1,27 @@ + +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.kind AS cast_type, + a.id AS actor_id, + t.production_year, + STRING_AGG(k.keyword, ', ') AS keywords +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + comp_cast_type c ON ci.person_role_id = c.id +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year >= 2000 + AND a.name IS NOT NULL +GROUP BY + a.name, t.title, c.kind, a.id, t.production_year +ORDER BY + t.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/8134.sql b/vortex-bench/sqlstorm/job/8134.sql new file mode 100644 index 00000000000..45866f8c049 --- /dev/null +++ b/vortex-bench/sqlstorm/job/8134.sql @@ -0,0 +1,28 @@ +SELECT + t.title AS movie_title, + a.name AS actor_name, + c.kind AS cast_type, + p.info AS person_info, + k.keyword AS movie_keyword +FROM + aka_title AS t +JOIN + movie_keyword AS mk ON t.id = mk.movie_id +JOIN + keyword AS k ON mk.keyword_id = k.id +JOIN + complete_cast AS cc ON t.id = cc.movie_id +JOIN + cast_info AS ci ON cc.subject_id = ci.id +JOIN + aka_name AS a ON ci.person_id = a.person_id +JOIN + comp_cast_type AS c ON ci.person_role_id = c.id +LEFT JOIN + person_info AS p ON a.person_id = p.person_id +WHERE + t.production_year BETWEEN 2000 AND 2023 + AND k.keyword IN ('Action', 'Drama', 'Comedy') +ORDER BY + t.production_year DESC, + a.name; diff --git a/vortex-bench/sqlstorm/job/8139.sql b/vortex-bench/sqlstorm/job/8139.sql new file mode 100644 index 00000000000..33d06d2efa0 --- /dev/null +++ b/vortex-bench/sqlstorm/job/8139.sql @@ -0,0 +1,31 @@ +SELECT + t.title AS movie_title, + a.name AS actor_name, + ct.kind AS company_type, + k.keyword AS movie_keyword, + p.info AS 
person_info +FROM + title t +JOIN + complete_cast cc ON t.id = cc.movie_id +JOIN + cast_info ci ON cc.subject_id = ci.person_id +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_type ct ON mc.company_type_id = ct.id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +JOIN + person_info p ON a.person_id = p.person_id +WHERE + t.production_year >= 2000 + AND ct.kind LIKE '%Production%' + AND p.info_type_id = (SELECT id FROM info_type WHERE info = 'Birthday') +ORDER BY + t.production_year DESC, + a.name; diff --git a/vortex-bench/sqlstorm/job/8140.sql b/vortex-bench/sqlstorm/job/8140.sql new file mode 100644 index 00000000000..8a0f10c66d4 --- /dev/null +++ b/vortex-bench/sqlstorm/job/8140.sql @@ -0,0 +1,32 @@ + +SELECT + a.name AS actor_name, + t.title AS movie_title, + mc.note AS company_note, + STRING_AGG(DISTINCT k.keyword, ',') AS keywords, + ci.nr_order AS cast_order, + ii.info AS movie_info +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name cn ON mc.company_id = cn.id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +JOIN + movie_info ii ON t.id = ii.movie_id +WHERE + t.production_year >= 2000 + AND cn.country_code = 'USA' + AND ii.info_type_id = (SELECT id FROM info_type WHERE info = 'Summary') +GROUP BY + a.name, t.title, mc.note, ci.nr_order, ii.info, t.production_year +ORDER BY + t.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/8236.sql b/vortex-bench/sqlstorm/job/8236.sql new file mode 100644 index 00000000000..911d520c2dd --- /dev/null +++ b/vortex-bench/sqlstorm/job/8236.sql @@ -0,0 +1,42 @@ +WITH RankedTitles AS ( + SELECT t.id AS title_id, + t.title, + t.production_year, + ROW_NUMBER() OVER (PARTITION BY t.production_year ORDER BY t.id) AS rn + FROM title t 
+ WHERE t.production_year BETWEEN 2000 AND 2020 +), ActorMovies AS ( + SELECT ci.movie_id, + a.name AS actor_name, + COUNT(ci.person_id) AS actor_count + FROM cast_info ci + JOIN aka_name a ON a.person_id = ci.person_id + WHERE ci.nr_order = 1 + GROUP BY ci.movie_id, a.name +), CompanyMovies AS ( + SELECT mc.movie_id, + c.name AS company_name, + ct.kind AS company_type + FROM movie_companies mc + JOIN company_name c ON c.id = mc.company_id + JOIN company_type ct ON ct.id = mc.company_type_id + WHERE c.country_code = 'USA' +), MoviesWithKeywords AS ( + SELECT mk.movie_id, + k.keyword + FROM movie_keyword mk + JOIN keyword k ON k.id = mk.keyword_id + WHERE k.phonetic_code IS NOT NULL +) +SELECT rt.title, + rt.production_year, + am.actor_name, + cm.company_name, + cm.company_type, + mk.keyword +FROM RankedTitles rt +LEFT JOIN ActorMovies am ON am.movie_id = rt.title_id +LEFT JOIN CompanyMovies cm ON cm.movie_id = rt.title_id +LEFT JOIN MoviesWithKeywords mk ON mk.movie_id = rt.title_id +WHERE rt.rn <= 5 +ORDER BY rt.production_year DESC, rt.title; diff --git a/vortex-bench/sqlstorm/job/8650.sql b/vortex-bench/sqlstorm/job/8650.sql new file mode 100644 index 00000000000..21110c8b986 --- /dev/null +++ b/vortex-bench/sqlstorm/job/8650.sql @@ -0,0 +1,48 @@ +WITH RankedMovies AS ( + SELECT + mt.id AS movie_id, + mt.title, + mt.production_year, + COUNT(DISTINCT mc.company_id) AS company_count, + COUNT(DISTINCT mk.keyword_id) AS keyword_count, + ROW_NUMBER() OVER (PARTITION BY mt.production_year ORDER BY COUNT(DISTINCT mc.company_id) DESC) AS rank + FROM + aka_title mt + LEFT JOIN + movie_companies mc ON mt.id = mc.movie_id + LEFT JOIN + movie_keyword mk ON mt.id = mk.movie_id + GROUP BY + mt.id, mt.title, mt.production_year +), +TopRankedMovies AS ( + SELECT + movie_id, + title, + production_year, + company_count, + keyword_count + FROM + RankedMovies + WHERE + rank <= 5 +) +SELECT + tr.title, + tr.production_year, + ak.name AS actor_name, + COUNT(DISTINCT c.person_role_id) 
AS roles_played +FROM + TopRankedMovies tr +JOIN + complete_cast cc ON tr.movie_id = cc.movie_id +JOIN + cast_info c ON cc.subject_id = c.id +JOIN + aka_name ak ON c.person_id = ak.person_id +WHERE + ak.name IS NOT NULL +GROUP BY + tr.title, tr.production_year, ak.name +ORDER BY + tr.production_year DESC, roles_played DESC; diff --git a/vortex-bench/sqlstorm/job/875.sql b/vortex-bench/sqlstorm/job/875.sql new file mode 100644 index 00000000000..3450df21942 --- /dev/null +++ b/vortex-bench/sqlstorm/job/875.sql @@ -0,0 +1,55 @@ +WITH RankedMovies AS ( + SELECT + t.id AS movie_id, + t.title, + t.production_year, + COUNT(DISTINCT ci.person_id) AS cast_count, + RANK() OVER (PARTITION BY t.production_year ORDER BY COUNT(DISTINCT ci.person_id) DESC) AS rank_by_cast + FROM + aka_title t + LEFT JOIN + complete_cast cc ON t.id = cc.movie_id + LEFT JOIN + cast_info ci ON cc.subject_id = ci.id + WHERE + t.production_year >= 2000 + GROUP BY + t.id, t.title, t.production_year +), +TopMovies AS ( + SELECT + movie_id, + title, + production_year + FROM + RankedMovies + WHERE + rank_by_cast <= 5 +), +MovieKeywords AS ( + SELECT + mk.movie_id, + STRING_AGG(k.keyword, ', ') AS keywords + FROM + movie_keyword mk + JOIN + keyword k ON mk.keyword_id = k.id + GROUP BY + mk.movie_id +) +SELECT + tm.title, + tm.production_year, + COALESCE(mk.keywords, 'No keywords') AS keywords, + (SELECT COUNT(*) FROM movie_info mi WHERE mi.movie_id = tm.movie_id AND mi.info_type_id = 1) AS info_count, + CASE + WHEN mk.keywords IS NULL THEN 'Keywords not available' + ELSE 'Keywords available' + END AS keyword_status +FROM + TopMovies tm +LEFT JOIN + MovieKeywords mk ON tm.movie_id = mk.movie_id +ORDER BY + tm.production_year DESC, + tm.title; diff --git a/vortex-bench/sqlstorm/job/8928.sql b/vortex-bench/sqlstorm/job/8928.sql new file mode 100644 index 00000000000..230b098ec3f --- /dev/null +++ b/vortex-bench/sqlstorm/job/8928.sql @@ -0,0 +1,34 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + 
t.production_year, + ckt.kind AS cast_type, + co.name AS company_name, + mi.info AS movie_info, + kv.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + title t ON ci.movie_id = t.id +JOIN + complete_cast cc ON t.id = cc.movie_id +JOIN + comp_cast_type ckt ON ci.person_role_id = ckt.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name co ON mc.company_id = co.id +JOIN + movie_info mi ON t.id = mi.movie_id +JOIN + movie_keyword mk ON t.id = mk.movie_id +JOIN + keyword kv ON mk.keyword_id = kv.id +WHERE + t.production_year BETWEEN 1990 AND 2020 + AND a.name ILIKE '%Smith%' + AND ckt.kind = 'Actor' +ORDER BY + t.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/9183.sql b/vortex-bench/sqlstorm/job/9183.sql new file mode 100644 index 00000000000..db9cbf3ecff --- /dev/null +++ b/vortex-bench/sqlstorm/job/9183.sql @@ -0,0 +1,26 @@ +SELECT + a.name AS actor_name, + t.title AS movie_title, + c.kind AS cast_type, + p.info AS person_info, + k.keyword AS movie_keyword +FROM + aka_name a +JOIN + cast_info ci ON a.person_id = ci.person_id +JOIN + aka_title t ON ci.movie_id = t.movie_id +JOIN + comp_cast_type c ON ci.person_role_id = c.id +JOIN + person_info p ON a.person_id = p.person_id +JOIN + movie_keyword mk ON t.movie_id = mk.movie_id +JOIN + keyword k ON mk.keyword_id = k.id +WHERE + t.production_year BETWEEN 1990 AND 2000 + AND c.kind = 'actor' + AND p.info_type_id IN (SELECT id FROM info_type WHERE info = 'birth date') +ORDER BY + t.production_year DESC, a.name; diff --git a/vortex-bench/sqlstorm/job/9913.sql b/vortex-bench/sqlstorm/job/9913.sql new file mode 100644 index 00000000000..b3220de366e --- /dev/null +++ b/vortex-bench/sqlstorm/job/9913.sql @@ -0,0 +1,55 @@ +WITH MovieDetails AS ( + SELECT + t.title AS movie_title, + t.production_year, + r.role AS cast_role, + an.name AS actor_name, + c.name AS company_name, + k.keyword AS movie_keyword + FROM + title t + JOIN + 
complete_cast cc ON t.id = cc.movie_id + JOIN + cast_info ci ON cc.subject_id = ci.id + JOIN + aka_name an ON ci.person_id = an.person_id + JOIN + movie_companies mc ON t.id = mc.movie_id + JOIN + company_name c ON mc.company_id = c.id + LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id + LEFT JOIN + keyword k ON mk.keyword_id = k.id + JOIN + role_type r ON ci.role_id = r.id + WHERE + t.production_year >= 2000 + AND c.country_code = 'USA' +), +AggregatedResults AS ( + SELECT + movie_title, + production_year, + ARRAY_AGG(DISTINCT actor_name) AS actors, + ARRAY_AGG(DISTINCT company_name) AS production_companies, + ARRAY_AGG(DISTINCT movie_keyword) AS keywords, + COUNT(DISTINCT cast_role) AS role_count + FROM + MovieDetails + GROUP BY + movie_title, production_year +) +SELECT + movie_title, + production_year, + actors, + production_companies, + keywords, + role_count +FROM + AggregatedResults +ORDER BY + production_year DESC, + movie_title; diff --git a/vortex-bench/sqlstorm/job/9980.sql b/vortex-bench/sqlstorm/job/9980.sql new file mode 100644 index 00000000000..dd88c030148 --- /dev/null +++ b/vortex-bench/sqlstorm/job/9980.sql @@ -0,0 +1,57 @@ + +WITH RankedMovies AS ( + SELECT + a.title, + a.production_year, + k.keyword, + ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY a.production_year DESC) AS rn, + a.id + FROM + aka_title a + JOIN + movie_keyword mk ON a.id = mk.movie_id + JOIN + keyword k ON mk.keyword_id = k.id + WHERE + a.production_year >= 2000 +), CompanyInfo AS ( + SELECT + mc.movie_id, + c.name AS company_name, + ct.kind AS company_type + FROM + movie_companies mc + JOIN + company_name c ON mc.company_id = c.id + JOIN + company_type ct ON mc.company_type_id = ct.id +), CompleteCastWithRoles AS ( + SELECT + c.movie_id, + an.name AS actor_name, + r.role AS role_name + FROM + cast_info c + JOIN + aka_name an ON c.person_id = an.person_id + JOIN + role_type r ON c.role_id = r.id +) +SELECT + rm.title, + rm.production_year, + rm.keyword, + ci.company_name, + 
ci.company_type, + cc.actor_name, + cc.role_name +FROM + RankedMovies rm +JOIN + CompanyInfo ci ON rm.id = ci.movie_id +JOIN + CompleteCastWithRoles cc ON rm.id = cc.movie_id +WHERE + rm.rn = 1 +ORDER BY + rm.production_year DESC, ci.company_name, cc.actor_name; diff --git a/vortex-bench/sqlstorm/job/9981.sql b/vortex-bench/sqlstorm/job/9981.sql new file mode 100644 index 00000000000..d72517b0b6f --- /dev/null +++ b/vortex-bench/sqlstorm/job/9981.sql @@ -0,0 +1,37 @@ +SELECT + t.title AS movie_title, + a.name AS actor_name, + r.role AS actor_role, + c.name AS company_name, + k.keyword AS movie_keyword, + i.info AS movie_info +FROM + title t +JOIN + complete_cast cc ON t.id = cc.movie_id +JOIN + cast_info ci ON cc.subject_id = ci.id +JOIN + aka_name a ON ci.person_id = a.person_id +JOIN + role_type r ON ci.role_id = r.id +JOIN + movie_companies mc ON t.id = mc.movie_id +JOIN + company_name c ON mc.company_id = c.id +LEFT JOIN + movie_keyword mk ON t.id = mk.movie_id +LEFT JOIN + keyword k ON mk.keyword_id = k.id +LEFT JOIN + movie_info mi ON t.id = mi.movie_id +LEFT JOIN + info_type i ON mi.info_type_id = i.id +WHERE + t.production_year > 2000 + AND c.country_code = 'USA' +ORDER BY + t.production_year DESC, + a.name ASC, + k.keyword ASC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/stackoverflow/10111.sql b/vortex-bench/sqlstorm/stackoverflow/10111.sql new file mode 100644 index 00000000000..30ecee6f673 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/10111.sql @@ -0,0 +1,55 @@ +WITH PostStats AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + COALESCE(a.AnswerCount, 0) AS AnswerCount, + COALESCE(c.CommentCount, 0) AS CommentCount, + COALESCE(b.BadgeCount, 0) AS BadgeCount + FROM + Posts p + LEFT JOIN ( + SELECT + ParentId, COUNT(*) AS AnswerCount + FROM + Posts + WHERE + PostTypeId = 2 + GROUP BY + ParentId + ) a ON p.Id = a.ParentId + LEFT JOIN ( + SELECT + PostId, COUNT(*) AS CommentCount + FROM + Comments + GROUP BY 
+ PostId + ) c ON p.Id = c.PostId + LEFT JOIN ( + SELECT + UserId, COUNT(*) AS BadgeCount + FROM + Badges + GROUP BY + UserId + ) b ON p.OwnerUserId = b.UserId + WHERE + p.PostTypeId = 1 +) + +SELECT + Title, + CreationDate, + Score, + ViewCount, + AnswerCount, + CommentCount, + BadgeCount +FROM + PostStats +ORDER BY + Score DESC, ViewCount DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/10183.sql b/vortex-bench/sqlstorm/stackoverflow/10183.sql new file mode 100644 index 00000000000..f3bf26a278a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/10183.sql @@ -0,0 +1,69 @@ +WITH UserStats AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + U.Reputation, + COUNT(DISTINCT P.Id) AS PostCount, + COUNT(DISTINCT C.Id) AS CommentCount, + SUM(CASE WHEN V.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVoteCount, + SUM(CASE WHEN V.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVoteCount + FROM + Users U + LEFT JOIN + Posts P ON U.Id = P.OwnerUserId + LEFT JOIN + Comments C ON P.Id = C.PostId + LEFT JOIN + Votes V ON P.Id = V.PostId + GROUP BY + U.Id, U.DisplayName, U.Reputation +), + +PostStats AS ( + SELECT + P.Id AS PostId, + P.Title, + P.CreationDate, + P.OwnerUserId, + P.ViewCount, + P.Score, + P.AnswerCount, + P.CommentCount, + CASE + WHEN P.PostTypeId = 1 THEN 'Question' + WHEN P.PostTypeId = 2 THEN 'Answer' + ELSE 'Other' + END AS PostType, + COUNT(CASE WHEN V.VoteTypeId = 2 THEN 1 END) AS UpVotes, + COUNT(CASE WHEN V.VoteTypeId = 3 THEN 1 END) AS DownVotes + FROM + Posts P + LEFT JOIN + Votes V ON P.Id = V.PostId + GROUP BY + P.Id, P.Title, P.CreationDate, P.OwnerUserId, P.ViewCount, P.Score, P.AnswerCount, P.CommentCount, P.PostTypeId +) + +SELECT + U.DisplayName, + U.Reputation, + U.PostCount, + U.CommentCount, + U.UpVoteCount, + U.DownVoteCount, + P.PostId, + P.Title, + P.CreationDate, + P.ViewCount, + P.Score, + P.AnswerCount, + P.CommentCount, + P.PostType, + P.UpVotes, + P.DownVotes +FROM + UserStats U +JOIN + PostStats P ON 
U.UserId = P.OwnerUserId +ORDER BY + U.Reputation DESC, P.ViewCount DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/10314.sql b/vortex-bench/sqlstorm/stackoverflow/10314.sql new file mode 100644 index 00000000000..58432656252 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/10314.sql @@ -0,0 +1,46 @@ +WITH UserPostStatistics AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + COUNT(P.Id) AS TotalPosts, + COUNT(CASE WHEN P.PostTypeId = 1 THEN 1 END) AS TotalQuestions, + COUNT(CASE WHEN P.PostTypeId = 2 THEN 1 END) AS TotalAnswers, + SUM(P.Score) AS TotalScore, + SUM(P.ViewCount) AS TotalViews + FROM + Users U + LEFT JOIN + Posts P ON U.Id = P.OwnerUserId + GROUP BY + U.Id, U.DisplayName +), +UserBadgeStatistics AS ( + SELECT + B.UserId, + COUNT(B.Id) AS TotalBadges, + COUNT(CASE WHEN B.Class = 1 THEN 1 END) AS GoldBadges, + COUNT(CASE WHEN B.Class = 2 THEN 1 END) AS SilverBadges, + COUNT(CASE WHEN B.Class = 3 THEN 1 END) AS BronzeBadges + FROM + Badges B + GROUP BY + B.UserId +) +SELECT + UPS.UserId, + UPS.DisplayName, + UPS.TotalPosts, + UPS.TotalQuestions, + UPS.TotalAnswers, + UPS.TotalScore, + UPS.TotalViews, + COALESCE(UBS.TotalBadges, 0) AS TotalBadges, + COALESCE(UBS.GoldBadges, 0) AS GoldBadges, + COALESCE(UBS.SilverBadges, 0) AS SilverBadges, + COALESCE(UBS.BronzeBadges, 0) AS BronzeBadges +FROM + UserPostStatistics UPS +LEFT JOIN + UserBadgeStatistics UBS ON UPS.UserId = UBS.UserId +ORDER BY + UPS.TotalScore DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/10417.sql b/vortex-bench/sqlstorm/stackoverflow/10417.sql new file mode 100644 index 00000000000..00c628904e7 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/10417.sql @@ -0,0 +1,25 @@ +SELECT + u.Id AS UserId, + u.DisplayName, + u.Reputation, + u.CreationDate, + u.LastAccessDate, + COUNT(DISTINCT p.Id) AS TotalPosts, + COUNT(DISTINCT CASE WHEN p.PostTypeId = 1 THEN p.Id END) AS TotalQuestions, + COUNT(DISTINCT CASE WHEN 
p.PostTypeId = 2 THEN p.Id END) AS TotalAnswers, + SUM(COALESCE(p.Score, 0)) AS TotalScore, + SUM(COALESCE(c.Score, 0)) AS TotalCommentsScore, + COUNT(DISTINCT c.Id) AS TotalComments +FROM + Users u +LEFT JOIN + Posts p ON u.Id = p.OwnerUserId +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + u.Reputation > 0 +GROUP BY + u.Id, u.DisplayName, u.Reputation, u.CreationDate, u.LastAccessDate +ORDER BY + TotalPosts DESC, TotalScore DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/10608.sql b/vortex-bench/sqlstorm/stackoverflow/10608.sql new file mode 100644 index 00000000000..4941c7a81e8 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/10608.sql @@ -0,0 +1,46 @@ +WITH UserPostStats AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + COUNT(P.Id) AS TotalPosts, + SUM(CASE WHEN P.PostTypeId = 1 THEN 1 ELSE 0 END) AS TotalQuestions, + SUM(CASE WHEN P.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + SUM(P.Score) AS TotalScore, + AVG(P.ViewCount) AS AvgViewCount + FROM + Users U + LEFT JOIN + Posts P ON U.Id = P.OwnerUserId + GROUP BY + U.Id, U.DisplayName +), +TopUsers AS ( + SELECT + UserId, + DisplayName, + TotalPosts, + TotalQuestions, + TotalAnswers, + TotalScore, + AvgViewCount, + RANK() OVER (ORDER BY TotalScore DESC) AS RankByScore, + RANK() OVER (ORDER BY TotalPosts DESC) AS RankByPosts + FROM + UserPostStats +) +SELECT + UserId, + DisplayName, + TotalPosts, + TotalQuestions, + TotalAnswers, + TotalScore, + AvgViewCount, + RankByScore, + RankByPosts +FROM + TopUsers +WHERE + RankByScore <= 10 OR RankByPosts <= 10 +ORDER BY + RankByScore, RankByPosts; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/10689.sql b/vortex-bench/sqlstorm/stackoverflow/10689.sql new file mode 100644 index 00000000000..9caecc463c4 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/10689.sql @@ -0,0 +1,8 @@ +SELECT + (SELECT COUNT(*) FROM Posts) AS TotalPosts, + (SELECT COUNT(*) FROM Users) AS 
TotalUsers, + (SELECT COUNT(*) FROM Comments) AS TotalComments, + (SELECT COUNT(*) FROM Votes) AS TotalVotes, + AVG(Score) AS AveragePostScore +FROM + Posts; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/10888.sql b/vortex-bench/sqlstorm/stackoverflow/10888.sql new file mode 100644 index 00000000000..48f2370f316 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/10888.sql @@ -0,0 +1,29 @@ +WITH PostStats AS ( + SELECT + pt.Name AS PostType, + COUNT(p.Id) AS TotalPosts, + COUNT(DISTINCT p.OwnerUserId) AS UniqueUsers, + AVG(u.Reputation) AS AverageUserReputation, + SUM(p.ViewCount) AS TotalViews, + SUM(p.Score) AS TotalScore + FROM + Posts p + JOIN + PostTypes pt ON p.PostTypeId = pt.Id + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + GROUP BY + pt.Name +) + +SELECT + PostType, + TotalPosts, + UniqueUsers, + AverageUserReputation, + TotalViews, + TotalScore +FROM + PostStats +ORDER BY + TotalPosts DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/11148.sql b/vortex-bench/sqlstorm/stackoverflow/11148.sql new file mode 100644 index 00000000000..691721b58d2 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/11148.sql @@ -0,0 +1,42 @@ +WITH PostStatistics AS ( + SELECT + P.Id AS PostId, + P.Title, + P.CreationDate, + P.ViewCount, + P.Score, + COUNT(CASE WHEN C.Id IS NOT NULL THEN 1 END) AS CommentCount, + COUNT(CASE WHEN A.Id IS NOT NULL THEN 1 END) AS AnswerCount, + MAX(CASE WHEN V.Id IS NOT NULL THEN 1 ELSE 0 END) AS HasVote, + MAX(V.CreationDate) AS LastVoteDate + FROM + Posts P + LEFT JOIN + Comments C ON P.Id = C.PostId + LEFT JOIN + Posts A ON P.Id = A.ParentId + LEFT JOIN + Votes V ON P.Id = V.PostId + WHERE + P.CreationDate >= cast('2024-10-01' as date) - INTERVAL '1 year' + GROUP BY + P.Id, P.Title, P.CreationDate, P.ViewCount, P.Score +) +SELECT + PS.PostId, + PS.Title, + PS.CreationDate, + PS.ViewCount, + PS.Score, + PS.CommentCount, + PS.AnswerCount, + PS.HasVote, + PS.LastVoteDate, + 
U.DisplayName AS AuthorDisplayName, + U.Reputation AS AuthorReputation +FROM + PostStatistics PS +JOIN + Users U ON PS.PostId = U.Id +ORDER BY + PS.Score DESC, PS.ViewCount DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/1124.sql b/vortex-bench/sqlstorm/stackoverflow/1124.sql new file mode 100644 index 00000000000..23a95ed71d2 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/1124.sql @@ -0,0 +1,46 @@ +WITH RankedPosts AS ( + SELECT p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) AS PostRank, + u.Reputation AS UserReputation + FROM Posts p + JOIN Users u ON p.OwnerUserId = u.Id + WHERE p.Score IS NOT NULL +), CommentStatistics AS ( + SELECT PostId, + COUNT(*) AS TotalComments, + AVG(Score) AS AverageCommentScore + FROM Comments + GROUP BY PostId +), BadgeCounts AS ( + SELECT UserId, + COUNT(*) AS TotalBadges, + SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN Class = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN Class = 3 THEN 1 ELSE 0 END) AS BronzeBadges + FROM Badges + GROUP BY UserId +) +SELECT rp.PostId, + rp.Title, + rp.CreationDate, + rp.Score, + cs.TotalComments, + cs.AverageCommentScore, + bc.TotalBadges, + bc.GoldBadges, + bc.SilverBadges, + bc.BronzeBadges, + CASE + WHEN bc.UserId IS NOT NULL THEN 'Has Badges' + ELSE 'No Badges' + END AS BadgeStatus +FROM RankedPosts rp +LEFT JOIN CommentStatistics cs ON rp.PostId = cs.PostId +LEFT JOIN BadgeCounts bc ON rp.UserReputation = bc.UserId +WHERE rp.PostRank <= 5 + AND (cs.TotalComments IS NULL OR cs.TotalComments > 2) + AND (rp.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year') +ORDER BY rp.Score DESC, cs.TotalComments DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/11633.sql b/vortex-bench/sqlstorm/stackoverflow/11633.sql new file mode 100644 index 00000000000..5281ad001b3 --- /dev/null +++ 
b/vortex-bench/sqlstorm/stackoverflow/11633.sql @@ -0,0 +1,50 @@ + +WITH PostStats AS ( + SELECT + p.Id AS PostId, + p.PostTypeId, + COUNT(c.Id) AS CommentCount, + COUNT(v.Id) AS VoteCount, + MAX(p.CreationDate) AS LastActive + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + p.Id, p.PostTypeId +), +UserStats AS ( + SELECT + u.Id AS UserId, + COUNT(b.Id) AS BadgeCount, + SUM(u.UpVotes) AS TotalUpVotes, + AVG(u.Reputation) AS AvgReputation + FROM + Users u + LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id +) + +SELECT + ps.PostId, + ps.PostTypeId, + ps.CommentCount, + ps.VoteCount, + ps.LastActive, + us.UserId, + us.BadgeCount, + us.TotalUpVotes, + us.AvgReputation +FROM + PostStats ps +JOIN + UserStats us ON ps.PostTypeId = 1 +WHERE + ps.LastActive BETWEEN TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '30 days' AND TIMESTAMP '2024-10-01 12:34:56' +ORDER BY + ps.VoteCount DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/stackoverflow/11649.sql b/vortex-bench/sqlstorm/stackoverflow/11649.sql new file mode 100644 index 00000000000..d1edc7dc927 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/11649.sql @@ -0,0 +1,28 @@ + +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + u.DisplayName AS OwnerDisplayName, + u.Reputation AS OwnerReputation, + COUNT(c.Id) AS CommentCount, + COUNT(v.Id) AS VoteCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes +FROM + Posts p +LEFT JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.CreationDate >= TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 year' +GROUP BY + p.Id, p.Title, p.CreationDate, p.Score, p.ViewCount, u.DisplayName, u.Reputation +ORDER BY + p.CreationDate DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/stackoverflow/12028.sql 
b/vortex-bench/sqlstorm/stackoverflow/12028.sql new file mode 100644 index 00000000000..e07dae9a1ca --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/12028.sql @@ -0,0 +1,41 @@ +WITH UserPostStats AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + COUNT(DISTINCT P.Id) AS TotalPosts, + SUM(CASE WHEN P.PostTypeId = 1 THEN 1 ELSE 0 END) AS TotalQuestions, + SUM(CASE WHEN P.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + SUM(P.ViewCount) AS TotalViews, + SUM(P.Score) AS TotalScore + FROM + Users U + LEFT JOIN + Posts P ON U.Id = P.OwnerUserId + GROUP BY + U.Id, U.DisplayName +), +TopUsers AS ( + SELECT + UserId, + DisplayName, + TotalPosts, + TotalQuestions, + TotalAnswers, + TotalViews, + TotalScore, + ROW_NUMBER() OVER (ORDER BY TotalScore DESC) AS Rank + FROM + UserPostStats +) +SELECT + Rank, + DisplayName, + TotalPosts, + TotalQuestions, + TotalAnswers, + TotalViews, + TotalScore +FROM + TopUsers +WHERE + Rank <= 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/12033.sql b/vortex-bench/sqlstorm/stackoverflow/12033.sql new file mode 100644 index 00000000000..1f9220cb1ce --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/12033.sql @@ -0,0 +1,47 @@ +WITH UserPostStats AS ( + SELECT + u.Id AS UserId, + COUNT(p.Id) AS TotalPosts, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS TotalQuestions, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + SUM(p.Score) AS TotalScore, + AVG(p.ViewCount) AS AvgViewCount + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + GROUP BY + u.Id +), +BadgeStats AS ( + SELECT + b.UserId, + COUNT(b.Id) AS TotalBadges, + SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END) AS TotalGoldBadges, + SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END) AS TotalSilverBadges, + SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END) AS TotalBronzeBadges + FROM + Badges b + GROUP BY + b.UserId +) +SELECT + u.DisplayName, + ups.TotalPosts, + ups.TotalQuestions, + ups.TotalAnswers, + 
ups.TotalScore, + ups.AvgViewCount, + bs.TotalBadges, + bs.TotalGoldBadges, + bs.TotalSilverBadges, + bs.TotalBronzeBadges +FROM + Users u +LEFT JOIN + UserPostStats ups ON u.Id = ups.UserId +LEFT JOIN + BadgeStats bs ON u.Id = bs.UserId +ORDER BY + ups.TotalScore DESC, + ups.TotalPosts DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/12064.sql b/vortex-bench/sqlstorm/stackoverflow/12064.sql new file mode 100644 index 00000000000..f9a98b0519d --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/12064.sql @@ -0,0 +1,41 @@ + +WITH PostStats AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.ViewCount, + p.Score, + p.AnswerCount, + COUNT(c.Id) AS CommentCount, + COALESCE(u.DisplayName, 'Community User') AS OwnerDisplayName, + COUNT(DISTINCT v.Id) AS VoteCount, + AVG(CASE WHEN v.VoteTypeId = 2 THEN 1.0 ELSE 0 END) AS AvgUpVotes, + AVG(CASE WHEN v.VoteTypeId = 3 THEN 1.0 ELSE 0 END) AS AvgDownVotes + FROM + Posts p + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + p.Id, p.Title, p.CreationDate, p.ViewCount, p.Score, p.AnswerCount, u.DisplayName +) +SELECT + ps.PostId, + ps.Title, + ps.CreationDate, + ps.ViewCount, + ps.Score, + ps.AnswerCount, + ps.CommentCount, + ps.OwnerDisplayName, + ps.VoteCount, + ps.AvgUpVotes, + ps.AvgDownVotes +FROM + PostStats ps +ORDER BY + ps.Score DESC, ps.ViewCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/1250.sql b/vortex-bench/sqlstorm/stackoverflow/1250.sql new file mode 100644 index 00000000000..cc40b7a4ed3 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/1250.sql @@ -0,0 +1,69 @@ + +WITH RankedPosts AS ( + SELECT + p.Id, + p.Title, + p.CreationDate, + p.Score, + COUNT(c.Id) AS CommentCount, + RANK() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) AS ScoreRank + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.CreationDate >= '2023-01-01' + AND 
p.Score IS NOT NULL + GROUP BY + p.Id, p.Title, p.CreationDate, p.Score, p.PostTypeId +), + +TopUsers AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + SUM(v.BountyAmount) AS TotalBounty + FROM + Users u + JOIN + Votes v ON u.Id = v.UserId + WHERE + v.VoteTypeId IN (8, 9) + GROUP BY + u.Id, u.DisplayName + HAVING + SUM(v.BountyAmount) > 0 +), + +ClosedPosts AS ( + SELECT + ph.PostId, + STRING_AGG(DISTINCT ctr.Name, ', ') AS ClosedReasons + FROM + PostHistory ph + JOIN + CloseReasonTypes ctr ON CAST(ph.Comment AS INTEGER) = ctr.Id + WHERE + ph.PostHistoryTypeId = 10 + GROUP BY + ph.PostId +) + +SELECT + rp.Title, + rp.Score, + rp.CommentCount, + tu.DisplayName AS TopUser, + tu.TotalBounty, + cp.ClosedReasons +FROM + RankedPosts rp +LEFT JOIN + TopUsers tu ON rp.ScoreRank = 1 AND tu.TotalBounty IS NOT NULL +LEFT JOIN + ClosedPosts cp ON rp.Id = cp.PostId +WHERE + rp.Score > 0 + AND COALESCE(cp.ClosedReasons, '') <> '' +ORDER BY + rp.Score DESC, rp.CreationDate DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/12508.sql b/vortex-bench/sqlstorm/stackoverflow/12508.sql new file mode 100644 index 00000000000..75065b8ebda --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/12508.sql @@ -0,0 +1,12 @@ +SELECT + pt.Name AS PostType, + COUNT(p.Id) AS TotalPosts, + AVG(p.Score) AS AverageScore +FROM + Posts p +JOIN + PostTypes pt ON p.PostTypeId = pt.Id +GROUP BY + pt.Name +ORDER BY + TotalPosts DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/12754.sql b/vortex-bench/sqlstorm/stackoverflow/12754.sql new file mode 100644 index 00000000000..e50344e24f4 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/12754.sql @@ -0,0 +1,30 @@ +SELECT + u.Id AS UserId, + u.DisplayName, + u.Reputation, + p.Id AS PostId, + p.Title, + p.CreationDate AS PostCreationDate, + p.Score AS PostScore, + p.ViewCount, + COALESCE(vote_counts.UpVotes, 0) AS UpVotes, + COALESCE(vote_counts.DownVotes, 0) AS DownVotes, + COALESCE(vote_counts.TotalVotes, 0) AS 
TotalVotes +FROM + Users u +LEFT JOIN + Posts p ON u.Id = p.OwnerUserId +LEFT JOIN + (SELECT + PostId, + SUM(CASE WHEN VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes, + COUNT(*) AS TotalVotes + FROM + Votes + GROUP BY + PostId) AS vote_counts ON p.Id = vote_counts.PostId +WHERE + p.Id IS NOT NULL +ORDER BY + u.Reputation DESC, p.CreationDate DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/12771.sql b/vortex-bench/sqlstorm/stackoverflow/12771.sql new file mode 100644 index 00000000000..3a50541b203 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/12771.sql @@ -0,0 +1,14 @@ +SELECT + pt.Name AS PostType, + COUNT(p.Id) AS PostCount, + AVG(p.Score) AS AverageScore, + AVG(p.ViewCount) AS AverageViewCount, + COUNT(DISTINCT p.OwnerUserId) AS ActiveUsers +FROM + Posts p +JOIN + PostTypes pt ON p.PostTypeId = pt.Id +GROUP BY + pt.Name +ORDER BY + PostCount DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/13743.sql b/vortex-bench/sqlstorm/stackoverflow/13743.sql new file mode 100644 index 00000000000..3610ebcd972 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/13743.sql @@ -0,0 +1,51 @@ +WITH UserPostStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(p.Id) AS PostCount, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS QuestionCount, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS AnswerCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVoteCount + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + u.Id, u.DisplayName +), +UserBadgeStats AS ( + SELECT + UserId, + COUNT(Id) AS BadgeCount, + SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS GoldBadgeCount, + SUM(CASE WHEN Class = 2 THEN 1 ELSE 0 END) AS SilverBadgeCount, + SUM(CASE WHEN Class = 3 THEN 1 ELSE 0 END) AS 
BronzeBadgeCount + FROM + Badges + GROUP BY + UserId +) +SELECT + u.Id AS UserId, + u.DisplayName, + COALESCE(ups.PostCount, 0) AS TotalPosts, + COALESCE(ups.QuestionCount, 0) AS TotalQuestions, + COALESCE(ups.AnswerCount, 0) AS TotalAnswers, + COALESCE(ups.UpVoteCount, 0) AS TotalUpVotes, + COALESCE(ups.DownVoteCount, 0) AS TotalDownVotes, + COALESCE(ubs.BadgeCount, 0) AS TotalBadges, + COALESCE(ubs.GoldBadgeCount, 0) AS TotalGoldBadges, + COALESCE(ubs.SilverBadgeCount, 0) AS TotalSilverBadges, + COALESCE(ubs.BronzeBadgeCount, 0) AS TotalBronzeBadges +FROM + Users u +LEFT JOIN + UserPostStats ups ON u.Id = ups.UserId +LEFT JOIN + UserBadgeStats ubs ON u.Id = ubs.UserId +ORDER BY + TotalPosts DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/13809.sql b/vortex-bench/sqlstorm/stackoverflow/13809.sql new file mode 100644 index 00000000000..46b0d19b9f8 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/13809.sql @@ -0,0 +1,53 @@ + +WITH UserStats AS ( + SELECT + u.Id AS UserId, + u.Reputation, + COUNT(DISTINCT p.Id) AS PostCount, + SUM(COALESCE(p.Score, 0)) AS TotalScore, + SUM(COALESCE(p.ViewCount, 0)) AS TotalViews, + SUM(CASE WHEN b.Id IS NOT NULL THEN 1 ELSE 0 END) AS BadgeCount + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id, u.Reputation +), +PostStats AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + COUNT(c.Id) AS CommentCount, + COUNT(DISTINCT l.RelatedPostId) AS LinkedPostCount + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + PostLinks l ON p.Id = l.PostId + GROUP BY + p.Id, p.Title, p.CreationDate +) +SELECT + us.UserId, + us.Reputation, + us.PostCount, + us.TotalScore, + us.TotalViews, + us.BadgeCount, + ps.PostId, + ps.Title, + ps.CreationDate, + ps.CommentCount, + ps.LinkedPostCount +FROM + UserStats us +JOIN + PostStats ps ON us.UserId = ps.PostId +ORDER BY + us.Reputation DESC, + 
ps.CreationDate DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/13845.sql b/vortex-bench/sqlstorm/stackoverflow/13845.sql new file mode 100644 index 00000000000..2cf623b6a42 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/13845.sql @@ -0,0 +1,41 @@ +WITH UserStats AS ( + SELECT + U.Id AS UserId, + U.Reputation, + U.Views, + COUNT(DISTINCT P.Id) AS PostCount, + SUM(CASE WHEN P.PostTypeId = 1 THEN 1 ELSE 0 END) AS QuestionCount, + SUM(CASE WHEN P.PostTypeId = 2 THEN 1 ELSE 0 END) AS AnswerCount, + SUM(CASE WHEN P.PostTypeId IN (1, 2) THEN P.Score ELSE 0 END) AS TotalScore + FROM + Users U + LEFT JOIN + Posts P ON U.Id = P.OwnerUserId + GROUP BY + U.Id, U.Reputation, U.Views +), +TopBadgeUsers AS ( + SELECT + B.UserId, + COUNT(B.Id) AS BadgeCount + FROM + Badges B + GROUP BY + B.UserId +) +SELECT + U.UserId, + U.Reputation, + U.Views, + U.PostCount, + U.QuestionCount, + U.AnswerCount, + U.TotalScore, + COALESCE(B.BadgeCount, 0) AS BadgeCount +FROM + UserStats U +LEFT JOIN + TopBadgeUsers B ON U.UserId = B.UserId +ORDER BY + U.Reputation DESC, U.TotalScore DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/14106.sql b/vortex-bench/sqlstorm/stackoverflow/14106.sql new file mode 100644 index 00000000000..41a8f45b9b6 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/14106.sql @@ -0,0 +1,54 @@ + +WITH PostStatistics AS ( + SELECT + p.Id AS PostId, + p.Title, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVoteCount, + AVG(p.Score) AS AverageScore, + MAX(p.CreationDate) AS LastActivityDate + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + p.Id, p.Title +), +UserStatistics AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(b.Id) AS BadgeCount, + SUM(u.UpVotes) AS TotalUpVotes, + SUM(u.DownVotes) AS TotalDownVotes + FROM + Users u + 
LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id, u.DisplayName +) +SELECT + ps.PostId, + ps.Title, + ps.CommentCount, + ps.UpVoteCount, + ps.DownVoteCount, + ps.AverageScore, + ps.LastActivityDate, + us.UserId, + us.DisplayName, + us.BadgeCount, + us.TotalUpVotes, + us.TotalDownVotes +FROM + PostStatistics ps +JOIN + UserStatistics us ON ps.PostId = us.UserId +ORDER BY + ps.LastActivityDate DESC, + ps.AverageScore DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/stackoverflow/14250.sql b/vortex-bench/sqlstorm/stackoverflow/14250.sql new file mode 100644 index 00000000000..7c8158e960f --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/14250.sql @@ -0,0 +1,49 @@ + +WITH UserPostStats AS ( + SELECT + u.Id AS UserId, + COUNT(p.Id) AS PostCount, + SUM(COALESCE(p.Score, 0)) AS TotalScore, + AVG(COALESCE(p.ViewCount, 0)) AS AvgViewCount, + AVG(COALESCE(p.AcceptedAnswerId, 0)) AS AcceptedAnswerRatio + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + GROUP BY + u.Id +), +PostHistoryStats AS ( + SELECT + ph.PostId, + COUNT(ph.Id) AS EditCount, + MAX(ph.CreationDate) AS LastEdited + FROM + PostHistory ph + GROUP BY + ph.PostId +), +FinalStats AS ( + SELECT + up.UserId, + up.PostCount, + up.TotalScore, + up.AvgViewCount, + ph.EditCount, + ph.LastEdited + FROM + UserPostStats up + LEFT JOIN + PostHistoryStats ph ON up.UserId = ph.PostId +) +SELECT + UserId, + PostCount, + TotalScore, + AvgViewCount, + EditCount, + LastEdited +FROM + FinalStats +ORDER BY + TotalScore DESC, PostCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/14349.sql b/vortex-bench/sqlstorm/stackoverflow/14349.sql new file mode 100644 index 00000000000..24744a998bc --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/14349.sql @@ -0,0 +1,28 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + p.AnswerCount, + (SELECT COUNT(*) FROM Comments c WHERE c.PostId = p.Id) AS CommentCount, + ROW_NUMBER() OVER (ORDER 
BY p.CreationDate DESC) AS RowNumber + FROM + Posts p + WHERE + p.PostTypeId = 1 +) + +SELECT + rp.PostId, + rp.Title, + rp.CreationDate, + rp.Score, + rp.ViewCount, + rp.AnswerCount, + rp.CommentCount +FROM + RankedPosts rp +WHERE + rp.RowNumber <= 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/14549.sql b/vortex-bench/sqlstorm/stackoverflow/14549.sql new file mode 100644 index 00000000000..057f4316943 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/14549.sql @@ -0,0 +1,22 @@ +SELECT + PH.PostId, + COUNT(PH.Id) AS RevisionCount, + MIN(PH.CreationDate) AS FirstRevisionDate, + MAX(PH.CreationDate) AS LastRevisionDate, + U.DisplayName AS LastEditedBy, + P.Title, + P.Score, + P.ViewCount, + P.AnswerCount, + P.CommentCount +FROM + PostHistory PH +JOIN + Posts P ON PH.PostId = P.Id +LEFT JOIN + Users U ON PH.UserId = U.Id +GROUP BY + PH.PostId, U.DisplayName, P.Title, P.Score, P.ViewCount, P.AnswerCount, P.CommentCount +ORDER BY + RevisionCount DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/14700.sql b/vortex-bench/sqlstorm/stackoverflow/14700.sql new file mode 100644 index 00000000000..c0c15f823f2 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/14700.sql @@ -0,0 +1,49 @@ +WITH UserPostStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(p.Id) AS TotalPosts, + SUM(CASE WHEN p.PostTypeId IN (1, 2) THEN 1 ELSE 0 END) AS TotalQuestions, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + AVG(p.Score) AS AvgScore, + SUM(p.ViewCount) AS TotalViews, + SUM(p.FavoriteCount) AS TotalFavorites + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + GROUP BY + u.Id, u.DisplayName +), +UserBadges AS ( + SELECT + b.UserId, + COUNT(b.Id) AS TotalBadges, + SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END) AS BronzeBadges + FROM + Badges b + GROUP BY + b.UserId 
+) +SELECT + ups.UserId, + ups.DisplayName, + ups.TotalPosts, + ups.TotalQuestions, + ups.TotalAnswers, + ups.AvgScore, + ups.TotalViews, + ups.TotalFavorites, + COALESCE(ub.TotalBadges, 0) AS TotalBadges, + COALESCE(ub.GoldBadges, 0) AS GoldBadges, + COALESCE(ub.SilverBadges, 0) AS SilverBadges, + COALESCE(ub.BronzeBadges, 0) AS BronzeBadges +FROM + UserPostStats ups +LEFT JOIN + UserBadges ub ON ups.UserId = ub.UserId +ORDER BY + ups.TotalPosts DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/14966.sql b/vortex-bench/sqlstorm/stackoverflow/14966.sql new file mode 100644 index 00000000000..d8ff25a2171 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/14966.sql @@ -0,0 +1,44 @@ + +WITH PostStats AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + p.AnswerCount, + p.CommentCount, + u.Reputation AS OwnerReputation, + COALESCE(SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END), 0) AS UpVotes, + COALESCE(SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END), 0) AS DownVotes + FROM + Posts p + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + p.Id, p.Title, p.CreationDate, p.Score, p.ViewCount, p.AnswerCount, p.CommentCount, u.Reputation +), +TopPosts AS ( + SELECT + *, + ROW_NUMBER() OVER (ORDER BY Score DESC, ViewCount DESC) AS Rank + FROM + PostStats +) +SELECT + PostId, + Title, + CreationDate, + Score, + ViewCount, + AnswerCount, + CommentCount, + OwnerReputation, + UpVotes, + DownVotes +FROM + TopPosts +WHERE + Rank <= 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/15059.sql b/vortex-bench/sqlstorm/stackoverflow/15059.sql new file mode 100644 index 00000000000..0f5eb7f06c6 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/15059.sql @@ -0,0 +1,22 @@ + +SELECT + p.Id as PostId, + p.Title, + p.CreationDate, + u.DisplayName as OwnerDisplayName, + p.Score, + p.ViewCount, + COUNT(c.Id) as CommentCount +FROM + Posts p +JOIN + 
Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName, p.Score, p.ViewCount +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/15440.sql b/vortex-bench/sqlstorm/stackoverflow/15440.sql new file mode 100644 index 00000000000..f4b4e901152 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/15440.sql @@ -0,0 +1,24 @@ + +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS Author, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVoteCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/15490.sql b/vortex-bench/sqlstorm/stackoverflow/15490.sql new file mode 100644 index 00000000000..2e0482978fd --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/15490.sql @@ -0,0 +1,15 @@ +SELECT + p.Title, + p.CreationDate, + p.ViewCount, + p.Score, + u.DisplayName AS OwnerDisplayName +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/15552.sql b/vortex-bench/sqlstorm/stackoverflow/15552.sql new file mode 100644 index 00000000000..e65c27cf8e5 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/15552.sql @@ -0,0 +1,22 @@ + +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount, + COUNT(v.Id) AS VoteCount +FROM + Posts p +LEFT JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON 
p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/15599.sql b/vortex-bench/sqlstorm/stackoverflow/15599.sql new file mode 100644 index 00000000000..1b73eb110d4 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/15599.sql @@ -0,0 +1,6 @@ +SELECT u.DisplayName, p.Title, p.CreationDate, p.ViewCount +FROM Posts p +JOIN Users u ON p.OwnerUserId = u.Id +WHERE p.PostTypeId = 1 +ORDER BY p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/15899.sql b/vortex-bench/sqlstorm/stackoverflow/15899.sql new file mode 100644 index 00000000000..9f8efa964d3 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/15899.sql @@ -0,0 +1,22 @@ + +SELECT + p.Title, + p.CreationDate, + u.DisplayName AS OwnerName, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVoteCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/15997.sql b/vortex-bench/sqlstorm/stackoverflow/15997.sql new file mode 100644 index 00000000000..ebb7426e9a1 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/15997.sql @@ -0,0 +1,19 @@ +SELECT + U.DisplayName AS UserName, + P.Title AS PostTitle, + P.CreationDate AS PostDate, + P.Score AS PostScore, + COUNT(C.Id) AS CommentCount +FROM + Posts P +JOIN + Users U ON P.OwnerUserId = U.Id +LEFT JOIN + Comments C ON P.Id = C.PostId +WHERE + P.PostTypeId = 1 +GROUP BY + U.DisplayName, P.Title, P.CreationDate, P.Score +ORDER BY + PostScore DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/16014.sql b/vortex-bench/sqlstorm/stackoverflow/16014.sql new file 
mode 100644 index 00000000000..3dab089e8f8 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/16014.sql @@ -0,0 +1,8 @@ +SELECT u.DisplayName, COUNT(p.Id) AS PostCount, SUM(v.BountyAmount) AS TotalBounty +FROM Users u +JOIN Posts p ON u.Id = p.OwnerUserId +LEFT JOIN Votes v ON p.Id = v.PostId AND v.VoteTypeId = 8 +WHERE u.Reputation > 1000 +GROUP BY u.DisplayName +ORDER BY PostCount DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/16141.sql b/vortex-bench/sqlstorm/stackoverflow/16141.sql new file mode 100644 index 00000000000..235831ecd3e --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/16141.sql @@ -0,0 +1,15 @@ +SELECT + u.DisplayName, + COUNT(p.Id) AS TotalPosts, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS TotalQuestions, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + SUM(CASE WHEN p.PostTypeId IN (4, 5) THEN 1 ELSE 0 END) AS TotalTagWikis +FROM + Users u +LEFT JOIN + Posts p ON u.Id = p.OwnerUserId +GROUP BY + u.DisplayName +ORDER BY + TotalPosts DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/16315.sql b/vortex-bench/sqlstorm/stackoverflow/16315.sql new file mode 100644 index 00000000000..cd140b2e79a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/16315.sql @@ -0,0 +1,15 @@ + +SELECT + u.DisplayName, + COUNT(p.Id) AS NumberOfPosts, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS Questions, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS Answers +FROM + Users u +LEFT JOIN + Posts p ON u.Id = p.OwnerUserId +GROUP BY + u.DisplayName, u.Id +ORDER BY + NumberOfPosts DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/1654.sql b/vortex-bench/sqlstorm/stackoverflow/1654.sql new file mode 100644 index 00000000000..475b31d3ac3 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/1654.sql @@ -0,0 +1,66 @@ +WITH UserReputation AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + U.Reputation, + U.CreationDate, + ROW_NUMBER() OVER (ORDER BY U.Reputation 
DESC) AS ReputationRank + FROM + Users U +), +PostStatistics AS ( + SELECT + P.OwnerUserId, + COUNT(P.Id) AS TotalPosts, + COUNT(P.AcceptedAnswerId) AS AcceptedAnswers, + SUM(P.Score) AS TotalScore, + AVG(P.ViewCount) AS AvgViews + FROM + Posts P + GROUP BY + P.OwnerUserId +), +TopContributors AS ( + SELECT + UR.UserId, + UR.DisplayName, + PS.TotalPosts, + PS.AcceptedAnswers, + PS.TotalScore, + PS.AvgViews, + RANK() OVER (ORDER BY PS.TotalScore DESC) AS ScoreRank + FROM + UserReputation UR + JOIN + PostStatistics PS ON UR.UserId = PS.OwnerUserId + WHERE + UR.Reputation > 1000 +) + +SELECT + TC.DisplayName, + TC.TotalPosts, + TC.AcceptedAnswers, + TC.TotalScore, + TC.AvgViews, + CASE + WHEN TC.ScoreRank <= 10 THEN 'Top Contributor' + ELSE 'Contributor' + END AS ContributorType, + COALESCE(B.BadgeCount, 0) AS BadgeCount +FROM + TopContributors TC +LEFT JOIN ( + SELECT + UserId, + COUNT(*) AS BadgeCount + FROM + Badges + GROUP BY + UserId +) B ON TC.UserId = B.UserId +WHERE + TC.AvgViews > 50 +ORDER BY + TC.TotalScore DESC, + BadgeCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/16614.sql b/vortex-bench/sqlstorm/stackoverflow/16614.sql new file mode 100644 index 00000000000..361c1be1b74 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/16614.sql @@ -0,0 +1,15 @@ +SELECT + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + p.Score, + p.ViewCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/16832.sql b/vortex-bench/sqlstorm/stackoverflow/16832.sql new file mode 100644 index 00000000000..c64c22f52be --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/16832.sql @@ -0,0 +1,20 @@ + +SELECT + u.DisplayName, + u.Reputation, + p.Title, + p.CreationDate, + COUNT(c.Id) AS CommentCount +FROM + Users u +JOIN + Posts p ON u.Id = p.OwnerUserId +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE 
+ p.PostTypeId = 1 +GROUP BY + u.DisplayName, u.Reputation, p.Title, p.CreationDate +ORDER BY + u.Reputation DESC, p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17012.sql b/vortex-bench/sqlstorm/stackoverflow/17012.sql new file mode 100644 index 00000000000..6aad3226259 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17012.sql @@ -0,0 +1,15 @@ +SELECT + u.DisplayName, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/1718.sql b/vortex-bench/sqlstorm/stackoverflow/1718.sql new file mode 100644 index 00000000000..f9e216eaa03 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/1718.sql @@ -0,0 +1,65 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) AS PostRank + FROM + Posts p + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' +), +PostVotes AS ( + SELECT + v.PostId, + COUNT(CASE WHEN v.VoteTypeId = 2 THEN 1 END) AS UpVotesCount, + COUNT(CASE WHEN v.VoteTypeId = 3 THEN 1 END) AS DownVotesCount + FROM + Votes v + GROUP BY + v.PostId +), +FilteredPosts AS ( + SELECT + r.PostId, + r.Title, + r.CreationDate, + r.Score, + r.ViewCount, + COALESCE(pv.UpVotesCount, 0) AS UpVotes, + COALESCE(pv.DownVotesCount, 0) AS DownVotes + FROM + RankedPosts r + LEFT JOIN + PostVotes pv ON r.PostId = pv.PostId + WHERE + r.PostRank <= 10 +), +CommentsInfo AS ( + SELECT + c.PostId, + COUNT(c.Id) AS CommentCount, + STRING_AGG(c.Text, '; ') AS CommentTexts + FROM + Comments c + GROUP BY + c.PostId +) +SELECT + fp.PostId, + fp.Title, + fp.CreationDate, + fp.Score, + fp.ViewCount, + fp.UpVotes, + fp.DownVotes, + COALESCE(ci.CommentCount, 0) AS TotalComments, + 
COALESCE(ci.CommentTexts, '') AS LastCommentsSnippet +FROM + FilteredPosts fp +LEFT JOIN + CommentsInfo ci ON fp.PostId = ci.PostId +ORDER BY + fp.Score DESC, fp.ViewCount ASC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/17363.sql b/vortex-bench/sqlstorm/stackoverflow/17363.sql new file mode 100644 index 00000000000..9a141f563b2 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17363.sql @@ -0,0 +1,19 @@ +SELECT + Users.DisplayName, + Posts.Title, + Posts.CreationDate, + Posts.ViewCount, + COUNT(Comments.Id) AS CommentCount +FROM + Posts +JOIN + Users ON Posts.OwnerUserId = Users.Id +LEFT JOIN + Comments ON Comments.PostId = Posts.Id +WHERE + Posts.PostTypeId = 1 +GROUP BY + Users.DisplayName, Posts.Title, Posts.CreationDate, Posts.ViewCount +ORDER BY + Posts.ViewCount DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/17377.sql b/vortex-bench/sqlstorm/stackoverflow/17377.sql new file mode 100644 index 00000000000..10942aae211 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17377.sql @@ -0,0 +1,19 @@ + +SELECT + p.Title AS PostTitle, + p.CreationDate AS PostCreationDate, + u.DisplayName AS AuthorName, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17408.sql b/vortex-bench/sqlstorm/stackoverflow/17408.sql new file mode 100644 index 00000000000..56658c10350 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17408.sql @@ -0,0 +1,26 @@ + +SELECT + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + p.Score, + p.ViewCount, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Title, + p.CreationDate, + 
u.DisplayName, + p.Score, + p.ViewCount, + p.Id +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17469.sql b/vortex-bench/sqlstorm/stackoverflow/17469.sql new file mode 100644 index 00000000000..a469c4c2900 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17469.sql @@ -0,0 +1,21 @@ +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS Owner, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17540.sql b/vortex-bench/sqlstorm/stackoverflow/17540.sql new file mode 100644 index 00000000000..d738cec26d6 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17540.sql @@ -0,0 +1,23 @@ + +SELECT + p.Title, + p.CreationDate, + u.DisplayName AS OwnerName, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVoteCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17626.sql b/vortex-bench/sqlstorm/stackoverflow/17626.sql new file mode 100644 index 00000000000..2bfee1f266b --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17626.sql @@ -0,0 +1,22 @@ + +SELECT + P.Id AS PostId, + P.Title, + P.CreationDate, + U.DisplayName AS OwnerDisplayName, + P.Score, + P.ViewCount, + COUNT(C.Id) AS CommentCount +FROM + Posts P +JOIN + Users U ON P.OwnerUserId = U.Id 
+LEFT JOIN + Comments C ON P.Id = C.PostId +WHERE + P.PostTypeId = 1 +GROUP BY + P.Id, P.Title, P.CreationDate, U.DisplayName, P.Score, P.ViewCount +ORDER BY + P.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17668.sql b/vortex-bench/sqlstorm/stackoverflow/17668.sql new file mode 100644 index 00000000000..cd12877636a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17668.sql @@ -0,0 +1,22 @@ +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount, + COUNT(v.Id) AS VoteCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/17674.sql b/vortex-bench/sqlstorm/stackoverflow/17674.sql new file mode 100644 index 00000000000..ab48057b08d --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17674.sql @@ -0,0 +1,24 @@ + +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpvoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownvoteCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17697.sql b/vortex-bench/sqlstorm/stackoverflow/17697.sql new file mode 100644 index 00000000000..b51bd545be9 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17697.sql @@ -0,0 +1,20 @@ + +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS 
CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17723.sql b/vortex-bench/sqlstorm/stackoverflow/17723.sql new file mode 100644 index 00000000000..2c4f2d78fac --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17723.sql @@ -0,0 +1,20 @@ +SELECT + u.DisplayName, + p.Title, + p.CreationDate, + p.ViewCount, + p.Score, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + u.DisplayName, p.Title, p.CreationDate, p.ViewCount, p.Score +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/17843.sql b/vortex-bench/sqlstorm/stackoverflow/17843.sql new file mode 100644 index 00000000000..a059e03e1b6 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17843.sql @@ -0,0 +1,21 @@ + +SELECT + p.Id AS PostId, + p.Title, + p.Score, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.Score, p.CreationDate, u.DisplayName +ORDER BY + p.Score DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/17894.sql b/vortex-bench/sqlstorm/stackoverflow/17894.sql new file mode 100644 index 00000000000..73b5fa1471e --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17894.sql @@ -0,0 +1,19 @@ +SELECT + U.DisplayName AS UserDisplayName, + P.Title AS PostTitle, + P.CreationDate AS PostCreationDate, + P.Score AS PostScore, + COUNT(C.Id) AS CommentCount +FROM + Users U +JOIN + Posts P ON U.Id = P.OwnerUserId +LEFT JOIN + Comments C ON P.Id = C.PostId +WHERE + P.PostTypeId = 1 +GROUP BY 
+ U.DisplayName, P.Title, P.CreationDate, P.Score +ORDER BY + P.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/17952.sql b/vortex-bench/sqlstorm/stackoverflow/17952.sql new file mode 100644 index 00000000000..ce513a83e77 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/17952.sql @@ -0,0 +1,16 @@ +SELECT + p.Id AS PostId, + p.Title, + u.DisplayName AS OwnerDisplayName, + p.CreationDate, + p.Score, + p.ViewCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/18025.sql b/vortex-bench/sqlstorm/stackoverflow/18025.sql new file mode 100644 index 00000000000..200469e3a5a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/18025.sql @@ -0,0 +1,18 @@ +SELECT + p.Id AS PostID, + p.Title, + u.DisplayName AS OwnerDisplayName, + p.CreationDate, + p.Score, + p.ViewCount, + p.AnswerCount, + p.CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/18098.sql b/vortex-bench/sqlstorm/stackoverflow/18098.sql new file mode 100644 index 00000000000..9a9de7516e5 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/18098.sql @@ -0,0 +1,22 @@ +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount, + COUNT(v.Id) AS VoteCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/18205.sql b/vortex-bench/sqlstorm/stackoverflow/18205.sql new file mode 100644 index 
00000000000..10411e4d03a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/18205.sql @@ -0,0 +1,25 @@ + +SELECT + p.Id AS PostId, + p.Title, + u.DisplayName AS OwnerDisplayName, + p.CreationDate, + p.Score, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, u.DisplayName, p.CreationDate, p.Score +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/18663.sql b/vortex-bench/sqlstorm/stackoverflow/18663.sql new file mode 100644 index 00000000000..58236c62a91 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/18663.sql @@ -0,0 +1,19 @@ +SELECT + P.Id AS PostID, + P.Title, + P.CreationDate, + U.DisplayName AS Author, + COUNT(C.Id) AS CommentCount +FROM + Posts P +JOIN + Users U ON P.OwnerUserId = U.Id +LEFT JOIN + Comments C ON P.Id = C.PostId +WHERE + P.PostTypeId = 1 +GROUP BY + P.Id, P.Title, P.CreationDate, U.DisplayName +ORDER BY + P.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/18796.sql b/vortex-bench/sqlstorm/stackoverflow/18796.sql new file mode 100644 index 00000000000..12720310424 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/18796.sql @@ -0,0 +1,21 @@ + +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS UserDisplayName, + pt.Name AS PostType, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +JOIN + PostTypes pt ON p.PostTypeId = pt.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName, pt.Name +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/18889.sql 
b/vortex-bench/sqlstorm/stackoverflow/18889.sql new file mode 100644 index 00000000000..4fb0cc56833 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/18889.sql @@ -0,0 +1,25 @@ + +SELECT + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + p.ViewCount, + p.Score, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Title, + p.CreationDate, + u.DisplayName, + p.ViewCount, + p.Score +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/19035.sql b/vortex-bench/sqlstorm/stackoverflow/19035.sql new file mode 100644 index 00000000000..f627dc0f43b --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19035.sql @@ -0,0 +1,8 @@ + +SELECT p.Id, p.Title, p.CreationDate, u.DisplayName, COUNT(c.Id) AS CommentCount +FROM Posts p +JOIN Users u ON p.OwnerUserId = u.Id +LEFT JOIN Comments c ON p.Id = c.PostId +GROUP BY p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/19250.sql b/vortex-bench/sqlstorm/stackoverflow/19250.sql new file mode 100644 index 00000000000..b989031c3e2 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19250.sql @@ -0,0 +1,19 @@ +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Id, p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/19393.sql b/vortex-bench/sqlstorm/stackoverflow/19393.sql new file mode 100644 index 00000000000..477442e13d2 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19393.sql @@ -0,0 +1,24 @@ + +SELECT + u.DisplayName, + p.Title, + p.CreationDate, + 
p.Score, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Votes v ON p.Id = v.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + u.DisplayName, p.Title, p.CreationDate, p.Score +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/19417.sql b/vortex-bench/sqlstorm/stackoverflow/19417.sql new file mode 100644 index 00000000000..39926d12753 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19417.sql @@ -0,0 +1,16 @@ +SELECT + p.Id AS PostId, + p.Title, + u.DisplayName AS Author, + p.CreationDate, + p.Score, + p.ViewCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/19586.sql b/vortex-bench/sqlstorm/stackoverflow/19586.sql new file mode 100644 index 00000000000..cf5e03b8472 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19586.sql @@ -0,0 +1,15 @@ +SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(p.Id) AS PostCount, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS QuestionCount, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS AnswerCount +FROM + Users u +LEFT JOIN + Posts p ON u.Id = p.OwnerUserId +GROUP BY + u.Id, u.DisplayName +ORDER BY + PostCount DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/19677.sql b/vortex-bench/sqlstorm/stackoverflow/19677.sql new file mode 100644 index 00000000000..139ed44702b --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19677.sql @@ -0,0 +1,15 @@ +SELECT + u.DisplayName, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.Score DESC +LIMIT 10; \ No newline at end of file diff --git 
a/vortex-bench/sqlstorm/stackoverflow/19790.sql b/vortex-bench/sqlstorm/stackoverflow/19790.sql new file mode 100644 index 00000000000..96862245558 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19790.sql @@ -0,0 +1,17 @@ +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + p.Score, + p.ViewCount, + p.AnswerCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +WHERE + p.PostTypeId = 1 +ORDER BY + p.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/19820.sql b/vortex-bench/sqlstorm/stackoverflow/19820.sql new file mode 100644 index 00000000000..55a51cfdc81 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19820.sql @@ -0,0 +1,19 @@ + +SELECT + p.Title, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount +FROM + Posts p +JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + Comments c ON p.Id = c.PostId +WHERE + p.PostTypeId = 1 +GROUP BY + p.Title, p.CreationDate, u.DisplayName +ORDER BY + p.CreationDate DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/19855.sql b/vortex-bench/sqlstorm/stackoverflow/19855.sql new file mode 100644 index 00000000000..9c980978092 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/19855.sql @@ -0,0 +1,6 @@ +SELECT P.Id, P.Title, P.CreationDate, U.DisplayName, P.Score +FROM Posts P +JOIN Users U ON P.OwnerUserId = U.Id +WHERE P.PostTypeId = 1 +ORDER BY P.CreationDate DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/2017.sql b/vortex-bench/sqlstorm/stackoverflow/2017.sql new file mode 100644 index 00000000000..c876c2eb62a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/2017.sql @@ -0,0 +1,71 @@ +WITH UserStatistics AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + U.Reputation, + COALESCE(SUM(CASE WHEN P.PostTypeId = 1 THEN 1 ELSE 0 END), 0) AS QuestionCount, + COALESCE(SUM(CASE WHEN P.PostTypeId = 2 THEN 1 ELSE 0 
END), 0) AS AnswerCount, + COALESCE(SUM(CASE WHEN V.VoteTypeId = 2 THEN 1 ELSE 0 END), 0) AS UpVoteCount, + COALESCE(SUM(CASE WHEN V.VoteTypeId = 3 THEN 1 ELSE 0 END), 0) AS DownVoteCount + FROM + Users U + LEFT JOIN + Posts P ON U.Id = P.OwnerUserId + LEFT JOIN + Votes V ON P.Id = V.PostId + GROUP BY + U.Id, U.DisplayName, U.Reputation +), +TopUsers AS ( + SELECT + UserId, + DisplayName, + Reputation, + QuestionCount, + AnswerCount, + UpVoteCount, + DownVoteCount, + ROW_NUMBER() OVER (ORDER BY Reputation DESC) AS Rank + FROM + UserStatistics +), +PostDetails AS ( + SELECT + P.Id AS PostId, + P.Title, + P.CreationDate, + P.OwnerUserId, + CASE + WHEN PH.PostId IS NOT NULL THEN 'Closed' + ELSE 'Open' + END AS PostStatus, + COUNT(CASE WHEN C.Id IS NOT NULL THEN 1 END) AS CommentCount, + COUNT(DISTINCT PL.RelatedPostId) AS RelatedPostCount + FROM + Posts P + LEFT JOIN + PostHistory PH ON P.Id = PH.PostId AND PH.PostHistoryTypeId IN (10, 11) + LEFT JOIN + Comments C ON P.Id = C.PostId + LEFT JOIN + PostLinks PL ON P.Id = PL.PostId + GROUP BY + P.Id, P.Title, P.CreationDate, P.OwnerUserId, PH.PostId +) +SELECT + TU.DisplayName, + TU.Reputation, + PD.PostId, + PD.Title, + PD.CreationDate, + PD.PostStatus, + PD.CommentCount, + PD.RelatedPostCount +FROM + TopUsers TU +JOIN + PostDetails PD ON TU.UserId = PD.OwnerUserId +WHERE + TU.Rank <= 10 +ORDER BY + TU.Reputation DESC, PD.PostStatus; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/20832.sql b/vortex-bench/sqlstorm/stackoverflow/20832.sql new file mode 100644 index 00000000000..ba7e892c638 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/20832.sql @@ -0,0 +1,98 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.ViewCount, + p.Score, + p.CreationDate, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.ViewCount DESC, p.Score DESC) AS Rank, + COALESCE(p.AcceptedAnswerId, -1) AS AnswerStatus + FROM + Posts p + WHERE + p.CreationDate >= cast('2024-10-01' as date) - 
INTERVAL '1 year' + AND p.ViewCount IS NOT NULL +), +PostStatistics AS ( + SELECT + u.Id AS UserId, + SUM(CASE WHEN p.AnswerCount > 0 THEN 1 ELSE 0 END) AS TotalQuestionsAnswered, + COUNT(DISTINCT p.Id) AS TotalPosts, + AVG(p.Score) AS AverageScore, + SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END) AS BronzeBadges + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id +), +ClosedPostReasons AS ( + SELECT + ph.PostId, + COUNT(*) AS CloseVoteCount, + STRING_AGG(CASE WHEN ph.PostHistoryTypeId = 10 THEN cr.Name END, ', ') AS CloseReasons + FROM + PostHistory ph + LEFT JOIN + CloseReasonTypes cr ON cr.Id::text = ph.Comment + GROUP BY + ph.PostId +), +UserPostLinkages AS ( + SELECT + pl.PostId, + pl.RelatedPostId, + COUNT(pl.Id) AS LinkCount + FROM + PostLinks pl + JOIN + Posts p ON pl.PostId = p.Id + WHERE + p.CreationDate < cast('2024-10-01' as date) - INTERVAL '6 months' + GROUP BY + pl.PostId, pl.RelatedPostId +), +FinalStats AS ( + SELECT + ps.UserId, + ps.TotalQuestionsAnswered, + ps.TotalPosts, + ps.AverageScore, + ps.GoldBadges, + ps.SilverBadges, + ps.BronzeBadges, + COALESCE(rp.PostId, 0) AS TopPostId, + COALESCE(rp.Title, 'No Trending Post') AS TopPostTitle, + COALESCE(rp.ViewCount, 0) AS TopPostViewCount, + COALESCE(rp.Score, 0) AS TopPostScore, + COALESCE(cpr.CloseVoteCount, 0) AS TotalCloseVotes, + COALESCE(cpr.CloseReasons, 'No Close Reasons') AS CloseReasons, + COALESCE(pl.LinkCount, 0) AS TotalRelatedLinks + FROM + PostStatistics ps + LEFT JOIN + RankedPosts rp ON ps.UserId = rp.PostId + LEFT JOIN + ClosedPostReasons cpr ON rp.PostId = cpr.PostId + LEFT JOIN + UserPostLinkages pl ON ps.UserId = pl.PostId +) +SELECT + *, + CASE + WHEN TotalPosts = 0 THEN 'No activity' + ELSE 'Active User' + END AS UserActivityStatus +FROM + FinalStats +WHERE + TotalQuestionsAnswered > 
5 + AND GoldBadges > 0 +ORDER BY + TotalPosts DESC, + AverageScore DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/21377.sql b/vortex-bench/sqlstorm/stackoverflow/21377.sql new file mode 100644 index 00000000000..981f29dcb99 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/21377.sql @@ -0,0 +1,75 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + p.OwnerUserId, + u.Reputation, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.CreationDate DESC) AS PostRank + FROM + Posts p + JOIN + Users u ON p.OwnerUserId = u.Id + WHERE + p.CreationDate > cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' +), +AggregatedData AS ( + SELECT + rp.OwnerUserId, + COUNT(rp.PostId) AS TotalPosts, + SUM(rp.Score) AS TotalScore, + AVG(rp.Reputation) AS AvgReputation + FROM + RankedPosts rp + WHERE + rp.Score > 0 AND rp.PostRank <= 5 + GROUP BY + rp.OwnerUserId +), +ClosedPosts AS ( + SELECT + p.Id AS ClosedPostId, + ph.UserId, + ph.CreationDate AS CloseDate, + ph.Comment AS CloseReason, + COUNT(*) OVER (PARTITION BY p.Id) AS CloseCount + FROM + PostHistory ph + JOIN + Posts p ON ph.PostId = p.Id + WHERE + ph.PostHistoryTypeId = 10 +), +UserBadges AS ( + SELECT + b.UserId, + STRING_AGG(b.Name, ', ') AS BadgeNames + FROM + Badges b + GROUP BY + b.UserId +) +SELECT + u.DisplayName, + a.TotalPosts, + a.TotalScore, + a.AvgReputation, + COALESCE(cp.CloseCount, 0) AS TotalClosedPosts, + ub.BadgeNames +FROM + Users u +JOIN + AggregatedData a ON u.Id = a.OwnerUserId +LEFT JOIN + ClosedPosts cp ON cp.UserId = u.Id +LEFT JOIN + UserBadges ub ON ub.UserId = u.Id +WHERE + u.Reputation > 1000 + AND NOT EXISTS (SELECT 1 FROM Votes v WHERE v.UserId = u.Id AND v.VoteTypeId = 3) +ORDER BY + a.TotalScore DESC, + a.TotalPosts DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/2228.sql b/vortex-bench/sqlstorm/stackoverflow/2228.sql new file mode 
100644 index 00000000000..b4517edc401 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/2228.sql @@ -0,0 +1,70 @@ + +WITH RankedPosts AS ( + SELECT + P.Id AS PostId, + P.Title, + P.CreationDate, + P.Score, + COUNT(CASE WHEN C.Id IS NOT NULL THEN 1 END) AS CommentCount, + ROW_NUMBER() OVER (PARTITION BY P.OwnerUserId ORDER BY P.CreationDate DESC) AS PostRank, + P.OwnerUserId + FROM + Posts P + LEFT JOIN + Comments C ON P.Id = C.PostId + WHERE + P.CreationDate >= CURRENT_DATE - INTERVAL '1 year' + GROUP BY + P.Id, P.Title, P.CreationDate, P.Score, P.OwnerUserId +), +UserBadges AS ( + SELECT + U.Id AS UserId, + COUNT(CASE WHEN B.Class = 1 THEN 1 END) AS GoldCount, + COUNT(CASE WHEN B.Class = 2 THEN 1 END) AS SilverCount, + COUNT(CASE WHEN B.Class = 3 THEN 1 END) AS BronzeCount + FROM + Users U + LEFT JOIN + Badges B ON U.Id = B.UserId + GROUP BY + U.Id +), +TopUsers AS ( + SELECT + U.Id, + U.DisplayName, + U.Reputation, + UBad.GoldCount, + UBad.SilverCount, + UBad.BronzeCount, + R.PostRank, + R.PostId + FROM + Users U + JOIN + UserBadges UBad ON U.Id = UBad.UserId + LEFT JOIN + RankedPosts R ON U.Id = R.OwnerUserId + WHERE + U.Reputation > 1000 +) +SELECT + U.DisplayName, + U.Reputation, + COALESCE(R.PostId, -1) AS MostRecentPost, + COALESCE(R.Title, 'No Posts') AS LatestPostTitle, + COALESCE(R.CommentCount, 0) AS CommentsOnLatestPost, + (SELECT STRING_AGG(CASE WHEN B.Class = 1 THEN 'Gold' WHEN B.Class = 2 THEN 'Silver' WHEN B.Class = 3 THEN 'Bronze' END, ', ') + FROM Badges B + WHERE B.UserId = U.Id) AS BadgeList +FROM + Users U +LEFT JOIN + TopUsers T ON U.Id = T.Id +LEFT JOIN + RankedPosts R ON T.PostId = R.PostId +WHERE + T.PostRank IS NULL OR T.PostRank <= 5 +ORDER BY + U.Reputation DESC, U.DisplayName ASC; diff --git a/vortex-bench/sqlstorm/stackoverflow/22619.sql b/vortex-bench/sqlstorm/stackoverflow/22619.sql new file mode 100644 index 00000000000..bb8592d0504 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/22619.sql @@ -0,0 +1,71 @@ + +WITH 
RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.Tags, + p.CreationDate, + p.Score, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.CreationDate DESC) AS UserPostRank, + p.OwnerUserId + FROM + Posts p + WHERE + p.CreationDate >= DATE_TRUNC('month', CAST('2024-10-01' AS DATE)) +), +UserBadges AS ( + SELECT + u.Id AS UserId, + COUNT(b.Id) AS BadgeCount, + SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END) AS BronzeBadges + FROM + Users u + LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id +), +PostActivity AS ( + SELECT + ph.PostId, + ph.CreationDate, + ph.PostHistoryTypeId, + CASE + WHEN ph.PostHistoryTypeId IN (10, 11) THEN 'Closure Activity' + WHEN ph.PostHistoryTypeId IN (6, 4) THEN 'Tag or Title Activity' + ELSE 'Other Activity' + END AS ActivityType, + COUNT(DISTINCT ph.UserId) AS UniqueUserCount + FROM + PostHistory ph + GROUP BY + ph.PostId, ph.CreationDate, ph.PostHistoryTypeId +) +SELECT + rp.PostId, + rp.Title, + rp.Tags, + ub.BadgeCount, + ub.GoldBadges, + ub.SilverBadges, + ub.BronzeBadges, + pa.ActivityType, + pa.UniqueUserCount, + CASE + WHEN pa.UniqueUserCount > 5 THEN 'Highly Active' + WHEN pa.UniqueUserCount BETWEEN 3 AND 5 THEN 'Moderately Active' + ELSE 'Low Activity' + END AS ActivityLevel +FROM + RankedPosts rp +LEFT JOIN + UserBadges ub ON rp.OwnerUserId = ub.UserId +LEFT JOIN + PostActivity pa ON rp.PostId = pa.PostId +WHERE + rp.UserPostRank <= 3 + AND (pa.ActivityType IS NOT NULL OR ub.BadgeCount > 0) +ORDER BY + rp.Score DESC, rp.CreationDate ASC; diff --git a/vortex-bench/sqlstorm/stackoverflow/22971.sql b/vortex-bench/sqlstorm/stackoverflow/22971.sql new file mode 100644 index 00000000000..7622b16701d --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/22971.sql @@ -0,0 +1,52 @@ + +WITH RecentActivity AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(DISTINCT p.Id) AS TotalPosts, + 
SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS QuestionsCount, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS AnswersCount, + SUM(CASE WHEN p.PostTypeId IN (3, 4, 5) THEN 1 ELSE 0 END) AS WikiPostsCount, + SUM(v.BountyAmount) AS TotalBounty, + RANK() OVER (PARTITION BY u.Id ORDER BY SUM(v.BountyAmount) DESC) AS BountyRank + FROM Users u + LEFT JOIN Posts p ON u.Id = p.OwnerUserId + LEFT JOIN Votes v ON p.Id = v.PostId AND v.VoteTypeId IN (8, 9) + WHERE u.Reputation > 1000 AND u.Location IS NOT NULL + GROUP BY u.Id, u.DisplayName +), +TopUsers AS ( + SELECT + UserId, + DisplayName, + TotalPosts, + QuestionsCount, + AnswersCount, + WikiPostsCount, + TotalBounty, + BountyRank + FROM RecentActivity + WHERE BountyRank <= 5 +), +UserBadges AS ( + SELECT + b.UserId, + COUNT(CASE WHEN b.Class = 1 THEN 1 END) AS GoldBadges, + COUNT(CASE WHEN b.Class = 2 THEN 1 END) AS SilverBadges, + COUNT(CASE WHEN b.Class = 3 THEN 1 END) AS BronzeBadges + FROM Badges b + GROUP BY b.UserId +) +SELECT + tu.DisplayName, + tu.TotalPosts, + tu.QuestionsCount, + tu.AnswersCount, + tu.WikiPostsCount, + COALESCE(ub.GoldBadges, 0) AS GoldBadges, + COALESCE(ub.SilverBadges, 0) AS SilverBadges, + COALESCE(ub.BronzeBadges, 0) AS BronzeBadges, + tu.TotalBounty +FROM TopUsers tu +LEFT JOIN UserBadges ub ON tu.UserId = ub.UserId +ORDER BY tu.TotalBounty DESC, tu.DisplayName ASC; diff --git a/vortex-bench/sqlstorm/stackoverflow/23542.sql b/vortex-bench/sqlstorm/stackoverflow/23542.sql new file mode 100644 index 00000000000..1a66df4de60 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/23542.sql @@ -0,0 +1,48 @@ + +WITH UserReputation AS ( + SELECT Id AS UserId, Reputation, LastAccessDate, + ROW_NUMBER() OVER (ORDER BY Reputation DESC) AS Rank + FROM Users + WHERE Reputation IS NOT NULL +), +PostStatistics AS ( + SELECT P.Id AS PostId, P.OwnerUserId, P.PostTypeId, + COUNT(CASE WHEN C.Id IS NOT NULL THEN 1 END) AS CommentCount, + SUM(CASE WHEN V.VoteTypeId = 2 THEN 1 ELSE 0 END) AS 
UpVotes, + SUM(CASE WHEN V.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes, + AVG(LENGTH(P.Body)) AS AvgBodyLength + FROM Posts P + LEFT JOIN Comments C ON P.Id = C.PostId + LEFT JOIN Votes V ON P.Id = V.PostId + GROUP BY P.Id, P.OwnerUserId, P.PostTypeId +), +TopPosts AS ( + SELECT PS.PostId, PS.OwnerUserId, PS.CommentCount, PS.UpVotes, PS.DownVotes, + PS.AvgBodyLength, + COALESCE(UR.Reputation, 0) AS OwnerReputation, + CASE + WHEN PS.CommentCount > 10 THEN 'Highly Discussed' + WHEN PS.CommentCount BETWEEN 5 AND 10 THEN 'Moderately Discussed' + ELSE 'Less Discussed' + END AS DiscussionCategory + FROM PostStatistics PS + LEFT JOIN UserReputation UR ON PS.OwnerUserId = UR.UserId +), +AggregateResults AS ( + SELECT DiscussionCategory, + COUNT(*) AS TotalPosts, + AVG(OwnerReputation) AS AvgOwnerReputation, + SUM(UpVotes) AS TotalUpVotes, + SUM(DownVotes) AS TotalDownVotes + FROM TopPosts + GROUP BY DiscussionCategory +) +SELECT AR.DiscussionCategory, AR.TotalPosts, + AR.AvgOwnerReputation, AR.TotalUpVotes, AR.TotalDownVotes, + CASE + WHEN AR.TotalPosts > 50 THEN 'Very Active' + WHEN AR.TotalPosts BETWEEN 20 AND 50 THEN 'Active' + ELSE 'Less Active' + END AS ActivityLevel +FROM AggregateResults AR +ORDER BY AR.TotalPosts DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/242.sql b/vortex-bench/sqlstorm/stackoverflow/242.sql new file mode 100644 index 00000000000..30a980e18b8 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/242.sql @@ -0,0 +1,61 @@ +WITH PostInfo AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + p.AnswerCount, + u.DisplayName AS OwnerName, + COALESCE(p.Score, 0) + COALESCE(b.NumBadges, 0) AS EngagementScore + FROM + Posts p + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN ( + SELECT UserId, COUNT(*) AS NumBadges + FROM Badges + WHERE Date > cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + GROUP BY UserId + ) b ON u.Id = b.UserId + WHERE + p.CreationDate > cast('2024-10-01 12:34:56' 
as timestamp) - INTERVAL '3 months' + AND p.ViewCount > 100 +), +HighScorePosts AS ( + SELECT + PostId, + EngagementScore, + ROW_NUMBER() OVER (ORDER BY EngagementScore DESC) AS Rank + FROM + PostInfo +), +RecentVotes AS ( + SELECT + PostId, + COUNT(*) AS VoteCount + FROM + Votes + WHERE + CreationDate > cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 month' + GROUP BY + PostId +) +SELECT + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + COALESCE(v.VoteCount, 0) AS RecentVoteCount, + h.Rank +FROM + PostInfo p +LEFT JOIN + RecentVotes v ON p.PostId = v.PostId +JOIN + HighScorePosts h ON p.PostId = h.PostId +WHERE + h.Rank <= 10 +ORDER BY + p.Score DESC, + h.Rank \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/25080.sql b/vortex-bench/sqlstorm/stackoverflow/25080.sql new file mode 100644 index 00000000000..8a7c75e6460 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/25080.sql @@ -0,0 +1,49 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.Tags, + p.Body, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes, + ROW_NUMBER() OVER (PARTITION BY p.Tags ORDER BY COUNT(c.Id) DESC) AS Rank + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + WHERE + p.PostTypeId = 1 + GROUP BY + p.Id, p.Title, p.Tags, p.Body +), +FilteredPosts AS ( + SELECT + PostId, + Title, + Tags, + Body, + CommentCount, + UpVotes, + DownVotes + FROM + RankedPosts + WHERE + Rank <= 5 +) +SELECT + fp.Tags, + STRING_AGG(fp.Title, '; ') AS TopQuestions, + SUM(fp.CommentCount) AS TotalComments, + SUM(fp.UpVotes) AS TotalUpVotes, + SUM(fp.DownVotes) AS TotalDownVotes, + COUNT(fp.PostId) AS TotalPosts +FROM + FilteredPosts fp +GROUP BY + fp.Tags +ORDER BY + TotalUpVotes DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/255.sql 
b/vortex-bench/sqlstorm/stackoverflow/255.sql new file mode 100644 index 00000000000..05b095f27a7 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/255.sql @@ -0,0 +1,79 @@ +WITH RecentPosts AS ( + SELECT + P.Id AS PostId, + P.Title, + P.CreationDate, + P.Score, + P.ViewCount, + P.AnswerCount, + U.DisplayName AS OwnerDisplayName, + COALESCE(V.UpVotes, 0) AS UpVotes, + COALESCE(V.DownVotes, 0) AS DownVotes, + DENSE_RANK() OVER (PARTITION BY EXTRACT(YEAR FROM P.CreationDate) ORDER BY P.CreationDate DESC) AS YearRank + FROM + Posts P + JOIN + Users U ON P.OwnerUserId = U.Id + LEFT JOIN ( + SELECT + PostId, + SUM(CASE WHEN VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes + FROM + Votes + GROUP BY + PostId + ) V ON P.Id = V.PostId + WHERE + P.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 YEAR' +), +TopPosts AS ( + SELECT + PostId, + Title, + CreationDate, + Score, + ViewCount, + AnswerCount, + OwnerDisplayName, + UpVotes, + DownVotes + FROM + RecentPosts + WHERE + YearRank <= 10 +), +PostDetails AS ( + SELECT + T.*, + PH.PostHistoryTypeId, + PH.CreationDate AS HistoryDate, + PH.Comment AS CloseReason + FROM + TopPosts T + LEFT JOIN + PostHistory PH ON T.PostId = PH.PostId AND PH.PostHistoryTypeId IN (10, 11) +) +SELECT + PD.Title, + PD.OwnerDisplayName, + PD.CreationDate, + PD.Score, + PD.ViewCount, + PD.AnswerCount, + CASE + WHEN PD.CloseReason IS NOT NULL THEN 'Closed: ' || PD.CloseReason + ELSE 'Active' + END AS PostStatus, + (SELECT COUNT(*) FROM Comments C WHERE C.PostId = PD.PostId) AS CommentCount, + STRING_AGG(T.TagName, ', ') AS Tags +FROM + PostDetails PD +LEFT JOIN + PostLinks PL ON PD.PostId = PL.PostId +LEFT JOIN + Tags T ON PL.RelatedPostId = T.Id +GROUP BY + PD.PostId, PD.OwnerDisplayName, PD.Title, PD.CreationDate, PD.Score, PD.ViewCount, PD.AnswerCount, PD.CloseReason +ORDER BY + PD.Score DESC, PD.ViewCount DESC; \ No newline at end of file diff --git 
a/vortex-bench/sqlstorm/stackoverflow/25507.sql b/vortex-bench/sqlstorm/stackoverflow/25507.sql new file mode 100644 index 00000000000..1156716f6db --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/25507.sql @@ -0,0 +1,58 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.Body, + U.DisplayName AS OwnerDisplayName, + p.CreationDate, + p.ViewCount, + p.AnswerCount, + p.Tags, + RANK() OVER (PARTITION BY p.Tags ORDER BY p.CreationDate DESC) AS RankByTags + FROM + Posts p + JOIN + Users U ON p.OwnerUserId = U.Id + WHERE + p.PostTypeId = 1 + AND p.CreationDate >= '2023-01-01' + AND p.ViewCount > 100 +), + +HistoryWithComments AS ( + SELECT + ph.PostId, + ph.UserDisplayName, + ph.CreationDate AS HistoryDate, + ph.Comment AS EditComment, + ph.Text AS EditText + FROM + PostHistory ph + JOIN + Posts p ON ph.PostId = p.Id + WHERE + ph.PostHistoryTypeId IN (4, 5, 6) + AND ph.CreationDate >= '2023-01-01' +) + +SELECT + rp.PostId, + rp.Title, + rp.OwnerDisplayName, + rp.CreationDate AS QuestionDate, + rp.ViewCount, + rp.AnswerCount, + rp.Tags, + hwc.HistoryDate, + hwc.UserDisplayName AS Editor, + hwc.EditComment, + hwc.EditText +FROM + RankedPosts rp +LEFT JOIN + HistoryWithComments hwc ON rp.PostId = hwc.PostId +WHERE + rp.RankByTags = 1 +ORDER BY + rp.Tags, + rp.CreationDate DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/26640.sql b/vortex-bench/sqlstorm/stackoverflow/26640.sql new file mode 100644 index 00000000000..27a177c17e0 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/26640.sql @@ -0,0 +1,67 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title AS PostTitle, + p.Tags, + p.CreationDate, + p.AcceptedAnswerId, + COUNT(a.Id) AS AnswerCount, + ROW_NUMBER() OVER(PARTITION BY p.Id ORDER BY p.CreationDate DESC) AS rn + FROM + Posts p + LEFT JOIN + Posts a ON p.Id = a.ParentId + WHERE + p.PostTypeId = 1 + GROUP BY + p.Id, p.Title, p.Tags, p.CreationDate, p.AcceptedAnswerId +), +MostVotedAnswers AS ( + 
SELECT + a.Id AS AnswerId, + a.ParentId, + a.Score, + u.DisplayName AS OwnerName + FROM + Posts a + JOIN + Users u ON a.OwnerUserId = u.Id + WHERE + a.PostTypeId = 2 + ORDER BY + a.Score DESC +), +ClosedPosts AS ( + SELECT + ph.PostId, + ph.CreationDate AS CloseDate, + c.Name AS CloseReason + FROM + PostHistory ph + JOIN + CloseReasonTypes c ON ph.Comment = CAST(c.Id AS VARCHAR) + WHERE + ph.PostHistoryTypeId = 10 +) +SELECT + rp.PostId, + rp.PostTitle, + rp.Tags, + rp.CreationDate AS QuestionDate, + mp.AnswerId, + mp.OwnerName AS AnswerOwner, + mp.Score AS AnswerScore, + cp.CloseDate, + cp.CloseReason +FROM + RankedPosts rp +LEFT JOIN + MostVotedAnswers mp ON rp.PostId = mp.ParentId +LEFT JOIN + ClosedPosts cp ON rp.PostId = cp.PostId +WHERE + rp.rn = 1 +ORDER BY + AnswerScore DESC NULLS LAST, + QuestionDate DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/2724.sql b/vortex-bench/sqlstorm/stackoverflow/2724.sql new file mode 100644 index 00000000000..c5b85cf348e --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/2724.sql @@ -0,0 +1,57 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.Score DESC) AS ScoreRank, + COUNT(c.Id) AS CommentCount, + AVG(CASE WHEN v.VoteTypeId = 8 THEN v.BountyAmount ELSE NULL END) AS AvgBounty + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + p.Id, p.OwnerUserId, p.Title, p.CreationDate, p.Score +), +UserStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + u.Reputation, + u.CreationDate, + u.LastAccessDate, + COALESCE(SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END), 0) AS GoldBadges, + COALESCE(SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END), 0) AS SilverBadges, + COALESCE(SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END), 0) AS BronzeBadges + FROM + Users u + LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id, u.DisplayName, u.Reputation, u.CreationDate, 
u.LastAccessDate +) +SELECT + us.DisplayName, + us.Reputation, + rp.Title, + rp.Score, + rp.CreationDate, + rp.CommentCount, + us.GoldBadges, + us.SilverBadges, + us.BronzeBadges, + rp.AvgBounty +FROM + RankedPosts rp +JOIN + UserStats us ON rp.PostId = us.UserId +WHERE + rp.ScoreRank <= 3 + AND us.Reputation > (SELECT AVG(Reputation) FROM Users WHERE Reputation IS NOT NULL) +ORDER BY + us.Reputation DESC, rp.Score DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/27339.sql b/vortex-bench/sqlstorm/stackoverflow/27339.sql new file mode 100644 index 00000000000..6e172de17ff --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/27339.sql @@ -0,0 +1,56 @@ +WITH UserReputation AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + u.Reputation, + COUNT(DISTINCT b.Id) AS BadgeCount + FROM Users u + LEFT JOIN Badges b ON u.Id = b.UserId + WHERE u.Reputation > 1000 + GROUP BY u.Id, u.DisplayName, u.Reputation +), + +PostStatistics AS ( + SELECT + p.OwnerUserId, + COUNT(p.Id) AS TotalPosts, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS TotalQuestions, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + SUM(CASE WHEN p.PostTypeId IN (1, 2, 6, 7) THEN p.Score ELSE 0 END) AS TotalScore, + AVG(p.ViewCount) AS AvgViews + FROM Posts p + GROUP BY p.OwnerUserId +), + +CombinedStatistics AS ( + SELECT + ur.UserId, + ur.DisplayName, + ur.Reputation, + ur.BadgeCount, + ps.TotalPosts, + ps.TotalQuestions, + ps.TotalAnswers, + ps.TotalScore, + ps.AvgViews + FROM UserReputation ur + JOIN PostStatistics ps ON ur.UserId = ps.OwnerUserId +) + +SELECT + UserId, + DisplayName, + Reputation, + BadgeCount, + TotalPosts, + TotalQuestions, + TotalAnswers, + TotalScore, + AvgViews, + CASE + WHEN TotalPosts > 100 THEN 'Veteran' + WHEN TotalPosts > 50 THEN 'Active' + ELSE 'Newcomer' + END AS UserRank +FROM CombinedStatistics +ORDER BY Reputation DESC, TotalScore DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/28524.sql 
b/vortex-bench/sqlstorm/stackoverflow/28524.sql new file mode 100644 index 00000000000..d221a755de8 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/28524.sql @@ -0,0 +1,84 @@ + +WITH TagCount AS ( + SELECT + TRIM(tag) AS TagName, + COUNT(*) AS PostCount + FROM ( + SELECT + UNNEST(string_to_array(SUBSTRING(Tags FROM 2 FOR LENGTH(Tags) - 2), '><')) AS tag + FROM + Posts + WHERE + PostTypeId = 1 + ) AS extracted_tags + GROUP BY + TRIM(tag) +), +TopTags AS ( + SELECT + TagName, + PostCount, + ROW_NUMBER() OVER (ORDER BY PostCount DESC) AS Rank + FROM + TagCount + WHERE + PostCount > 1 +), +MostActiveUsers AS ( + SELECT + Users.DisplayName, + Users.Reputation, + COUNT(Posts.Id) AS QuestionsAnswered, + SUM(COALESCE(Posts.AnswerCount, 0)) AS TotalAnswers, + SUM(COALESCE(Posts.Score, 0)) AS TotalScore + FROM + Users + JOIN + Posts ON Users.Id = Posts.OwnerUserId + WHERE + Posts.PostTypeId = 2 + GROUP BY + Users.DisplayName, Users.Reputation +), +TagUsage AS ( + SELECT + Posts.Id AS PostId, + Posts.Title, + Posts.CreationDate, + UNNEST(string_to_array(SUBSTRING(Posts.Tags FROM 2 FOR LENGTH(Posts.Tags) - 2), '><')) AS TagName, + Users.DisplayName AS Owner + FROM + Posts + JOIN + Users ON Posts.OwnerUserId = Users.Id + WHERE + Posts.PostTypeId = 1 +) +SELECT + TopTags.TagName, + TopTags.PostCount, + MostActiveUsers.DisplayName, + MostActiveUsers.Reputation, + MostActiveUsers.QuestionsAnswered, + MostActiveUsers.TotalAnswers, + MostActiveUsers.TotalScore, + COUNT(TagUsage.PostId) AS TagPostCount, + MIN(TagUsage.CreationDate) AS EarliestPostDate, + MAX(TagUsage.CreationDate) AS LatestPostDate +FROM + TopTags +JOIN + TagUsage ON TopTags.TagName = TagUsage.TagName +JOIN + MostActiveUsers ON TagUsage.Owner = MostActiveUsers.DisplayName +GROUP BY + TopTags.TagName, + TopTags.PostCount, + MostActiveUsers.DisplayName, + MostActiveUsers.Reputation, + MostActiveUsers.QuestionsAnswered, + MostActiveUsers.TotalAnswers, + MostActiveUsers.TotalScore +ORDER BY + TopTags.PostCount 
DESC, MostActiveUsers.TotalScore DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/2867.sql b/vortex-bench/sqlstorm/stackoverflow/2867.sql new file mode 100644 index 00000000000..947870ba8cf --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/2867.sql @@ -0,0 +1,77 @@ +WITH RecentPosts AS ( + SELECT + p.Id, + p.Title, + p.CreationDate, + p.ViewCount, + p.Score, + p.AnswerCount, + p.CommentCount, + u.DisplayName AS OwnerName, + CASE + WHEN p.ClosedDate IS NOT NULL THEN 'Closed' + ELSE 'Open' + END AS PostStatus + FROM + Posts p + JOIN + Users u ON p.OwnerUserId = u.Id + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '30 days' +), +PostStatistics AS ( + SELECT + rp.Id, + rp.Title, + rp.CreationDate, + rp.ViewCount, + rp.Score, + rp.AnswerCount, + rp.CommentCount, + rp.OwnerName, + rp.PostStatus, + ROW_NUMBER() OVER (PARTITION BY rp.PostStatus ORDER BY rp.Score DESC) AS Rank + FROM + RecentPosts rp +), +TopPosts AS ( + SELECT * + FROM PostStatistics + WHERE Rank <= 5 +), +PostVoteCounts AS ( + SELECT + p.Id AS PostId, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes, + SUM(CASE WHEN v.VoteTypeId = 5 THEN 1 ELSE 0 END) AS Favorites + FROM + Posts p + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + p.Id +) +SELECT + tp.Title, + tp.CreationDate, + tp.ViewCount, + tp.Score, + tp.AnswerCount, + tp.CommentCount, + tp.OwnerName, + tp.PostStatus, + COALESCE(v.UpVotes, 0) AS TotalUpVotes, + COALESCE(v.DownVotes, 0) AS TotalDownVotes, + COALESCE(v.Favorites, 0) AS TotalFavorites, + CASE + WHEN v.UpVotes IS NOT NULL AND v.DownVotes IS NOT NULL THEN + (CAST(v.UpVotes AS decimal) / NULLIF((v.UpVotes + v.DownVotes), 0)) * 100 + ELSE 0 + END AS UpVotePercentage +FROM + TopPosts tp +LEFT JOIN + PostVoteCounts v ON tp.Id = v.PostId +ORDER BY + tp.PostStatus, tp.Score DESC; \ No newline at end of file diff --git 
a/vortex-bench/sqlstorm/stackoverflow/30424.sql b/vortex-bench/sqlstorm/stackoverflow/30424.sql new file mode 100644 index 00000000000..6243adfacba --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/30424.sql @@ -0,0 +1,66 @@ +WITH RecursivePostStats AS ( + SELECT + p.Id AS PostId, + p.Score, + p.ViewCount, + p.AnswerCount, + p.CommentCount, + p.CreationDate, + p.OwnerUserId, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.CreationDate DESC) AS rn + FROM Posts p + WHERE p.PostTypeId = 1 +), +UserBadges AS ( + SELECT + b.UserId, + COUNT(CASE WHEN b.Class = 1 THEN 1 END) AS GoldBadges, + COUNT(CASE WHEN b.Class = 2 THEN 1 END) AS SilverBadges, + COUNT(CASE WHEN b.Class = 3 THEN 1 END) AS BronzeBadges + FROM Badges b + GROUP BY b.UserId +), +VoteCounts AS ( + SELECT + v.PostId, + COUNT(CASE WHEN v.VoteTypeId = 2 THEN 1 END) AS UpVotes, + COUNT(CASE WHEN v.VoteTypeId = 3 THEN 1 END) AS DownVotes + FROM Votes v + GROUP BY v.PostId +), +PostHistoryAnalysis AS ( + SELECT + ph.PostId, + MAX(ph.CreationDate) AS LastActivityDate, + COUNT(DISTINCT CASE WHEN ph.PostHistoryTypeId = 10 THEN ph.UserId END) AS CloseVotes, + COUNT(DISTINCT CASE WHEN ph.PostHistoryTypeId = 11 THEN ph.UserId END) AS ReopenVotes + FROM PostHistory ph + GROUP BY ph.PostId +) +SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + ps.ViewCount, + ps.AnswerCount, + ps.CommentCount, + ps.OwnerUserId, + u.DisplayName AS OwnerDisplayName, + COALESCE(ub.GoldBadges, 0) AS GoldBadges, + COALESCE(ub.SilverBadges, 0) AS SilverBadges, + COALESCE(ub.BronzeBadges, 0) AS BronzeBadges, + COALESCE(vc.UpVotes, 0) - COALESCE(vc.DownVotes, 0) AS NetVotes, + pha.LastActivityDate, + pha.CloseVotes, + pha.ReopenVotes +FROM Posts p +JOIN RecursivePostStats ps ON p.Id = ps.PostId +JOIN Users u ON p.OwnerUserId = u.Id +LEFT JOIN UserBadges ub ON u.Id = ub.UserId +LEFT JOIN VoteCounts vc ON p.Id = vc.PostId +LEFT JOIN PostHistoryAnalysis pha ON p.Id = pha.PostId +WHERE ps.rn = 1 + AND p.CreationDate 
>= (cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year') +ORDER BY NetVotes DESC, p.CreationDate DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/3080.sql b/vortex-bench/sqlstorm/stackoverflow/3080.sql new file mode 100644 index 00000000000..d9b3c826033 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/3080.sql @@ -0,0 +1,66 @@ + +WITH RankedBadges AS ( + SELECT + b.UserId, + b.Name, + b.Class, + RANK() OVER (PARTITION BY b.UserId ORDER BY b.Date DESC) AS BadgeRank + FROM + Badges b +), +PostInformation AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.OwnerUserId, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes, + MAX(bh.CreationDate) AS LastEditDate + FROM + Posts p + LEFT JOIN Comments c ON p.Id = c.PostId + LEFT JOIN Votes v ON p.Id = v.PostId + LEFT JOIN PostHistory bh ON p.Id = bh.PostId AND bh.PostHistoryTypeId IN (4, 5) + WHERE + p.CreationDate > CURRENT_TIMESTAMP - INTERVAL '1 year' + GROUP BY + p.Id, p.Title, p.CreationDate, p.OwnerUserId +), +UserStatistics AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COALESCE(MAX(b.Name), 'No Badges') AS BestBadge, + COUNT(DISTINCT pi.PostId) AS PostCount, + SUM(COALESCE(pi.CommentCount, 0)) AS TotalComments, + SUM(pi.UpVotes) AS TotalUpVotes, + SUM(pi.DownVotes) AS TotalDownVotes + FROM + Users u + LEFT JOIN RankedBadges b ON u.Id = b.UserId AND b.BadgeRank = 1 + LEFT JOIN PostInformation pi ON u.Id = pi.OwnerUserId + GROUP BY + u.Id, u.DisplayName +) +SELECT + us.UserId, + us.DisplayName, + us.BestBadge, + us.PostCount, + us.TotalComments, + us.TotalUpVotes, + us.TotalDownVotes, + CASE + WHEN us.PostCount > 10 THEN 'Active' + ELSE 'Less Active' + END AS ActivityStatus, + NULLIF(us.TotalUpVotes - us.TotalDownVotes, 0) AS VoteBalance +FROM + UserStatistics us +WHERE + us.TotalComments > 0 OR us.PostCount > 0 +ORDER BY + 
us.TotalUpVotes DESC, us.TotalComments DESC +LIMIT 20; diff --git a/vortex-bench/sqlstorm/stackoverflow/30899.sql b/vortex-bench/sqlstorm/stackoverflow/30899.sql new file mode 100644 index 00000000000..b36177a8d14 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/30899.sql @@ -0,0 +1,45 @@ + +WITH RecursiveTagCounts AS ( + SELECT TagName, COUNT(*) AS PostCount + FROM Tags + GROUP BY TagName +), +UserReputation AS ( + SELECT U.Id AS UserId, U.DisplayName, U.Reputation, + RANK() OVER (ORDER BY U.Reputation DESC) AS ReputationRank + FROM Users U +), +RecentPosts AS ( + SELECT P.Id AS PostId, P.OwnerUserId, P.CreationDate, P.Title AS PostTitle, + PP.LastActivityDate, PT.Name AS PostTypeName, + ROW_NUMBER() OVER (PARTITION BY P.OwnerUserId ORDER BY P.CreationDate DESC) AS RecentPostRank + FROM Posts P + JOIN PostTypes PT ON P.PostTypeId = PT.Id + LEFT JOIN Posts PP ON P.ParentId = PP.Id + WHERE P.CreationDate > TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 month' + AND P.OwnerUserId IS NOT NULL +) +SELECT + U.DisplayName AS AuthorDisplayName, + U.Reputation AS AuthorReputation, + U.ReputationRank, + RT.TagName, + TC.PostCount, + RP.PostTitle, + RP.CreationDate AS RecentPostDate, + RP.PostTypeName, + RP.RecentPostRank, + COALESCE(( + SELECT COUNT(*) + FROM Votes V + WHERE V.PostId = RP.PostId AND V.VoteTypeId = 2 + ), 0) AS UpVotes +FROM UserReputation U +JOIN RecentPosts RP ON U.UserId = RP.OwnerUserId +JOIN PostLinks PL ON PL.PostId = RP.PostId +JOIN Tags RT ON RT.Id = PL.RelatedPostId +JOIN RecursiveTagCounts TC ON RT.TagName = TC.TagName +WHERE RP.RecentPostRank = 1 + AND RP.LastActivityDate > TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 week' + AND TC.PostCount > 5 +ORDER BY U.Reputation DESC, TC.PostCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/31034.sql b/vortex-bench/sqlstorm/stackoverflow/31034.sql new file mode 100644 index 00000000000..0d60bba5bef --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/31034.sql @@ -0,0 +1,60 @@ +WITH 
RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.OwnerUserId, + p.CreationDate, + p.Score, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.Score DESC) AS RowNum, + COUNT(*) OVER (PARTITION BY p.OwnerUserId) AS TotalPosts + FROM Posts p + WHERE p.CreationDate >= cast('2024-10-01' as date) - INTERVAL '1 year' +), +PostStatistics AS ( + SELECT + rp.OwnerUserId, + COUNT(rp.PostId) AS PostCount, + AVG(rp.Score) AS AvgScore, + MAX(rp.CreationDate) AS LastPostDate + FROM RankedPosts rp + WHERE rp.RowNum = 1 + GROUP BY rp.OwnerUserId +), +UserBadges AS ( + SELECT + u.Id AS UserId, + STRING_AGG(b.Name, ', ') AS BadgeNames + FROM Users u + LEFT JOIN Badges b ON u.Id = b.UserId + GROUP BY u.Id +), +PostHistorySummary AS ( + SELECT + ph.PostId, + COUNT(ph.Id) AS EditCount, + MAX(ph.CreationDate) AS LastEditTime + FROM PostHistory ph + WHERE ph.PostHistoryTypeId IN (4, 5) + GROUP BY ph.PostId +) +SELECT + u.DisplayName, + u.Reputation, + ps.PostCount, + ps.AvgScore, + ps.LastPostDate, + ub.BadgeNames, + COALESCE(phs.EditCount, 0) AS EditCount, + phs.LastEditTime, + CASE + WHEN ps.PostCount > 5 THEN 'Active User' + WHEN ps.LastPostDate >= cast('2024-10-01' as date) - INTERVAL '6 months' THEN 'Recent Contributor' + ELSE 'Inactive User' + END AS UserStatus +FROM Users u +JOIN PostStatistics ps ON u.Id = ps.OwnerUserId +LEFT JOIN UserBadges ub ON u.Id = ub.UserId +LEFT JOIN PostHistorySummary phs ON ps.OwnerUserId = phs.PostId +WHERE u.Reputation > 1000 +ORDER BY u.Reputation DESC, ps.AvgScore DESC +LIMIT 50; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/31711.sql b/vortex-bench/sqlstorm/stackoverflow/31711.sql new file mode 100644 index 00000000000..d71fba38cf7 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/31711.sql @@ -0,0 +1,67 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.PostTypeId, + p.OwnerUserId, + p.CreationDate, + RANK() OVER (PARTITION BY p.OwnerUserId ORDER BY p.Score DESC) AS Rank, + 
COUNT(DISTINCT c.Id) AS CommentCount, + p.Score + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.CreationDate >= CURRENT_DATE - INTERVAL '1 year' + GROUP BY + p.Id, p.Title, p.PostTypeId, p.OwnerUserId, p.CreationDate, p.Score +), +UserBadges AS ( + SELECT + b.UserId, + COUNT(CASE WHEN b.Class = 1 THEN 1 END) AS GoldBadges, + COUNT(CASE WHEN b.Class = 2 THEN 1 END) AS SilverBadges, + COUNT(CASE WHEN b.Class = 3 THEN 1 END) AS BronzeBadges + FROM + Badges b + GROUP BY + b.UserId +), +TopPosts AS ( + SELECT + rp.PostId, + rp.Title, + rp.OwnerUserId, + ub.GoldBadges, + ub.SilverBadges, + ub.BronzeBadges, + rp.CommentCount, + ROW_NUMBER() OVER (ORDER BY rp.Score DESC) AS TopRank + FROM + RankedPosts rp + JOIN + UserBadges ub ON rp.OwnerUserId = ub.UserId + WHERE + rp.Rank = 1 +) +SELECT + p.Title, + u.DisplayName, + u.Reputation, + COALESCE(tp.GoldBadges, 0) AS GoldBadges, + COALESCE(tp.SilverBadges, 0) AS SilverBadges, + COALESCE(tp.BronzeBadges, 0) AS BronzeBadges, + tp.CommentCount +FROM + TopPosts tp +JOIN + Users u ON tp.OwnerUserId = u.Id +JOIN + Posts p ON tp.PostId = p.Id +WHERE + u.Reputation > 500 +ORDER BY + tp.CommentCount DESC, + u.Reputation DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/31859.sql b/vortex-bench/sqlstorm/stackoverflow/31859.sql new file mode 100644 index 00000000000..1c1de47b6c3 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/31859.sql @@ -0,0 +1,73 @@ + +WITH RECURSIVE UserBadgeCounts AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END) AS GoldBadgeCount, + SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END) AS SilverBadgeCount, + SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END) AS BronzeBadgeCount + FROM + Users u + LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id, u.DisplayName +), +PostMetrics AS ( + SELECT + p.OwnerUserId, + COUNT(DISTINCT p.Id) AS TotalPosts, + COUNT(DISTINCT CASE WHEN p.PostTypeId = 1 THEN p.Id END) AS TotalQuestions, + COUNT(DISTINCT 
CASE WHEN p.PostTypeId = 2 THEN p.Id END) AS TotalAnswers, + SUM(p.Score) AS TotalScore, + AVG(p.ViewCount) AS AvgViewCount, + MAX(p.CreationDate) AS LastPostDate + FROM + Posts p + GROUP BY + p.OwnerUserId +), +CombinedMetrics AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COALESCE(ubc.GoldBadgeCount, 0) AS GoldBadgeCount, + COALESCE(ubc.SilverBadgeCount, 0) AS SilverBadgeCount, + COALESCE(ubc.BronzeBadgeCount, 0) AS BronzeBadgeCount, + COALESCE(pm.TotalPosts, 0) AS TotalPosts, + COALESCE(pm.TotalQuestions, 0) AS TotalQuestions, + COALESCE(pm.TotalAnswers, 0) AS TotalAnswers, + COALESCE(pm.TotalScore, 0) AS TotalScore, + COALESCE(pm.AvgViewCount, 0) AS AvgViewCount, + pm.LastPostDate + FROM + Users u + LEFT JOIN + UserBadgeCounts ubc ON u.Id = ubc.UserId + LEFT JOIN + PostMetrics pm ON u.Id = pm.OwnerUserId +) +SELECT + c.UserId, + c.DisplayName, + c.GoldBadgeCount, + c.SilverBadgeCount, + c.BronzeBadgeCount, + c.TotalPosts, + c.TotalQuestions, + c.TotalAnswers, + c.TotalScore, + c.AvgViewCount, + CASE + WHEN c.LastPostDate IS NOT NULL THEN DATE '2024-10-01' - c.LastPostDate + ELSE NULL + END AS DaysSinceLastPost +FROM + CombinedMetrics c +WHERE + (c.TotalQuestions > 0 OR c.TotalAnswers > 0) + AND (c.GoldBadgeCount > 0 OR c.SilverBadgeCount > 0 OR c.BronzeBadgeCount > 0) +ORDER BY + c.TotalScore DESC, + c.TotalPosts DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/stackoverflow/32025.sql b/vortex-bench/sqlstorm/stackoverflow/32025.sql new file mode 100644 index 00000000000..1c6d4a8e5b2 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/32025.sql @@ -0,0 +1,77 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.ViewCount, + p.Score, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) AS PostRank + FROM + Posts p + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + AND p.PostTypeId = 1 +), +UserActivity AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + 
COUNT(DISTINCT p.Id) AS QuestionsAsked, + SUM(v.BountyAmount) AS TotalBountySpent + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId AND p.PostTypeId = 1 + LEFT JOIN + Votes v ON u.Id = v.UserId + WHERE + u.Reputation > 1000 + AND u.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '2 years' + GROUP BY + u.Id, u.DisplayName +), +TopUsers AS ( + SELECT + ua.UserId, + ua.DisplayName, + ua.QuestionsAsked, + ua.TotalBountySpent, + RANK() OVER (ORDER BY ua.QuestionsAsked DESC) AS UserRank + FROM + UserActivity ua + WHERE + ua.TotalBountySpent IS NOT NULL +), +PostHistorySummary AS ( + SELECT + ph.PostId, + MAX(CASE WHEN pht.Name = 'Post Closed' THEN ph.CreationDate END) AS LastCloseDate, + COUNT(CASE WHEN ph.PostHistoryTypeId = 10 THEN 1 END) AS ClosureCount + FROM + PostHistory ph + JOIN + PostHistoryTypes pht ON ph.PostHistoryTypeId = pht.Id + GROUP BY + ph.PostId +) +SELECT + rp.PostId, + rp.Title, + rp.CreationDate, + rp.ViewCount, + rp.Score, + tu.DisplayName AS TopUserDisplayName, + tu.QuestionsAsked, + tu.TotalBountySpent, + phs.LastCloseDate, + phs.ClosureCount +FROM + RankedPosts rp +LEFT JOIN + TopUsers tu ON rp.ViewCount > 100 AND tu.UserRank <= 5 +LEFT JOIN + PostHistorySummary phs ON rp.PostId = phs.PostId +WHERE + rp.PostRank <= 10 +ORDER BY + rp.Score DESC, rp.CreationDate ASC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/32348.sql b/vortex-bench/sqlstorm/stackoverflow/32348.sql new file mode 100644 index 00000000000..8c6a1d37955 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/32348.sql @@ -0,0 +1,93 @@ + +WITH RECURSIVE RecursivePostHierarchy AS ( + SELECT + Id AS PostId, + Title, + ParentId, + CreationDate, + 0 AS Level + FROM + Posts + WHERE + ParentId IS NULL + + UNION ALL + + SELECT + p.Id AS PostId, + p.Title, + p.ParentId, + p.CreationDate, + r.Level + 1 + FROM + Posts p + INNER JOIN + RecursivePostHierarchy r ON p.ParentId = r.PostId +), +PostStats AS ( + SELECT + p.Id AS 
PostId, + COUNT(c.Id) AS CommentCount, + COUNT(DISTINCT v.UserId) FILTER (WHERE v.VoteTypeId = 2) AS UpvoteCount, + COUNT(DISTINCT v.UserId) FILTER (WHERE v.VoteTypeId = 3) AS DownvoteCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 WHEN v.VoteTypeId = 3 THEN -1 ELSE 0 END) AS NetVotes, + MAX(ph.CreationDate) AS LastHistoryUpdate + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + LEFT JOIN + PostHistory ph ON p.Id = ph.PostId + GROUP BY + p.Id +), +UserBadges AS ( + SELECT + u.Id AS UserId, + COUNT(b.Id) FILTER (WHERE b.Class = 1) AS GoldBadges, + COUNT(b.Id) FILTER (WHERE b.Class = 2) AS SilverBadges, + COUNT(b.Id) FILTER (WHERE b.Class = 3) AS BronzeBadges, + SUM(CASE WHEN b.TagBased THEN 1 ELSE 0 END) AS TagBasedBadges + FROM + Users u + LEFT JOIN + Badges b ON u.Id = b.UserId + GROUP BY + u.Id +) +SELECT + p.Title AS PostTitle, + p.CreationDate AS PostCreationDate, + ps.CommentCount, + ps.UpvoteCount, + ps.DownvoteCount, + ps.NetVotes, + COALESCE(u.DisplayName, 'Unknown User') AS OwnerDisplayName, + ub.GoldBadges, + ub.SilverBadges, + ub.BronzeBadges, + ph.Level AS PostLevel, + ph.ParentId AS ParentPostId, + CASE + WHEN ps.LastHistoryUpdate IS NOT NULL AND ps.LastHistoryUpdate < TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 year' + THEN 'Stale Post' + ELSE 'Active Post' + END AS PostStatus +FROM + Posts p +LEFT JOIN + PostStats ps ON p.Id = ps.PostId +LEFT JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + UserBadges ub ON u.Id = ub.UserId +LEFT JOIN + RecursivePostHierarchy ph ON p.Id = ph.PostId +WHERE + p.CreationDate > TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 year' +ORDER BY + ps.NetVotes DESC, + ps.CommentCount DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/stackoverflow/33282.sql b/vortex-bench/sqlstorm/stackoverflow/33282.sql new file mode 100644 index 00000000000..7ee541706ef --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/33282.sql @@ -0,0 +1,79 @@ +WITH RankedPosts AS ( + SELECT + 
p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + u.Id AS UserId, + u.DisplayName AS OwnerDisplayName, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.Score DESC) AS RankPerUser + FROM Posts p + JOIN Users u ON p.OwnerUserId = u.Id + WHERE p.CreationDate >= cast('2024-10-01' as date) - INTERVAL '1 year' + AND p.PostTypeId = 1 +), +PostVoteCounts AS ( + SELECT + v.PostId, + COUNT(CASE WHEN v.VoteTypeId = 2 THEN 1 END) AS UpVotes, + COUNT(CASE WHEN v.VoteTypeId = 3 THEN 1 END) AS DownVotes + FROM Votes v + GROUP BY v.PostId +), +AggregatedData AS ( + SELECT + rp.PostId, + rp.Title, + rp.CreationDate, + rp.Score, + rp.ViewCount, + rp.OwnerDisplayName, + COALESCE(pvc.UpVotes, 0) AS TotalUpVotes, + COALESCE(pvc.DownVotes, 0) AS TotalDownVotes, + rp.RankPerUser + FROM RankedPosts rp + LEFT JOIN PostVoteCounts pvc ON rp.PostId = pvc.PostId +), +TopPosts AS ( + SELECT + PostId, + Title, + CreationDate, + Score, + ViewCount, + OwnerDisplayName, + TotalUpVotes, + TotalDownVotes, + RankPerUser + FROM AggregatedData + WHERE RankPerUser <= 5 +), +PostHistorySummary AS ( + SELECT + ph.PostId, + MIN(ph.CreationDate) AS FirstHistoryDate, + COUNT(*) AS TotalEdits, + SUM(CASE WHEN ph.PostHistoryTypeId = 10 THEN 1 ELSE 0 END) AS CloseVotes + FROM PostHistory ph + GROUP BY ph.PostId +) +SELECT + tp.PostId, + tp.Title, + tp.OwnerDisplayName, + tp.CreationDate, + tp.Score, + tp.ViewCount, + tp.TotalUpVotes, + tp.TotalDownVotes, + phs.FirstHistoryDate, + phs.TotalEdits, + phs.CloseVotes, + CASE + WHEN phs.TotalEdits > 0 THEN 'Edited' + ELSE 'Not Edited' + END AS EditStatus +FROM TopPosts tp +LEFT JOIN PostHistorySummary phs ON tp.PostId = phs.PostId +ORDER BY tp.Score DESC, tp.CreationDate DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/33473.sql b/vortex-bench/sqlstorm/stackoverflow/33473.sql new file mode 100644 index 00000000000..eef332e3d7f --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/33473.sql @@ -0,0 
+1,65 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) AS RankScore, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + AND p.Score > 0 + GROUP BY + p.Id, p.Title, p.CreationDate, p.Score, p.ViewCount, p.PostTypeId +), +RecentBadges AS ( + SELECT + b.UserId, + COUNT(b.Id) AS BadgeCount, + STRING_AGG(b.Name, ', ') AS BadgeNames + FROM + Badges b + WHERE + b.Date >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + GROUP BY + b.UserId +), +PostLinkCount AS ( + SELECT + pl.PostId, + COUNT(pl.RelatedPostId) AS RelatedPostsCount + FROM + PostLinks pl + GROUP BY + pl.PostId +) +SELECT + rp.PostId, + rp.Title, + rp.CreationDate, + rp.Score, + rp.CommentCount, + rp.UpVotes, + rp.DownVotes, + COALESCE(rb.BadgeCount, 0) AS RecentBadgesCount, + COALESCE(rb.BadgeNames, 'No Badges') AS RecentBadgeNames, + COALESCE(plc.RelatedPostsCount, 0) AS RelatedPostsCount +FROM + RankedPosts rp +LEFT JOIN + RecentBadges rb ON rp.PostId = rb.UserId +LEFT JOIN + PostLinkCount plc ON rp.PostId = plc.PostId +WHERE + rp.RankScore <= 5 +ORDER BY + rp.Score DESC, rp.CreationDate DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/3382.sql b/vortex-bench/sqlstorm/stackoverflow/3382.sql new file mode 100644 index 00000000000..a4c8886d928 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/3382.sql @@ -0,0 +1,71 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.OwnerUserId, + COUNT(c.Id) AS CommentCount, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.CreationDate DESC) AS RN + FROM 
+ Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + GROUP BY + p.Id, p.Title, p.CreationDate, p.OwnerUserId +), +UserStatistics AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END) AS BronzeBadges, + COALESCE(SUM(v.BountyAmount), 0) AS TotalBounty + FROM + Users u + LEFT JOIN + Badges b ON u.Id = b.UserId + LEFT JOIN + Votes v ON u.Id = v.UserId AND v.VoteTypeId = 9 + WHERE + u.Reputation > 1000 + GROUP BY + u.Id, u.DisplayName +), +ClosedPosts AS ( + SELECT + ph.PostId, + ph.CreationDate, + MAX(ph.CreationDate) AS LastClosedDate + FROM + PostHistory ph + WHERE + ph.PostHistoryTypeId = 10 + GROUP BY + ph.PostId, ph.CreationDate +) +SELECT + p.Title, + p.CommentCount, + u.DisplayName AS Owner, + us.GoldBadges, + us.SilverBadges, + us.BronzeBadges, + us.TotalBounty, + cp.LastClosedDate, + COALESCE(CASE WHEN cp.LastClosedDate IS NOT NULL THEN 'Closed' ELSE 'Open' END, 'Unknown') AS PostStatus +FROM + RankedPosts p +JOIN + Users u ON p.OwnerUserId = u.Id +JOIN + UserStatistics us ON u.Id = us.UserId +LEFT JOIN + ClosedPosts cp ON p.PostId = cp.PostId +WHERE + p.RN = 1 +ORDER BY + p.CreationDate DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/33929.sql b/vortex-bench/sqlstorm/stackoverflow/33929.sql new file mode 100644 index 00000000000..2e5e6921287 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/33929.sql @@ -0,0 +1,99 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + p.OwnerUserId, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.CreationDate DESC) AS PostRank + FROM + Posts p + WHERE + p.PostTypeId = 1 +), +UserReputation AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + u.Reputation, + 
COUNT(DISTINCT p.Id) AS QuestionsCount, + SUM(COALESCE(v.BountyAmount, 0)) AS TotalBounty + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId AND p.PostTypeId = 1 + LEFT JOIN + Votes v ON p.Id = v.PostId AND v.VoteTypeId IN (8, 9) + WHERE + u.Reputation > 0 + GROUP BY + u.Id, u.DisplayName, u.Reputation +), +TopUsers AS ( + SELECT + ur.UserId, + ur.DisplayName, + ur.Reputation, + ur.QuestionsCount, + ur.TotalBounty, + DENSE_RANK() OVER (ORDER BY ur.Reputation DESC) AS ReputationRank + FROM + UserReputation ur + WHERE + ur.QuestionsCount > 5 +), +OpenQuestions AS ( + SELECT + p.Id, + p.Title, + p.CreationDate, + c.UserDisplayName AS LastCommenter, + ROW_NUMBER() OVER (PARTITION BY p.Id ORDER BY c.CreationDate DESC) AS LatestCommentRank + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.PostTypeId = 1 AND p.ClosedDate IS NULL +), +FinalResults AS ( + SELECT + tp.UserId, + tp.DisplayName, + tp.Reputation, + tp.QuestionsCount, + tp.TotalBounty, + rq.PostId, + rq.Title, + rq.CreationDate, + rq.Score, + rq.ViewCount, + oq.LastCommenter + FROM + TopUsers tp + JOIN + RankedPosts rq ON tp.UserId = rq.OwnerUserId + LEFT JOIN + OpenQuestions oq ON rq.PostId = oq.Id + WHERE + tp.ReputationRank <= 10 +) + +SELECT + fr.UserId, + fr.DisplayName, + fr.Reputation, + fr.QuestionsCount, + fr.TotalBounty, + fr.PostId, + fr.Title AS PostTitle, + fr.CreationDate, + fr.Score, + fr.ViewCount, + COALESCE(fr.LastCommenter, 'No comments yet') AS LastCommenter +FROM + FinalResults fr +ORDER BY + fr.Reputation DESC, + fr.CreationDate DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/33961.sql b/vortex-bench/sqlstorm/stackoverflow/33961.sql new file mode 100644 index 00000000000..145224d73df --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/33961.sql @@ -0,0 +1,82 @@ + +WITH RecentPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + u.DisplayName AS OwnerDisplayName, + 
COUNT(DISTINCT c.Id) AS CommentCount, + ROW_NUMBER() OVER (PARTITION BY u.Id ORDER BY p.CreationDate DESC) AS UserPostRank + FROM + Posts p + JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.CreationDate >= TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '30 days' AND + p.PostTypeId = 1 + GROUP BY + p.Id, p.Title, p.CreationDate, p.Score, p.ViewCount, u.DisplayName, u.Id +), +TopUsers AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + SUM(p.Score) AS TotalScore, + DENSE_RANK() OVER (ORDER BY SUM(p.Score) DESC) AS UserRank + FROM + Users u + JOIN + Posts p ON u.Id = p.OwnerUserId + WHERE + p.PostTypeId = 1 + GROUP BY + u.Id, u.DisplayName + HAVING + COUNT(p.Id) > 5 +), +PostHistoryAggregated AS ( + SELECT + ph.PostId, + STRING_AGG(ph.Comment, ', ') AS EditComments, + MAX(ph.CreationDate) AS LastEditDate, + MAX(ph.UserDisplayName) AS LastEditedBy + FROM + PostHistory ph + WHERE + ph.PostHistoryTypeId IN (4, 5, 6) + GROUP BY + ph.PostId +) +SELECT + rp.PostId, + rp.Title, + rp.CreationDate, + rp.Score, + rp.ViewCount, + rp.OwnerDisplayName, + rp.CommentCount, + tu.TotalScore AS OwnerTotalScore, + tu.UserRank AS OwnerRank, + pha.EditComments, + pha.LastEditDate, + pha.LastEditedBy, + CASE + WHEN rp.CommentCount >= 10 THEN 'Hot' + WHEN rp.Score > 100 THEN 'Popular' + ELSE 'Normal' + END AS PostStatus +FROM + RecentPosts rp +LEFT JOIN + TopUsers tu ON rp.OwnerDisplayName = tu.DisplayName +LEFT JOIN + PostHistoryAggregated pha ON rp.PostId = pha.PostId +WHERE + rp.UserPostRank = 1 +ORDER BY + rp.Score DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/stackoverflow/4592.sql b/vortex-bench/sqlstorm/stackoverflow/4592.sql new file mode 100644 index 00000000000..acd1bbbe74e --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/4592.sql @@ -0,0 +1,62 @@ + +WITH UserStatistics AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + u.Reputation, + u.CreationDate, + u.UpVotes, + u.DownVotes, + DENSE_RANK() OVER (ORDER BY u.Reputation DESC) 
AS ReputationRank, + ROW_NUMBER() OVER (PARTITION BY u.Location ORDER BY u.Reputation DESC) AS LocationRank, + u.Location + FROM Users u +), +PostDetails AS ( + SELECT + p.Id AS PostId, + p.OwnerUserId, + p.Title, + p.Score, + p.ViewCount, + p.CreationDate, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpvoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownvoteCount + FROM Posts p + LEFT JOIN Comments c ON p.Id = c.PostId + LEFT JOIN Votes v ON p.Id = v.PostId + GROUP BY p.Id, p.OwnerUserId, p.Title, p.Score, p.ViewCount, p.CreationDate +), +TopPosts AS ( + SELECT + pd.PostId, + pd.Title, + pd.Score, + pd.ViewCount, + pd.CommentCount, + ROW_NUMBER() OVER (ORDER BY pd.Score DESC, pd.ViewCount DESC) AS ScoreRank + FROM PostDetails pd + WHERE pd.Score > 0 +) +SELECT + us.DisplayName AS UserName, + us.Reputation, + pp.Title AS PostTitle, + pp.Score, + pp.ViewCount, + pp.CommentCount, + CASE + WHEN us.Location IS NULL THEN 'Unknown Location' + ELSE us.Location + END AS UserLocation, + pht.Name AS PostHistoryType +FROM UserStatistics us +LEFT JOIN Posts p ON us.UserId = p.OwnerUserId +LEFT JOIN TopPosts pp ON p.Id = pp.PostId +LEFT JOIN PostHistory ph ON p.Id = ph.PostId +LEFT JOIN PostHistoryTypes pht ON ph.PostHistoryTypeId = pht.Id +WHERE us.ReputationRank <= 50 + AND (pp.ScoreRank <= 10 OR pp.ViewCount > 100) + AND pp.CommentCount > COALESCE((SELECT AVG(CommentCount) FROM PostDetails), 0) +ORDER BY us.Reputation DESC, pp.Score DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/4726.sql b/vortex-bench/sqlstorm/stackoverflow/4726.sql new file mode 100644 index 00000000000..289270447bd --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/4726.sql @@ -0,0 +1,55 @@ +WITH UserPostStats AS ( + SELECT + u.Id AS UserId, + u.Reputation, + COUNT(p.Id) AS TotalPosts, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + SUM(CASE WHEN p.PostTypeId = 1 AND p.AcceptedAnswerId IS NOT NULL THEN 1 ELSE 0 
END) AS AcceptedAnswers + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + GROUP BY + u.Id, u.Reputation +), +TopUsers AS ( + SELECT + UserId, + Reputation, + TotalPosts, + TotalAnswers, + AcceptedAnswers, + RANK() OVER (ORDER BY Reputation DESC) AS ReputationRank + FROM + UserPostStats +), +UserBadges AS ( + SELECT + b.UserId, + COUNT(b.Id) AS BadgeCount, + STRING_AGG(b.Name, ', ') AS BadgeNames + FROM + Badges b + GROUP BY + b.UserId +) +SELECT + u.UserId, + u.Reputation, + u.TotalPosts, + u.TotalAnswers, + u.AcceptedAnswers, + ub.BadgeCount, + COALESCE(ub.BadgeNames, 'No Badges') AS BadgeNames, + CASE + WHEN u.AcceptedAnswers > 0 THEN 'Yes' + ELSE 'No' + END AS HasAcceptedAnswers +FROM + TopUsers u +LEFT JOIN + UserBadges ub ON u.UserId = ub.UserId +WHERE + u.ReputationRank <= 10 +ORDER BY + u.Reputation DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/4812.sql b/vortex-bench/sqlstorm/stackoverflow/4812.sql new file mode 100644 index 00000000000..455b25f2d46 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/4812.sql @@ -0,0 +1,28 @@ +WITH MostActiveUsers AS ( + SELECT u.Id, u.DisplayName, COUNT(p.Id) AS PostCount, SUM(COALESCE(p.ViewCount, 0)) AS TotalViews + FROM Users u + JOIN Posts p ON u.Id = p.OwnerUserId + WHERE u.Reputation > 1000 + GROUP BY u.Id, u.DisplayName +), UserBadges AS ( + SELECT b.UserId, COUNT(b.Id) AS BadgeCount, MAX(b.Class) AS HighestBadgeClass + FROM Badges b + GROUP BY b.UserId +), UserPostStats AS ( + SELECT ua.Id, ua.DisplayName, ua.PostCount, ua.TotalViews, + COALESCE(ub.BadgeCount, 0) AS BadgeCount, + COALESCE(ub.HighestBadgeClass, 0) AS HighestBadgeClass + FROM MostActiveUsers ua + LEFT JOIN UserBadges ub ON ua.Id = ub.UserId +), RecentPosts AS ( + SELECT p.Id, p.Title, p.CreationDate, p.OwnerUserId, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.CreationDate DESC) AS rn + FROM Posts p + WHERE p.CreationDate > cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '30 
days' +) +SELECT ups.DisplayName, ups.PostCount, ups.TotalViews, ups.BadgeCount, + ups.HighestBadgeClass, rp.Title AS LatestPostTitle, rp.CreationDate AS LatestPostDate +FROM UserPostStats ups +LEFT JOIN RecentPosts rp ON ups.Id = rp.OwnerUserId AND rp.rn = 1 +WHERE ups.TotalViews > 100 +ORDER BY ups.PostCount DESC, ups.TotalViews DESC, ups.BadgeCount DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/5214.sql b/vortex-bench/sqlstorm/stackoverflow/5214.sql new file mode 100644 index 00000000000..a3c8191e7f1 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/5214.sql @@ -0,0 +1,45 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpvoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownvoteCount, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY COUNT(c.Id) DESC, SUM(v.VoteTypeId) DESC) AS Rank + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + GROUP BY + p.Id, p.Title, p.PostTypeId +), TopPosts AS ( + SELECT + rp.PostId, + rp.Title, + rp.CommentCount, + rp.UpvoteCount, + rp.DownvoteCount + FROM + RankedPosts rp + WHERE + rp.Rank <= 5 +) +SELECT + tp.PostId, + tp.Title, + tp.CommentCount, + tp.UpvoteCount, + tp.DownvoteCount, + CASE + WHEN tp.UpvoteCount - tp.DownvoteCount > 0 THEN 'Positive' + WHEN tp.UpvoteCount - tp.DownvoteCount < 0 THEN 'Negative' + ELSE 'Neutral' + END AS Sentiment +FROM + TopPosts tp +ORDER BY + tp.UpvoteCount DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/5416.sql b/vortex-bench/sqlstorm/stackoverflow/5416.sql new file mode 100644 index 00000000000..277421d3121 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/5416.sql @@ -0,0 +1,59 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + 
p.CreationDate, + p.ViewCount, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY p.ViewCount DESC) AS ViewRank, + COUNT(DISTINCT c.Id) AS CommentCount, + p.OwnerUserId + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.CreationDate >= TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '365 days' AND + p.PostTypeId = 1 + GROUP BY + p.Id, p.Title, p.CreationDate, p.ViewCount, p.OwnerUserId +), +UserStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + SUM(COALESCE(p.ViewCount, 0)) AS TotalViews, + COUNT(DISTINCT p.Id) AS TotalPosts, + COALESCE(SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END), 0) AS GoldBadges, + COALESCE(SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END), 0) AS SilverBadges, + COALESCE(SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END), 0) AS BronzeBadges + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Badges b ON u.Id = b.UserId + WHERE + u.Reputation > 1000 + GROUP BY + u.Id, u.DisplayName +) +SELECT + us.DisplayName, + us.TotalPosts, + us.TotalViews, + us.GoldBadges, + us.SilverBadges, + us.BronzeBadges, + rp.Title, + rp.CreationDate, + rp.ViewCount, + rp.CommentCount +FROM + UserStats us +JOIN + RankedPosts rp ON us.UserId = rp.OwnerUserId +WHERE + rp.ViewRank <= 3 +ORDER BY + us.TotalViews DESC, rp.ViewCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/5790.sql b/vortex-bench/sqlstorm/stackoverflow/5790.sql new file mode 100644 index 00000000000..eed5bc9fd28 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/5790.sql @@ -0,0 +1,49 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.ViewCount, + p.Score, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC, p.ViewCount DESC) AS Rank + FROM Posts p + JOIN Users u ON p.OwnerUserId = u.Id + LEFT JOIN Comments c ON p.Id = c.PostId + WHERE p.CreationDate >= TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 year' + GROUP BY p.Id, 
p.Title, p.CreationDate, p.ViewCount, p.Score, u.DisplayName, p.PostTypeId +), +TopPosts AS ( + SELECT + PostId, + Title, + CreationDate, + ViewCount, + Score, + OwnerDisplayName + FROM RankedPosts + WHERE Rank <= 10 +), +VoteSummary AS ( + SELECT + PostId, + SUM(CASE WHEN VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes + FROM Votes + GROUP BY PostId +) +SELECT + tp.PostId, + tp.Title, + tp.CreationDate, + tp.ViewCount, + tp.Score, + tp.OwnerDisplayName, + COALESCE(vs.UpVotes, 0) AS UpVotes, + COALESCE(vs.DownVotes, 0) AS DownVotes, + (tp.ViewCount + COALESCE(vs.UpVotes, 0) - COALESCE(vs.DownVotes, 0)) AS EngagementScore +FROM TopPosts tp +LEFT JOIN VoteSummary vs ON tp.PostId = vs.PostId +ORDER BY EngagementScore DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/5914.sql b/vortex-bench/sqlstorm/stackoverflow/5914.sql new file mode 100644 index 00000000000..5c455c26839 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/5914.sql @@ -0,0 +1,55 @@ + +WITH UserReputation AS ( + SELECT + u.Id AS UserId, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes, + COUNT(DISTINCT p.Id) AS PostCount, + COALESCE(SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END), 0) AS QuestionCount, + COALESCE(SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END), 0) AS AnswerCount, + COALESCE(SUM(CASE WHEN p.AcceptedAnswerId IS NOT NULL THEN 1 ELSE 0 END), 0) AS AcceptedAnswerCount, + u.Reputation + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Votes v ON p.Id = v.PostId + WHERE + u.Reputation > 1000 + GROUP BY + u.Id, u.Reputation +), +UserBadges AS ( + SELECT + b.UserId, + COUNT(*) FILTER (WHERE b.Class = 1) AS GoldBadges, + COUNT(*) FILTER (WHERE b.Class = 2) AS SilverBadges, + COUNT(*) FILTER (WHERE b.Class = 3) AS BronzeBadges + FROM + Badges b + GROUP BY + b.UserId +) +SELECT + ur.UserId, + u.DisplayName, + ur.UpVotes, 
+ ur.DownVotes, + ur.PostCount, + ur.QuestionCount, + ur.AnswerCount, + ur.AcceptedAnswerCount, + COALESCE(ub.GoldBadges, 0) AS GoldBadges, + COALESCE(ub.SilverBadges, 0) AS SilverBadges, + COALESCE(ub.BronzeBadges, 0) AS BronzeBadges, + (ur.UpVotes - ur.DownVotes) AS NetVotes +FROM + UserReputation ur +JOIN + Users u ON ur.UserId = u.Id +LEFT JOIN + UserBadges ub ON ur.UserId = ub.UserId +ORDER BY + NetVotes DESC, ur.Reputation DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/stackoverflow/5999.sql b/vortex-bench/sqlstorm/stackoverflow/5999.sql new file mode 100644 index 00000000000..f5f66cc312a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/5999.sql @@ -0,0 +1,43 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + u.DisplayName AS AuthorName, + p.ViewCount, + p.Score, + ROW_NUMBER() OVER (PARTITION BY pt.Name ORDER BY p.ViewCount DESC) AS RankByViews, + ROW_NUMBER() OVER (PARTITION BY pt.Name ORDER BY p.Score DESC) AS RankByScore + FROM + Posts p + JOIN + PostTypes pt ON p.PostTypeId = pt.Id + JOIN + Users u ON p.OwnerUserId = u.Id + WHERE + p.CreationDate > (cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '30 days') + AND + p.ViewCount > 100 +) + +SELECT + rp.PostId, + rp.Title, + rp.CreationDate, + rp.AuthorName, + rp.ViewCount, + rp.Score, + CASE + WHEN rp.RankByViews <= 10 THEN 'Top 10 Viewed' + ELSE 'Other' + END AS ViewRankCategory, + CASE + WHEN rp.RankByScore <= 10 THEN 'Top 10 Scored' + ELSE 'Other' + END AS ScoreRankCategory +FROM + RankedPosts rp +WHERE + rp.RankByViews <= 10 OR rp.RankByScore <= 10 +ORDER BY + rp.ViewCount DESC, rp.Score DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/6041.sql b/vortex-bench/sqlstorm/stackoverflow/6041.sql new file mode 100644 index 00000000000..4f39efcc25d --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/6041.sql @@ -0,0 +1,39 @@ +WITH PostActivity AS ( + SELECT + p.Id AS PostId, + p.Title, + u.DisplayName AS OwnerDisplayName, + 
p.CreationDate, + p.LastActivityDate, + p.ViewCount, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes, + COUNT(DISTINCT ph.Id) AS HistoryCount + FROM Posts p + LEFT JOIN Users u ON p.OwnerUserId = u.Id + LEFT JOIN Comments c ON p.Id = c.PostId + LEFT JOIN Votes v ON p.Id = v.PostId + LEFT JOIN PostHistory ph ON p.Id = ph.PostId + WHERE p.CreationDate >= cast('2024-10-01' as date) - INTERVAL '1 year' AND p.PostTypeId = 1 + GROUP BY p.Id, p.Title, u.DisplayName, p.CreationDate, p.LastActivityDate, p.ViewCount +), +PostRanked AS ( + SELECT + pa.*, + RANK() OVER (ORDER BY pa.ViewCount DESC, pa.UpVotes DESC, pa.LastActivityDate DESC) AS Rank + FROM PostActivity pa +) +SELECT + Rank, + Title, + OwnerDisplayName, + CreationDate, + LastActivityDate, + ViewCount, + CommentCount, + UpVotes, + DownVotes, + HistoryCount +FROM PostRanked +WHERE Rank <= 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/6245.sql b/vortex-bench/sqlstorm/stackoverflow/6245.sql new file mode 100644 index 00000000000..3fc5a4ddec9 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/6245.sql @@ -0,0 +1,50 @@ + +WITH UserActivity AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + COUNT(DISTINCT P.Id) AS TotalPosts, + COUNT(DISTINCT C.Id) AS TotalComments, + SUM(CASE WHEN V.VoteTypeId = 2 THEN 1 ELSE 0 END) AS TotalUpVotes, + SUM(CASE WHEN V.VoteTypeId = 3 THEN 1 ELSE 0 END) AS TotalDownVotes, + SUM(P.Score) AS TotalScore + FROM + Users U + LEFT JOIN + Posts P ON U.Id = P.OwnerUserId + LEFT JOIN + Comments C ON P.Id = C.PostId + LEFT JOIN + Votes V ON P.Id = V.PostId + WHERE + U.Reputation > 1000 + GROUP BY + U.Id, U.DisplayName +), +TopUsers AS ( + SELECT + UserId, + DisplayName, + TotalPosts, + TotalComments, + TotalUpVotes, + TotalDownVotes, + TotalScore, + RANK() OVER (ORDER BY TotalScore DESC) AS Rank + FROM + UserActivity +) +SELECT + T.DisplayName, + 
T.TotalPosts, + T.TotalComments, + T.TotalUpVotes, + T.TotalDownVotes, + T.TotalScore, + T.Rank +FROM + TopUsers T +WHERE + T.Rank <= 10 +ORDER BY + T.Rank; diff --git a/vortex-bench/sqlstorm/stackoverflow/6597.sql b/vortex-bench/sqlstorm/stackoverflow/6597.sql new file mode 100644 index 00000000000..e5bcf20e91a --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/6597.sql @@ -0,0 +1,48 @@ +WITH UserStats AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + U.Reputation, + COUNT(DISTINCT P.Id) AS PostCount, + SUM(CASE WHEN P.ViewCount > 1000 THEN 1 ELSE 0 END) AS PopularPosts, + SUM(CASE WHEN P.Score > 50 THEN 1 ELSE 0 END) AS HighScorePosts + FROM Users U + LEFT JOIN Posts P ON U.Id = P.OwnerUserId + WHERE U.Reputation > 100 + GROUP BY U.Id, U.DisplayName, U.Reputation +), +BadgeCounts AS ( + SELECT + B.UserId, + COUNT(*) AS BadgeCount + FROM Badges B + GROUP BY B.UserId +), +Report AS ( + SELECT + US.UserId, + US.DisplayName, + US.Reputation, + US.PostCount, + US.PopularPosts, + US.HighScorePosts, + COALESCE(BC.BadgeCount, 0) AS BadgeCount + FROM UserStats US + LEFT JOIN BadgeCounts BC ON US.UserId = BC.UserId +) +SELECT + R.DisplayName, + R.Reputation, + R.PostCount, + R.PopularPosts, + R.HighScorePosts, + R.BadgeCount, + CASE + WHEN R.Reputation > 1000 THEN 'Elite' + WHEN R.Reputation > 500 THEN 'Pro' + ELSE 'Novice' + END AS UserTier +FROM Report R +WHERE R.PostCount > 10 +ORDER BY R.Reputation DESC, R.PostCount DESC +LIMIT 20; diff --git a/vortex-bench/sqlstorm/stackoverflow/6625.sql b/vortex-bench/sqlstorm/stackoverflow/6625.sql new file mode 100644 index 00000000000..ae651faf1c7 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/6625.sql @@ -0,0 +1,41 @@ +WITH RankedPosts AS ( + SELECT + p.Id, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + u.DisplayName AS OwnerDisplayName, + DENSE_RANK() OVER (PARTITION BY p.OwnerUserId ORDER BY p.Score DESC) AS PostRank + FROM + Posts p + JOIN + Users u ON p.OwnerUserId = u.Id + WHERE + p.PostTypeId = 1 + 
AND p.CreationDate >= '2022-01-01' +), +MaxRank AS ( + SELECT + OwnerDisplayName, + MAX(PostRank) AS MaxPostRank + FROM + RankedPosts + GROUP BY + OwnerDisplayName +) +SELECT + rp.Title, + rp.CreationDate, + rp.Score, + rp.ViewCount, + rp.OwnerDisplayName +FROM + RankedPosts rp +JOIN + MaxRank mr ON rp.OwnerDisplayName = mr.OwnerDisplayName +WHERE + rp.PostRank = mr.MaxPostRank +ORDER BY + rp.Score DESC, + rp.ViewCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/7047.sql b/vortex-bench/sqlstorm/stackoverflow/7047.sql new file mode 100644 index 00000000000..e4ab6ef04e7 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/7047.sql @@ -0,0 +1,59 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + COALESCE(SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END), 0) AS UpVotes, + COALESCE(SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END), 0) AS DownVotes, + COUNT(DISTINCT c.Id) AS CommentCount, + ROW_NUMBER() OVER (PARTITION BY p.OwnerUserId ORDER BY COALESCE(SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END), 0) DESC) AS VoteRank, + p.OwnerUserId + FROM + Posts p + LEFT JOIN + Votes v ON p.Id = v.PostId + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.CreationDate >= CURRENT_TIMESTAMP - INTERVAL '1 year' + GROUP BY + p.Id, p.OwnerUserId, p.Title, p.CreationDate +), +UserStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(DISTINCT p.Id) AS TotalPosts, + SUM(CASE WHEN COALESCE(b.Class, 0) = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN COALESCE(b.Class, 0) = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN COALESCE(b.Class, 0) = 3 THEN 1 ELSE 0 END) AS BronzeBadges + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Badges b ON u.Id = b.UserId + WHERE + u.Reputation > 1000 + GROUP BY + u.Id, u.DisplayName +) +SELECT + us.DisplayName, + us.TotalPosts, + us.GoldBadges, + us.SilverBadges, + us.BronzeBadges, + COUNT(rp.PostId) AS ActivePostCount, + AVG(rp.UpVotes - rp.DownVotes) AS 
AverageVoteDifference +FROM + UserStats us +LEFT JOIN + RankedPosts rp ON us.UserId = rp.OwnerUserId +WHERE + rp.VoteRank <= 5 +GROUP BY + us.DisplayName, us.TotalPosts, us.GoldBadges, us.SilverBadges, us.BronzeBadges +ORDER BY + AverageVoteDifference DESC, us.TotalPosts ASC; diff --git a/vortex-bench/sqlstorm/stackoverflow/7073.sql b/vortex-bench/sqlstorm/stackoverflow/7073.sql new file mode 100644 index 00000000000..65e05bc37ef --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/7073.sql @@ -0,0 +1,61 @@ + +WITH UserActivity AS ( + SELECT + U.Id AS UserId, + U.DisplayName, + COALESCE(SUM(CASE WHEN V.VoteTypeId = 2 THEN 1 ELSE 0 END), 0) AS UpVotes, + COALESCE(SUM(CASE WHEN V.VoteTypeId = 3 THEN 1 ELSE 0 END), 0) AS DownVotes, + COUNT(DISTINCT P.Id) AS PostCount, + COUNT(DISTINCT C.Id) AS CommentCount, + COUNT(DISTINCT B.Id) AS BadgeCount + FROM Users U + LEFT JOIN Posts P ON U.Id = P.OwnerUserId + LEFT JOIN Comments C ON P.Id = C.PostId + LEFT JOIN Votes V ON P.Id = V.PostId AND V.UserId = U.Id + LEFT JOIN Badges B ON U.Id = B.UserId + WHERE U.CreationDate >= '2023-01-01' + GROUP BY U.Id, U.DisplayName +), +PostStatistics AS ( + SELECT + P.Id AS PostId, + P.Title, + P.CreationDate, + P.ViewCount, + P.Score, + P.Tags, + COUNT(DISTINCT C.Id) AS CommentCount, + COUNT(DISTINCT V.Id) AS VoteCount + FROM Posts P + LEFT JOIN Comments C ON P.Id = C.PostId + LEFT JOIN Votes V ON P.Id = V.PostId + GROUP BY P.Id, P.Title, P.CreationDate, P.ViewCount, P.Score, P.Tags +), +TopPosts AS ( + SELECT + PS.PostId, + PS.Title, + PS.CreationDate, + PS.ViewCount, + PS.Score, + PS.Tags, + PS.CommentCount, + PS.VoteCount, + ROW_NUMBER() OVER (ORDER BY PS.Score DESC, PS.ViewCount DESC) AS Rank + FROM PostStatistics PS +) +SELECT + UA.DisplayName, + UA.UpVotes, + UA.DownVotes, + UA.PostCount, + UA.CommentCount, + UA.BadgeCount, + TP.Title AS TopPostTitle, + TP.ViewCount AS TopPostViewCount, + TP.Score AS TopPostScore, + TP.Rank +FROM UserActivity UA +LEFT JOIN TopPosts TP ON UA.UserId 
= TP.PostId +WHERE TP.Rank <= 10 +ORDER BY UA.BadgeCount DESC, UA.UpVotes DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/7397.sql b/vortex-bench/sqlstorm/stackoverflow/7397.sql new file mode 100644 index 00000000000..b74683a4b44 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/7397.sql @@ -0,0 +1,43 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.ViewCount, + p.Score, + COUNT(c.Id) AS CommentCount, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC, p.ViewCount DESC) AS Rank + FROM + Posts p + LEFT JOIN + Comments c ON p.Id = c.PostId + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + GROUP BY + p.Id, p.Title, p.ViewCount, p.Score, p.PostTypeId +), +TopPosts AS ( + SELECT + rp.PostId, + rp.Title, + rp.ViewCount, + rp.Score, + rp.CommentCount, + CASE + WHEN rp.Rank <= 10 THEN 'Top 10' + ELSE 'Other' + END AS RankingCategory + FROM + RankedPosts rp +) +SELECT + tp.RankingCategory, + AVG(tp.ViewCount) AS AvgViewCount, + SUM(tp.CommentCount) AS TotalComments, + SUM(tp.Score) AS TotalScore, + COUNT(tp.PostId) AS TotalPosts +FROM + TopPosts tp +GROUP BY + tp.RankingCategory +ORDER BY + tp.RankingCategory DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/7612.sql b/vortex-bench/sqlstorm/stackoverflow/7612.sql new file mode 100644 index 00000000000..26eaed22c3d --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/7612.sql @@ -0,0 +1,57 @@ + +WITH UserStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + u.Reputation, + COUNT(DISTINCT p.Id) AS PostCount, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS QuestionCount, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS AnswerCount, + SUM(CASE WHEN p.PostTypeId IN (10, 11, 12) THEN 1 ELSE 0 END) AS ClosedPosts, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes + FROM Users u + LEFT JOIN Posts p ON u.Id = 
p.OwnerUserId + LEFT JOIN Votes v ON p.Id = v.PostId + GROUP BY u.Id, u.DisplayName, u.Reputation +), BadgeStats AS ( + SELECT + b.UserId, + COUNT(b.Id) AS BadgeCount, + COUNT(CASE WHEN b.Class = 1 THEN 1 END) AS GoldBadges, + COUNT(CASE WHEN b.Class = 2 THEN 1 END) AS SilverBadges, + COUNT(CASE WHEN b.Class = 3 THEN 1 END) AS BronzeBadges + FROM Badges b + GROUP BY b.UserId +), PostHistoryStats AS ( + SELECT + ph.UserId, + COUNT(ph.Id) AS EditCount, + SUM(CASE WHEN ph.PostHistoryTypeId IN (4, 5, 6) THEN 1 ELSE 0 END) AS TitleEdits, + SUM(CASE WHEN ph.PostHistoryTypeId IN (10, 11) THEN 1 ELSE 0 END) AS ClosedPostChanges + FROM PostHistory ph + GROUP BY ph.UserId +) +SELECT + us.UserId, + us.DisplayName, + us.Reputation, + us.PostCount, + us.QuestionCount, + us.AnswerCount, + us.ClosedPosts, + us.UpVotes, + us.DownVotes, + bs.BadgeCount, + bs.GoldBadges, + bs.SilverBadges, + bs.BronzeBadges, + phs.EditCount, + phs.TitleEdits, + phs.ClosedPostChanges +FROM UserStats us +LEFT JOIN BadgeStats bs ON us.UserId = bs.UserId +LEFT JOIN PostHistoryStats phs ON us.UserId = phs.UserId +WHERE us.Reputation > 1000 +ORDER BY us.Reputation DESC, us.PostCount DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/stackoverflow/7693.sql b/vortex-bench/sqlstorm/stackoverflow/7693.sql new file mode 100644 index 00000000000..bcf06c59703 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/7693.sql @@ -0,0 +1,60 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + u.DisplayName AS Author, + p.CreationDate, + p.ViewCount, + p.Score, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC, p.ViewCount DESC) AS Rank, + COUNT(DISTINCT v.Id) AS VoteCount, + COUNT(c.Id) AS CommentCount, + COUNT(b.Id) AS BadgeCount + FROM + Posts p + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN + Votes v ON p.Id = v.PostId + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Badges b ON u.Id = b.UserId + WHERE + p.CreationDate > CURRENT_DATE - INTERVAL '1 year' + 
GROUP BY + p.Id, p.Title, u.DisplayName, p.CreationDate, p.ViewCount, p.Score, p.PostTypeId +), +FilteredPosts AS ( + SELECT + rp.PostId, + rp.Title, + rp.Author, + rp.CreationDate, + rp.ViewCount, + rp.Score, + rp.Rank, + rp.VoteCount, + rp.CommentCount, + rp.BadgeCount + FROM + RankedPosts rp + WHERE + rp.Rank <= 10 +) +SELECT + fp.PostId, + fp.Title, + fp.Author, + fp.CreationDate, + fp.ViewCount, + fp.Score, + fp.VoteCount, + fp.CommentCount, + fp.BadgeCount +FROM + FilteredPosts fp +ORDER BY + fp.Score DESC, + fp.ViewCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/7876.sql b/vortex-bench/sqlstorm/stackoverflow/7876.sql new file mode 100644 index 00000000000..775167da4be --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/7876.sql @@ -0,0 +1,71 @@ + +WITH UserStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(DISTINCT p.Id) AS TotalPosts, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS Questions, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS Answers, + SUM(CASE WHEN p.PostTypeId = 3 THEN 1 ELSE 0 END) AS Wikis, + SUM(p.ViewCount) AS TotalViews, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS Upvotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS Downvotes + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Votes v ON p.Id = v.PostId + WHERE + u.Reputation > 1000 + GROUP BY + u.Id, u.DisplayName +), +BadgeCounts AS ( + SELECT + UserId, + COUNT(*) AS TotalBadges, + SUM(CASE WHEN Class = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN Class = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN Class = 3 THEN 1 ELSE 0 END) AS BronzeBadges + FROM + Badges + GROUP BY + UserId +), +PostHistoryAnalytics AS ( + SELECT + ph.UserId, + COUNT(*) AS EditsCount, + SUM(CASE WHEN ph.PostHistoryTypeId IN (4, 5) THEN 1 ELSE 0 END) AS TitleAndBodyEdits, + SUM(CASE WHEN ph.PostHistoryTypeId IN (10, 11) THEN 1 ELSE 0 END) AS CloseReopenCounts + FROM + PostHistory ph + GROUP BY + ph.UserId +) 
+SELECT + us.UserId, + us.DisplayName, + us.TotalPosts, + us.Questions, + us.Answers, + us.Wikis, + us.TotalViews, + us.Upvotes, + us.Downvotes, + COALESCE(bc.TotalBadges, 0) AS TotalBadges, + COALESCE(bc.GoldBadges, 0) AS GoldBadges, + COALESCE(bc.SilverBadges, 0) AS SilverBadges, + COALESCE(bc.BronzeBadges, 0) AS BronzeBadges, + COALESCE(ph.EditsCount, 0) AS EditsCount, + COALESCE(ph.TitleAndBodyEdits, 0) AS TitleAndBodyEdits, + COALESCE(ph.CloseReopenCounts, 0) AS CloseReopenCounts +FROM + UserStats us +LEFT JOIN + BadgeCounts bc ON us.UserId = bc.UserId +LEFT JOIN + PostHistoryAnalytics ph ON us.UserId = ph.UserId +ORDER BY + us.TotalPosts DESC, us.Upvotes DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/7917.sql b/vortex-bench/sqlstorm/stackoverflow/7917.sql new file mode 100644 index 00000000000..8bfa205aa29 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/7917.sql @@ -0,0 +1,71 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + p.AnswerCount, + COALESCE(u.DisplayName, 'Community') AS OwnerDisplayName, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) AS Rank + FROM + Posts p + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + WHERE + p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' +), +PostStats AS ( + SELECT + PostId, + Title, + CreationDate, + Score, + ViewCount, + AnswerCount, + OwnerDisplayName, + Rank + FROM + RankedPosts + WHERE + Rank <= 10 +), +CommentsInfo AS ( + SELECT + c.PostId, + COUNT(c.Id) AS CommentCount, + MAX(c.CreationDate) AS LastCommentDate + FROM + Comments c + GROUP BY + c.PostId +), +FinalReport AS ( + SELECT + ps.PostId, + ps.Title, + ps.CreationDate, + ps.Score, + ps.ViewCount, + ps.AnswerCount, + ps.OwnerDisplayName, + ci.CommentCount, + ci.LastCommentDate + FROM + PostStats ps + LEFT JOIN + CommentsInfo ci ON ps.PostId = ci.PostId +) +SELECT + Title, + CreationDate, + Score, + ViewCount, + AnswerCount, + 
OwnerDisplayName, + COALESCE(CommentCount, 0) AS CommentCount, + LastCommentDate +FROM + FinalReport +ORDER BY + Score DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/8042.sql b/vortex-bench/sqlstorm/stackoverflow/8042.sql new file mode 100644 index 00000000000..8c6e4d2fde0 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/8042.sql @@ -0,0 +1,59 @@ +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.Score, + p.CreationDate, + u.DisplayName AS OwnerDisplayName, + COUNT(c.Id) AS CommentCount, + COUNT(DISTINCT v.Id) AS VoteCount, + ROW_NUMBER() OVER (PARTITION BY p.Id ORDER BY p.CreationDate DESC) AS PostRank + FROM + Posts p + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + WHERE + p.PostTypeId = 1 + GROUP BY + p.Id, p.Title, p.Score, p.CreationDate, u.DisplayName +), +TopPosts AS ( + SELECT + PostId, + Title, + Score, + CreationDate, + OwnerDisplayName, + CommentCount, + VoteCount + FROM + RankedPosts + WHERE + PostRank = 1 + ORDER BY + Score DESC, CreationDate DESC + LIMIT 10 +) +SELECT + tp.Title, + tp.Score, + tp.CreationDate, + tp.OwnerDisplayName, + tp.CommentCount, + tp.VoteCount, + pht.Name AS PostHistoryType, + COUNT(ph.Id) AS HistoryCount +FROM + TopPosts tp +LEFT JOIN + PostHistory ph ON tp.PostId = ph.PostId +LEFT JOIN + PostHistoryTypes pht ON ph.PostHistoryTypeId = pht.Id +GROUP BY + tp.PostId, tp.Title, tp.Score, tp.CreationDate, tp.OwnerDisplayName, tp.CommentCount, tp.VoteCount, pht.Name +ORDER BY + tp.Score DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/stackoverflow/8708.sql b/vortex-bench/sqlstorm/stackoverflow/8708.sql new file mode 100644 index 00000000000..1c1faa812ba --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/8708.sql @@ -0,0 +1,53 @@ + +WITH UserPostStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(p.Id) AS TotalPosts, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 
ELSE 0 END) AS TotalQuestions, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS TotalAnswers, + SUM(CASE WHEN p.Score > 0 THEN 1 ELSE 0 END) AS PositivePosts, + SUM(CASE WHEN p.Score < 0 THEN 1 ELSE 0 END) AS NegativePosts, + SUM(p.ViewCount) AS TotalViews + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + GROUP BY + u.Id, u.DisplayName +), +TopUsers AS ( + SELECT + UserId, + DisplayName, + TotalPosts, + TotalQuestions, + TotalAnswers, + PositivePosts, + NegativePosts, + TotalViews, + RANK() OVER (ORDER BY TotalPosts DESC) AS RankByPosts, + RANK() OVER (ORDER BY TotalViews DESC) AS RankByViews + FROM + UserPostStats +) +SELECT + tu.DisplayName, + tu.TotalPosts, + tu.TotalQuestions, + tu.TotalAnswers, + tu.PositivePosts, + tu.NegativePosts, + tu.TotalViews, + CASE + WHEN tu.RankByPosts <= 10 THEN 'Top Contributor' + WHEN tu.RankByViews <= 10 THEN 'Popular User' + ELSE 'Regular User' + END AS UserCategory +FROM + TopUsers tu +WHERE + tu.TotalPosts > 50 OR tu.TotalViews > 1000 +ORDER BY + tu.TotalPosts DESC, + tu.TotalViews DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/8858.sql b/vortex-bench/sqlstorm/stackoverflow/8858.sql new file mode 100644 index 00000000000..5db99388252 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/8858.sql @@ -0,0 +1,55 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + u.DisplayName AS OwnerName, + p.CreationDate, + p.Score, + COUNT(DISTINCT c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVoteCount, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) AS Rank + FROM + Posts p + JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + WHERE + p.CreationDate >= TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 year' + GROUP BY + p.Id, p.Title, u.DisplayName, p.CreationDate, p.Score, p.PostTypeId +), +TopPosts 
AS ( + SELECT + rp.PostId, + rp.Title, + rp.OwnerName, + rp.CreationDate, + rp.Score, + rp.CommentCount, + rp.UpVoteCount, + rp.DownVoteCount + FROM + RankedPosts rp + WHERE + rp.Rank <= 10 +) +SELECT + tp.Title, + tp.OwnerName, + tp.CreationDate, + tp.Score, + tp.CommentCount, + tp.UpVoteCount, + tp.DownVoteCount, + COALESCE(b.Name, 'No Badge') AS BadgeName +FROM + TopPosts tp +LEFT JOIN + Badges b ON tp.PostId = b.UserId +ORDER BY + tp.Score DESC, tp.CommentCount DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/9087.sql b/vortex-bench/sqlstorm/stackoverflow/9087.sql new file mode 100644 index 00000000000..99c9b33f817 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/9087.sql @@ -0,0 +1,50 @@ + +WITH RankedPosts AS ( + SELECT + p.Id AS PostId, + p.Title, + p.CreationDate, + p.Score, + p.ViewCount, + u.DisplayName AS OwnerDisplayName, + RANK() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC, p.CreationDate DESC) AS RankScore, + COUNT(c.Id) AS CommentCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpvoteCount, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownvoteCount, + p.PostTypeId -- Added to the GROUP BY clause + FROM + Posts p + LEFT JOIN + Users u ON p.OwnerUserId = u.Id + LEFT JOIN + Comments c ON p.Id = c.PostId + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + p.Id, p.Title, p.CreationDate, p.Score, p.ViewCount, u.DisplayName, p.PostTypeId -- Added p.PostTypeId +), FilteredPosts AS ( + SELECT + rp.*, + pt.Name AS PostTypeName + FROM + RankedPosts rp + JOIN + PostTypes pt ON rp.PostTypeId = pt.Id + WHERE + rp.RankScore <= 5 +) +SELECT + fp.PostId, + fp.Title, + fp.PostTypeName, + fp.CreationDate, + fp.Score, + fp.ViewCount, + fp.CommentCount, + fp.UpvoteCount, + fp.DownvoteCount, + fp.OwnerDisplayName +FROM + FilteredPosts fp +ORDER BY + fp.PostTypeName, fp.RankScore; diff --git a/vortex-bench/sqlstorm/stackoverflow/9110.sql b/vortex-bench/sqlstorm/stackoverflow/9110.sql new file mode 100644 index 
00000000000..3aae3c8282b --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/9110.sql @@ -0,0 +1,52 @@ + +WITH UserStats AS ( + SELECT + u.Id AS UserId, + u.DisplayName, + COUNT(DISTINCT p.Id) AS PostCount, + SUM(CASE WHEN p.PostTypeId = 1 THEN 1 ELSE 0 END) AS QuestionCount, + SUM(CASE WHEN p.PostTypeId = 2 THEN 1 ELSE 0 END) AS AnswerCount, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN v.VoteTypeId = 3 THEN 1 ELSE 0 END) AS DownVotes + FROM + Users u + LEFT JOIN + Posts p ON u.Id = p.OwnerUserId + LEFT JOIN + Votes v ON p.Id = v.PostId + GROUP BY + u.Id, u.DisplayName +), +BadgeCounts AS ( + SELECT + b.UserId, + COUNT(b.Id) AS BadgeCount, + SUM(CASE WHEN b.Class = 1 THEN 1 ELSE 0 END) AS GoldBadges, + SUM(CASE WHEN b.Class = 2 THEN 1 ELSE 0 END) AS SilverBadges, + SUM(CASE WHEN b.Class = 3 THEN 1 ELSE 0 END) AS BronzeBadges + FROM + Badges b + GROUP BY + b.UserId +) +SELECT + us.UserId, + us.DisplayName, + us.PostCount, + us.QuestionCount, + us.AnswerCount, + COALESCE(bc.BadgeCount, 0) AS TotalBadges, + COALESCE(bc.GoldBadges, 0) AS GoldBadges, + COALESCE(bc.SilverBadges, 0) AS SilverBadges, + COALESCE(bc.BronzeBadges, 0) AS BronzeBadges, + us.UpVotes, + us.DownVotes +FROM + UserStats us +LEFT JOIN + BadgeCounts bc ON us.UserId = bc.UserId +WHERE + us.PostCount > 10 +ORDER BY + us.UpVotes DESC, us.DownVotes ASC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/stackoverflow/9214.sql b/vortex-bench/sqlstorm/stackoverflow/9214.sql new file mode 100644 index 00000000000..80195789b3c --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/9214.sql @@ -0,0 +1,26 @@ + +SELECT + pt.Name AS PostType, + COUNT(p.Id) AS TotalPosts, + AVG(u.Reputation) AS AverageUserReputation, + SUM(CASE WHEN v.VoteTypeId = 2 THEN 1 ELSE 0 END) AS TotalUpVotes, + COUNT(DISTINCT c.Id) AS TotalComments, + COUNT(DISTINCT b.Id) AS TotalBadges +FROM + Posts p +JOIN + PostTypes pt ON p.PostTypeId = pt.Id +LEFT JOIN + Users u ON p.OwnerUserId = u.Id +LEFT JOIN + 
Votes v ON p.Id = v.PostId +LEFT JOIN + Comments c ON p.Id = c.PostId +LEFT JOIN + Badges b ON u.Id = b.UserId +WHERE + p.CreationDate >= TIMESTAMP '2024-10-01 12:34:56' - INTERVAL '1 year' +GROUP BY + pt.Name, u.Reputation +ORDER BY + TotalPosts DESC, AverageUserReputation DESC; diff --git a/vortex-bench/sqlstorm/stackoverflow/9563.sql b/vortex-bench/sqlstorm/stackoverflow/9563.sql new file mode 100644 index 00000000000..81d4c0fd790 --- /dev/null +++ b/vortex-bench/sqlstorm/stackoverflow/9563.sql @@ -0,0 +1,40 @@ +WITH RankedPosts AS ( + SELECT p.Id AS PostId, + p.Title, + p.Score, + p.CreationDate, + u.DisplayName AS Author, + ROW_NUMBER() OVER (PARTITION BY p.PostTypeId ORDER BY p.Score DESC) as PostRank + FROM Posts p + JOIN Users u ON p.OwnerUserId = u.Id + WHERE p.CreationDate >= cast('2024-10-01 12:34:56' as timestamp) - INTERVAL '1 year' + AND p.ViewCount > 100 +), +AggregatedVotes AS ( + SELECT v.PostId, + SUM(CASE WHEN vt.Name = 'UpMod' THEN 1 ELSE 0 END) AS UpVotes, + SUM(CASE WHEN vt.Name = 'DownMod' THEN 1 ELSE 0 END) AS DownVotes + FROM Votes v + JOIN VoteTypes vt ON v.VoteTypeId = vt.Id + GROUP BY v.PostId +), +TopPosts AS ( + SELECT rp.PostId, + rp.Title, + rp.Score, + rp.CreationDate, + rp.Author, + av.UpVotes, + av.DownVotes + FROM RankedPosts rp + LEFT JOIN AggregatedVotes av ON rp.PostId = av.PostId + WHERE rp.PostRank <= 5 +) +SELECT tp.Title, + tp.Score, + tp.CreationDate, + tp.Author, + COALESCE(tp.UpVotes, 0) AS TotalUpVotes, + COALESCE(tp.DownVotes, 0) AS TotalDownVotes +FROM TopPosts tp +ORDER BY tp.Score DESC, tp.CreationDate DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/10175.sql b/vortex-bench/sqlstorm/tpcds/10175.sql new file mode 100644 index 00000000000..89caac7c7d5 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/10175.sql @@ -0,0 +1,20 @@ + +SELECT + c.c_customer_id, + c.c_first_name, + c.c_last_name, + SUM(ss.ss_sales_price) AS total_sales, + d.d_year +FROM + customer c +JOIN + store_sales ss ON 
c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_customer_id, c.c_first_name, c.c_last_name, d.d_year +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/10181.sql b/vortex-bench/sqlstorm/tpcds/10181.sql new file mode 100644 index 00000000000..8e65b6ac71f --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/10181.sql @@ -0,0 +1,19 @@ + +SELECT + c.c_first_name, + c.c_last_name, + SUM(ss.ss_sales_price) AS total_sales, + COUNT(DISTINCT ss.ss_ticket_number) AS sales_count +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_first_name, c.c_last_name +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/10220.sql b/vortex-bench/sqlstorm/tpcds/10220.sql new file mode 100644 index 00000000000..3115f0def84 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/10220.sql @@ -0,0 +1,19 @@ + +SELECT + c.c_customer_id, + SUM(ss.ss_sales_price) AS total_sales, + COUNT(ss.ss_item_sk) AS item_count, + AVG(ss.ss_sales_price) AS avg_sales_price +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_customer_id +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/10508.sql b/vortex-bench/sqlstorm/tpcds/10508.sql new file mode 100644 index 00000000000..8455c4a43e3 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/10508.sql @@ -0,0 +1,18 @@ + +SELECT + c.c_customer_id, + ca.ca_city, + SUM(ws.ws_ext_sales_price) AS total_sales +FROM + customer c +JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +WHERE + ws.ws_sold_date_sk BETWEEN 1000 AND 1010 +GROUP BY + c.c_customer_id, ca.ca_city +ORDER BY + total_sales 
DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/10628.sql b/vortex-bench/sqlstorm/tpcds/10628.sql new file mode 100644 index 00000000000..214f8bbc2c1 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/10628.sql @@ -0,0 +1,20 @@ + +SELECT + c.c_first_name, + c.c_last_name, + SUM(ss.ss_quantity) AS total_quantity_sold, + SUM(ss.ss_net_paid) AS total_net_paid +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_first_name, + c.c_last_name +ORDER BY + total_net_paid DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/10755.sql b/vortex-bench/sqlstorm/tpcds/10755.sql new file mode 100644 index 00000000000..1ffb81ffda9 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/10755.sql @@ -0,0 +1,18 @@ + +SELECT + c.c_customer_id, + SUM(ws.ws_quantity) AS total_quantity_sold, + SUM(ws.ws_sales_price * ws.ws_quantity) AS total_sales +FROM + customer c +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_customer_id +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/10768.sql b/vortex-bench/sqlstorm/tpcds/10768.sql new file mode 100644 index 00000000000..d2e4f22f793 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/10768.sql @@ -0,0 +1,20 @@ + +SELECT + c.c_customer_id, + c.c_first_name, + c.c_last_name, + SUM(ss.ss_net_paid) AS total_sales, + COUNT(DISTINCT ss.ss_ticket_number) AS total_transactions +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_customer_id, c.c_first_name, c.c_last_name +ORDER BY + total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/10889.sql b/vortex-bench/sqlstorm/tpcds/10889.sql new file mode 100644 index 00000000000..2d5af8ad428 --- /dev/null +++ 
b/vortex-bench/sqlstorm/tpcds/10889.sql @@ -0,0 +1,19 @@ + +SELECT + c.c_customer_id, + SUM(ss.ss_sales_price) AS total_sales, + COUNT(ss.ss_ticket_number) AS total_transactions, + AVG(ss.ss_sales_price) AS average_sales_price, + MAX(ss.ss_sales_price) AS max_sales_price +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + c.c_birth_year BETWEEN 1980 AND 1990 + AND ss.ss_sold_date_sk IN (SELECT d_date_sk FROM date_dim WHERE d_year = 2023) +GROUP BY + c.c_customer_id +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/11801.sql b/vortex-bench/sqlstorm/tpcds/11801.sql new file mode 100644 index 00000000000..f53021a3bb0 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/11801.sql @@ -0,0 +1,18 @@ + +SELECT + c.c_first_name, + c.c_last_name, + SUM(ws.ws_sales_price) AS total_sales +FROM + customer AS c +JOIN + web_sales AS ws ON c.c_customer_sk = ws.ws_bill_customer_sk +JOIN + date_dim AS d ON ws.ws_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_first_name, c.c_last_name +ORDER BY + total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/12025.sql b/vortex-bench/sqlstorm/tpcds/12025.sql new file mode 100644 index 00000000000..442a6ebf444 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/12025.sql @@ -0,0 +1,21 @@ + +SELECT + c.c_customer_id, + ca.ca_city, + cd.cd_gender, + SUM(ss.ss_net_profit) AS total_net_profit +FROM + customer c +JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + ca.ca_state = 'CA' +GROUP BY + c.c_customer_id, ca.ca_city, cd.cd_gender +ORDER BY + total_net_profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/12221.sql b/vortex-bench/sqlstorm/tpcds/12221.sql new file mode 100644 index 00000000000..49c89403383 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/12221.sql @@ -0,0 +1,8 @@ 
+SELECT w.w_warehouse_name, COUNT(ss.ss_ticket_number) as total_sales, SUM(ss.ss_net_profit) as total_profit +FROM warehouse w +JOIN store s ON w.w_warehouse_sk = s.s_store_sk +JOIN store_sales ss ON s.s_store_sk = ss.ss_store_sk +WHERE ss.ss_sold_date_sk BETWEEN 2451565 AND 2451592 +GROUP BY w.w_warehouse_name +ORDER BY total_profit DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/12377.sql b/vortex-bench/sqlstorm/tpcds/12377.sql new file mode 100644 index 00000000000..8ac15dd2321 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/12377.sql @@ -0,0 +1,18 @@ + +SELECT + c.c_customer_id, + SUM(ss.ss_quantity) AS total_quantity_sold, + SUM(ss.ss_sales_price) AS total_sales_amount +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + item i ON ss.ss_item_sk = i.i_item_sk +WHERE + i.i_current_price > 50.00 +GROUP BY + c.c_customer_id +ORDER BY + total_sales_amount DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/12492.sql b/vortex-bench/sqlstorm/tpcds/12492.sql new file mode 100644 index 00000000000..c75875ee05d --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/12492.sql @@ -0,0 +1,16 @@ + +SELECT + c.c_customer_id, + SUM(ss.ss_sales_price) AS total_sales, + COUNT(ss.ss_ticket_number) AS total_transactions +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + ss.ss_sold_date_sk BETWEEN 10001 AND 10031 +GROUP BY + c.c_customer_id +ORDER BY + total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/12750.sql b/vortex-bench/sqlstorm/tpcds/12750.sql new file mode 100644 index 00000000000..f8e68934b44 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/12750.sql @@ -0,0 +1,24 @@ + +SELECT + c.c_customer_id, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_sales_price) AS total_sales, + COUNT(ws.ws_order_number) AS total_orders, + d.d_year +FROM + customer c +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +JOIN + date_dim d ON 
ws.ws_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_customer_id, + c.c_first_name, + c.c_last_name, + d.d_year +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/12885.sql b/vortex-bench/sqlstorm/tpcds/12885.sql new file mode 100644 index 00000000000..9d6181b9ea9 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/12885.sql @@ -0,0 +1,17 @@ + +SELECT + c.c_first_name, + c.c_last_name, + SUM(ss.ss_sales_price) AS total_sales, + COUNT(ss.ss_ticket_number) AS total_transactions +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + c.c_birth_year BETWEEN 1980 AND 1990 +GROUP BY + c.c_first_name, c.c_last_name +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/12959.sql b/vortex-bench/sqlstorm/tpcds/12959.sql new file mode 100644 index 00000000000..6b130147412 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/12959.sql @@ -0,0 +1,17 @@ + +SELECT + c.c_first_name, + c.c_last_name, + SUM(ws.ws_sales_price) AS total_spent, + COUNT(ws.ws_order_number) AS total_orders +FROM + customer c +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +WHERE + ws.ws_sold_date_sk BETWEEN 20200101 AND 20201231 +GROUP BY + c.c_first_name, c.c_last_name +ORDER BY + total_spent DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/13258.sql b/vortex-bench/sqlstorm/tpcds/13258.sql new file mode 100644 index 00000000000..62045bd395d --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/13258.sql @@ -0,0 +1,16 @@ +SELECT + c.c_customer_id, + SUM(ws.ws_ext_sales_price) AS total_sales +FROM + customer c +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2001 +GROUP BY + c.c_customer_id +ORDER BY + total_sales DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/13338.sql b/vortex-bench/sqlstorm/tpcds/13338.sql new file mode 100644 index 
00000000000..9e475fbbd04 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/13338.sql @@ -0,0 +1,14 @@ + +SELECT + c.c_first_name, + c.c_last_name, + SUM(ws.ws_sales_price) AS total_sales +FROM + customer c +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +GROUP BY + c.c_first_name, c.c_last_name +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/13345.sql b/vortex-bench/sqlstorm/tpcds/13345.sql new file mode 100644 index 00000000000..9071d7f213c --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/13345.sql @@ -0,0 +1,17 @@ + +SELECT + COUNT(DISTINCT c.c_customer_id) AS unique_customers, + SUM(ss.ss_net_paid) AS total_sales, + AVG(ss.ss_sales_price) AS average_sales_price +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + d.d_month_seq +ORDER BY + d.d_month_seq; diff --git a/vortex-bench/sqlstorm/tpcds/1339.sql b/vortex-bench/sqlstorm/tpcds/1339.sql new file mode 100644 index 00000000000..aa15b9d7887 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/1339.sql @@ -0,0 +1,58 @@ + +WITH DailySales AS ( + SELECT + dd.d_date AS SaleDate, + SUM(ws.ws_sales_price * ws.ws_quantity) AS TotalSales, + COUNT(DISTINCT ws.ws_order_number) AS TotalOrders, + AVG(ws.ws_sales_price) AS AvgOrderValue + FROM + web_sales ws + JOIN + date_dim dd ON ws.ws_sold_date_sk = dd.d_date_sk + GROUP BY + dd.d_date +), +TopCustomers AS ( + SELECT + c.c_customer_id, + SUM(ws.ws_sales_price * ws.ws_quantity) AS CustomerTotalSpent + FROM + customer c + JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + GROUP BY + c.c_customer_id + ORDER BY + CustomerTotalSpent DESC + LIMIT 10 +), +SalesWithRanking AS ( + SELECT + ds.SaleDate, + ds.TotalSales, + ds.TotalOrders, + ds.AvgOrderValue, + RANK() OVER (ORDER BY ds.TotalSales DESC) AS SalesRank + FROM + DailySales ds +) +SELECT + s.SaleDate, + s.TotalSales, + s.TotalOrders, + 
s.AvgOrderValue, + tc.c_customer_id AS TopCustomer, + tc.CustomerTotalSpent, + CASE + WHEN s.AvgOrderValue IS NULL THEN 'No Sales' + WHEN s.AvgOrderValue < 100 THEN 'Low' + ELSE 'High' + END AS SalesCategory +FROM + SalesWithRanking s +LEFT JOIN + TopCustomers tc ON tc.CustomerTotalSpent BETWEEN 500 AND 10000 +WHERE + s.SalesRank <= 5 OR tc.c_customer_id IS NOT NULL +ORDER BY + s.TotalSales DESC, TopCustomer DESC; diff --git a/vortex-bench/sqlstorm/tpcds/13485.sql b/vortex-bench/sqlstorm/tpcds/13485.sql new file mode 100644 index 00000000000..16ca3368702 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/13485.sql @@ -0,0 +1,17 @@ + +SELECT + c.c_customer_id, + SUM(ws.ws_net_profit) AS total_net_profit +FROM + customer c +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2022 +GROUP BY + c.c_customer_id +ORDER BY + total_net_profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/13517.sql b/vortex-bench/sqlstorm/tpcds/13517.sql new file mode 100644 index 00000000000..8900a4c7d4c --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/13517.sql @@ -0,0 +1,18 @@ + +SELECT + c.c_customer_id, + SUM(ss.ss_net_profit) AS total_profit, + COUNT(ss.ss_ticket_number) AS total_sales +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 +GROUP BY + c.c_customer_id +ORDER BY + total_profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/13551.sql b/vortex-bench/sqlstorm/tpcds/13551.sql new file mode 100644 index 00000000000..fb3bc871e5b --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/13551.sql @@ -0,0 +1,17 @@ + +SELECT + c.c_customer_id, + SUM(ss.ss_sales_price) AS total_sales, + AVG(ss.ss_net_profit) AS average_profit, + COUNT(DISTINCT ss.ss_ticket_number) AS number_of_purchases +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + 
ss.ss_sold_date_sk BETWEEN 1 AND 30 +GROUP BY + c.c_customer_id +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/13679.sql b/vortex-bench/sqlstorm/tpcds/13679.sql new file mode 100644 index 00000000000..33be2ad27b1 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/13679.sql @@ -0,0 +1,20 @@ + +SELECT + ca_state, + COUNT(DISTINCT c_customer_sk) AS num_customers, + SUM(ss_net_profit) AS total_net_profit +FROM + customer_address +JOIN + customer ON customer.c_current_addr_sk = customer_address.ca_address_sk +JOIN + store_sales ON store_sales.ss_customer_sk = customer.c_customer_sk +JOIN + date_dim ON date_dim.d_date_sk = store_sales.ss_sold_date_sk +WHERE + d_year = 2023 +GROUP BY + ca_state +ORDER BY + total_net_profit DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/14065.sql b/vortex-bench/sqlstorm/tpcds/14065.sql new file mode 100644 index 00000000000..723767d1415 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/14065.sql @@ -0,0 +1,20 @@ + +SELECT + c.c_customer_id, + ca.ca_city, + SUM(ss.ss_sales_price) AS total_sales, + COUNT(ss.ss_ticket_number) AS total_transactions +FROM + customer c +JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + ca.ca_state = 'CA' + AND ss.ss_sold_date_sk BETWEEN 1000 AND 2000 +GROUP BY + c.c_customer_id, ca.ca_city +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/14852.sql b/vortex-bench/sqlstorm/tpcds/14852.sql new file mode 100644 index 00000000000..b993ab0102a --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/14852.sql @@ -0,0 +1,19 @@ + +SELECT + c.c_customer_id, + ca.ca_city, + SUM(ss.ss_quantity) AS total_quantity_sold, + SUM(ss.ss_sales_price) AS total_sales +FROM + customer AS c +JOIN + customer_address AS ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + store_sales AS ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + ca.ca_state = 'CA' +GROUP BY + c.c_customer_id, 
ca.ca_city +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/14875.sql b/vortex-bench/sqlstorm/tpcds/14875.sql new file mode 100644 index 00000000000..d72bd4a8aae --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/14875.sql @@ -0,0 +1,19 @@ + +SELECT + ca_state, + COUNT(DISTINCT c_customer_sk) AS num_customers, + SUM(ss_net_profit) AS total_net_profit +FROM + customer_address +JOIN + customer ON ca_address_sk = c_current_addr_sk +JOIN + store_sales ON c_customer_sk = ss_customer_sk +JOIN + date_dim ON ss_sold_date_sk = d_date_sk +WHERE + d_year = 2023 +GROUP BY + ca_state +ORDER BY + total_net_profit DESC; diff --git a/vortex-bench/sqlstorm/tpcds/14910.sql b/vortex-bench/sqlstorm/tpcds/14910.sql new file mode 100644 index 00000000000..b54d41646ee --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/14910.sql @@ -0,0 +1,26 @@ + +WITH sales_summary AS ( + SELECT + w.w_warehouse_id, + SUM(ws.ws_sales_price) AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + COUNT(DISTINCT ws.ws_bill_customer_sk) AS total_customers + FROM + web_sales ws + JOIN + warehouse w ON ws.ws_warehouse_sk = w.w_warehouse_sk + WHERE + ws.ws_sold_date_sk BETWEEN 1 AND 1000 + GROUP BY + w.w_warehouse_id +) +SELECT + ss.w_warehouse_id, + ss.total_sales, + ss.total_orders, + ss.total_customers, + (ss.total_sales / NULLIF(ss.total_orders, 0)) AS avg_order_value +FROM + sales_summary ss +ORDER BY + ss.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/1505.sql b/vortex-bench/sqlstorm/tpcds/1505.sql new file mode 100644 index 00000000000..0f89feee325 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/1505.sql @@ -0,0 +1,56 @@ + +WITH RankedSales AS ( + SELECT + ws.ws_order_number, + ws.ws_item_sk, + ws.ws_quantity, + ws.ws_sales_price, + ws.ws_net_profit, + DENSE_RANK() OVER (PARTITION BY ws.ws_item_sk ORDER BY ws.ws_sales_price DESC) AS price_rank + FROM + web_sales ws + WHERE + ws.ws_sold_date_sk IN (SELECT d_date_sk FROM date_dim WHERE d_year = 2023) 
+), +TotalReturns AS ( + SELECT + wr_item_sk, + SUM(wr_return_quantity) AS total_returned_quantity, + SUM(wr_return_amt) AS total_return_amount + FROM + web_returns + GROUP BY + wr_item_sk +), +SalesAndReturns AS ( + SELECT + r.ws_item_sk, + r.ws_quantity, + r.ws_sales_price, + COALESCE(tr.total_returned_quantity, 0) AS total_returned_quantity, + COALESCE(tr.total_return_amount, 0) AS total_return_amount + FROM + RankedSales r + LEFT JOIN + TotalReturns tr ON r.ws_item_sk = tr.wr_item_sk + WHERE + r.price_rank = 1 +) +SELECT + s.ws_item_sk, + SUM(s.ws_quantity) AS total_sales_quantity, + SUM(s.ws_sales_price) AS total_sales_revenue, + SUM(s.total_returned_quantity) AS total_returns, + SUM(s.total_return_amount) AS total_returned_amount, + (SUM(ws.ws_net_profit) - SUM(s.total_return_amount)) AS net_profit +FROM + SalesAndReturns s +JOIN + web_sales ws ON s.ws_item_sk = ws.ws_item_sk +GROUP BY + s.ws_item_sk +HAVING + (SUM(ws.ws_net_profit) - SUM(s.total_return_amount)) > 0 +ORDER BY + net_profit DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/15237.sql b/vortex-bench/sqlstorm/tpcds/15237.sql new file mode 100644 index 00000000000..d967b6fe73c --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/15237.sql @@ -0,0 +1,14 @@ + +SELECT + c.c_first_name, + c.c_last_name, + SUM(ss.ss_sales_price) AS total_sales +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +GROUP BY + c.c_first_name, c.c_last_name +ORDER BY + total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/15887.sql b/vortex-bench/sqlstorm/tpcds/15887.sql new file mode 100644 index 00000000000..f403556d4bc --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/15887.sql @@ -0,0 +1,18 @@ + +SELECT + c.c_first_name, + c.c_last_name, + ca.ca_city, + ca.ca_state, + SUM(ss.ss_sales_price) AS total_sales +FROM + customer AS c +JOIN + customer_address AS ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + store_sales AS ss ON c.c_customer_sk = ss.ss_customer_sk +GROUP BY + 
c.c_first_name, c.c_last_name, ca.ca_city, ca.ca_state +ORDER BY + total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/16049.sql b/vortex-bench/sqlstorm/tpcds/16049.sql new file mode 100644 index 00000000000..f07b3db7bdf --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/16049.sql @@ -0,0 +1,6 @@ + +SELECT ca_state, COUNT(*) as customer_count +FROM customer_address +GROUP BY ca_state +ORDER BY customer_count DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/16350.sql b/vortex-bench/sqlstorm/tpcds/16350.sql new file mode 100644 index 00000000000..5f8740395dc --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/16350.sql @@ -0,0 +1,18 @@ + +SELECT + c.c_first_name, + c.c_last_name, + ca.ca_city, + ss.ss_quantity, + ss.ss_sales_price +FROM + customer c +JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +WHERE + ca.ca_state = 'CA' +ORDER BY + ss.ss_sales_price DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/16490.sql b/vortex-bench/sqlstorm/tpcds/16490.sql new file mode 100644 index 00000000000..0e0285ae03b --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/16490.sql @@ -0,0 +1,8 @@ + +SELECT c_first_name, c_last_name, ca_city, ca_state, SUM(ws_sales_price) AS total_sales +FROM customer c +JOIN customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +GROUP BY c_first_name, c_last_name, ca_city, ca_state +ORDER BY total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/16524.sql b/vortex-bench/sqlstorm/tpcds/16524.sql new file mode 100644 index 00000000000..1598764987a --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/16524.sql @@ -0,0 +1,13 @@ + +SELECT + ca_city, + COUNT(DISTINCT c_customer_id) AS customer_count +FROM + customer_address +JOIN + customer ON customer.c_current_addr_sk = ca_address_sk +GROUP BY + ca_city +ORDER BY + customer_count DESC +LIMIT 10; diff --git 
a/vortex-bench/sqlstorm/tpcds/1731.sql b/vortex-bench/sqlstorm/tpcds/1731.sql new file mode 100644 index 00000000000..fc78c5d9015 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/1731.sql @@ -0,0 +1,59 @@ + +WITH ranked_sales AS ( + SELECT + cs_item_sk, + SUM(cs_quantity) AS total_quantity, + SUM(cs_net_profit) AS total_net_profit, + DENSE_RANK() OVER (PARTITION BY cs_item_sk ORDER BY SUM(cs_net_profit) DESC) AS profit_rank + FROM + catalog_sales + GROUP BY + cs_item_sk +), +customer_summary AS ( + SELECT + c.c_customer_sk, + COUNT(DISTINCT cs.cs_order_number) AS total_orders, + SUM(cs.cs_net_profit) AS total_spent, + AVG(cs.cs_sales_price) AS avg_order_value + FROM + customer c + LEFT JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk + LEFT JOIN + catalog_sales cs ON c.c_customer_sk = cs.cs_ship_customer_sk + GROUP BY + c.c_customer_sk +), +top_customers AS ( + SELECT + cs.c_customer_sk, + cs.total_orders, + cs.total_spent, + cs.avg_order_value, + RANK() OVER (ORDER BY cs.total_spent DESC) AS customer_rank + FROM + customer_summary cs +) +SELECT + c.c_customer_id, + ca.ca_city, + ca.ca_state, + cu.avg_order_value, + rs.total_quantity, + rs.total_net_profit +FROM + top_customers cu +JOIN + customer c ON cu.c_customer_sk = c.c_customer_sk +LEFT JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + ranked_sales rs ON c.c_customer_sk = rs.cs_item_sk +WHERE + cu.customer_rank <= 10 + AND ca.ca_state IS NOT NULL + AND rs.total_quantity > 100 +ORDER BY + cu.total_spent DESC, + rs.total_net_profit DESC; diff --git a/vortex-bench/sqlstorm/tpcds/17672.sql b/vortex-bench/sqlstorm/tpcds/17672.sql new file mode 100644 index 00000000000..15c5403b22c --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/17672.sql @@ -0,0 +1,14 @@ + +SELECT + c.c_customer_id, + COUNT(ss.ss_ticket_number) AS total_sales, + SUM(ss.ss_net_paid) AS total_revenue +FROM + customer c +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +GROUP BY + c.c_customer_id 
+ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/18420.sql b/vortex-bench/sqlstorm/tpcds/18420.sql new file mode 100644 index 00000000000..e81457709b8 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/18420.sql @@ -0,0 +1,5 @@ + +SELECT ca_state, COUNT(*) as customer_count +FROM customer_address +GROUP BY ca_state +ORDER BY customer_count DESC; diff --git a/vortex-bench/sqlstorm/tpcds/19066.sql b/vortex-bench/sqlstorm/tpcds/19066.sql new file mode 100644 index 00000000000..89f59d83d71 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/19066.sql @@ -0,0 +1,13 @@ + +SELECT + c.c_customer_id, + SUM(ws.ws_net_profit) AS total_net_profit +FROM + customer c +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +GROUP BY + c.c_customer_id +ORDER BY + total_net_profit DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/19155.sql b/vortex-bench/sqlstorm/tpcds/19155.sql new file mode 100644 index 00000000000..c9aa4b6931a --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/19155.sql @@ -0,0 +1,21 @@ + +SELECT + c.c_first_name, + c.c_last_name, + ca.ca_city, + ca.ca_state, + SUM(ss.ss_quantity) AS total_sales +FROM + customer c +JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +GROUP BY + c.c_first_name, + c.c_last_name, + ca.ca_city, + ca.ca_state +ORDER BY + total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/1931.sql b/vortex-bench/sqlstorm/tpcds/1931.sql new file mode 100644 index 00000000000..77745df8f36 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/1931.sql @@ -0,0 +1,61 @@ +WITH CustomerReturns AS ( + SELECT + cr_returning_customer_sk, + SUM(cr_return_amount) AS total_return_amount, + COUNT(DISTINCT cr_order_number) AS total_orders_returned, + AVG(cr_return_quantity) AS avg_return_quantity + FROM + catalog_returns + GROUP BY + cr_returning_customer_sk +), +WebSalesAnalysis AS ( + SELECT + ws_ship_customer_sk, + SUM(ws_sales_price) 
AS total_sales, + COUNT(ws_order_number) AS total_orders, + ROW_NUMBER() OVER (PARTITION BY ws_ship_customer_sk ORDER BY SUM(ws_sales_price) DESC) AS sales_rank + FROM + web_sales + WHERE + ws_sold_date_sk BETWEEN 2450000 AND 2450600 + GROUP BY + ws_ship_customer_sk +), +Analysis AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + COALESCE(cr.total_return_amount, 0) AS total_return_amount, + COALESCE(ws.total_sales, 0) AS total_sales, + CASE + WHEN COALESCE(ws.total_sales, 0) = 0 THEN NULL + ELSE ROUND(COALESCE(cr.total_return_amount, 0) / COALESCE(ws.total_sales, 0), 2) + END AS return_to_sales_ratio + FROM + customer c + LEFT JOIN + CustomerReturns cr ON c.c_customer_sk = cr.cr_returning_customer_sk + LEFT JOIN + WebSalesAnalysis ws ON c.c_customer_sk = ws.ws_ship_customer_sk +) +SELECT + a.c_customer_sk, + a.c_first_name, + a.c_last_name, + a.total_return_amount, + a.total_sales, + a.return_to_sales_ratio, + CASE + WHEN a.return_to_sales_ratio IS NULL OR a.return_to_sales_ratio > 0.5 THEN 'High Return' + WHEN a.return_to_sales_ratio <= 0.5 AND a.return_to_sales_ratio > 0 THEN 'Moderate Return' + ELSE 'No Returns' + END AS return_category +FROM + Analysis a +WHERE + a.total_sales > 1000 +ORDER BY + a.total_sales DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/19694.sql b/vortex-bench/sqlstorm/tpcds/19694.sql new file mode 100644 index 00000000000..0e84b551faa --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/19694.sql @@ -0,0 +1,7 @@ + +SELECT c.c_customer_id, SUM(ss.ss_quantity) AS total_quantity_sold +FROM customer c +JOIN store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +GROUP BY c.c_customer_id +ORDER BY total_quantity_sold DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/2146.sql b/vortex-bench/sqlstorm/tpcds/2146.sql new file mode 100644 index 00000000000..097693827cf --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/2146.sql @@ -0,0 +1,41 @@ + +WITH CustomerSales AS ( + SELECT + 
c.c_customer_sk, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_ext_sales_price) AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS order_count + FROM + customer c + LEFT JOIN web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + GROUP BY c.c_customer_sk, c.c_first_name, c.c_last_name +), +TopCustomers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cs.total_sales, + RANK() OVER (ORDER BY cs.total_sales DESC) AS sales_rank + FROM + CustomerSales cs + JOIN customer c ON cs.c_customer_sk = c.c_customer_sk +) +SELECT + tc.c_first_name, + tc.c_last_name, + tc.total_sales, + d.d_date AS sale_date, + sm.sm_type AS shipping_method +FROM + TopCustomers tc +JOIN web_sales ws ON tc.c_customer_sk = ws.ws_bill_customer_sk +JOIN date_dim d ON ws.ws_sold_date_sk = d.d_date_sk +JOIN ship_mode sm ON ws.ws_ship_mode_sk = sm.sm_ship_mode_sk +WHERE + tc.sales_rank <= 10 + AND d.d_year = 2023 + AND tc.total_sales IS NOT NULL +ORDER BY + tc.total_sales DESC, tc.c_last_name ASC; diff --git a/vortex-bench/sqlstorm/tpcds/21629.sql b/vortex-bench/sqlstorm/tpcds/21629.sql new file mode 100644 index 00000000000..936329715bb --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/21629.sql @@ -0,0 +1,74 @@ + +WITH ranked_customers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + cd.cd_marital_status, + ROW_NUMBER() OVER (PARTITION BY cd.cd_gender ORDER BY cd.cd_purchase_estimate DESC) AS rank + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), +top_customers AS ( + SELECT * FROM ranked_customers WHERE rank <= 10 +), +sales_info AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_sales_price) AS total_sales, + COUNT(ws.ws_order_number) AS total_orders, + AVG(ws.ws_net_profit) AS avg_profit + FROM + web_sales ws + JOIN + top_customers tc ON ws.ws_bill_customer_sk = tc.c_customer_sk + GROUP BY + ws.ws_item_sk +), +inventory_data AS ( + SELECT + inv.inv_item_sk, + inv.inv_quantity_on_hand, + CASE + 
WHEN inv.inv_quantity_on_hand IS NULL THEN 'Out of Stock' + ELSE 'In Stock' + END AS stock_status + FROM + inventory inv +), +final_result AS ( + SELECT + si.ws_item_sk, + si.total_sales, + si.total_orders, + si.avg_profit, + COALESCE(id.inv_quantity_on_hand, 0) AS available_quantity, + id.stock_status + FROM + sales_info si + LEFT JOIN + inventory_data id ON si.ws_item_sk = id.inv_item_sk +) +SELECT + fr.ws_item_sk, + fr.total_sales, + fr.total_orders, + fr.avg_profit, + fr.available_quantity, + fr.stock_status, + CASE + WHEN fr.avg_profit > 100 THEN 'High Profit' + WHEN fr.avg_profit IS NULL THEN 'No Profit Data' + ELSE 'Moderate Profit' + END AS profit_category +FROM + final_result fr +WHERE + fr.total_sales > 1000 + OR fr.total_orders > 50 +ORDER BY + fr.total_sales DESC, + fr.total_orders ASC; diff --git a/vortex-bench/sqlstorm/tpcds/21766.sql b/vortex-bench/sqlstorm/tpcds/21766.sql new file mode 100644 index 00000000000..ad5fb99f566 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/21766.sql @@ -0,0 +1,71 @@ + +WITH RankedReturns AS ( + SELECT + sr_returned_date_sk, + SUM(sr_return_quantity) AS total_returned_quantity, + ROW_NUMBER() OVER (PARTITION BY sr_item_sk ORDER BY SUM(sr_return_quantity) DESC) AS rk + FROM + store_returns + GROUP BY + sr_item_sk, sr_returned_date_sk +), +CustomerReturns AS ( + SELECT + sr_customer_sk, + COUNT(DISTINCT sr_ticket_number) AS return_count, + MAX(CASE WHEN sr_reason_sk IS NULL THEN 1 ELSE 0 END) AS null_reason_return + FROM + store_returns + WHERE + sr_return_quantity > 0 + GROUP BY + sr_customer_sk +), +CustomerDemographics AS ( + SELECT + cd_gender, + COUNT(DISTINCT c_customer_sk) AS customer_count, + MAX(cd_purchase_estimate) AS max_purchase_estimate + FROM + customer_demographics + INNER JOIN + customer ON customer.c_current_cdemo_sk = customer_demographics.cd_demo_sk + GROUP BY + cd_gender +), +DateDynamics AS ( + SELECT + d_year, + COUNT(ws_order_number) AS total_orders, + SUM(ws_net_profit) AS total_profit, + 
AVG(ws_net_profit) AS avg_profit_per_order + FROM + web_sales + JOIN + date_dim ON ws_sold_date_sk = d_date_sk + GROUP BY + d_year +) +SELECT + cd.cd_gender, + cd.customer_count, + cd.max_purchase_estimate, + dd.d_year, + dd.total_orders, + dd.total_profit, + dd.avg_profit_per_order, + COALESCE(cr.return_count, 0) AS total_customer_returns, + COALESCE(cr.null_reason_return, 0) AS returns_with_null_reason +FROM + CustomerDemographics cd +JOIN + DateDynamics dd ON dd.total_orders > 1000 +LEFT JOIN + CustomerReturns cr ON cr.sr_customer_sk = cd.customer_count +WHERE + cd.max_purchase_estimate > 1000 + AND (cd_gender = 'M' OR cd_gender = 'F') +ORDER BY + cd_gender DESC, + total_profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/22010.sql b/vortex-bench/sqlstorm/tpcds/22010.sql new file mode 100644 index 00000000000..ea647311f96 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/22010.sql @@ -0,0 +1,84 @@ + +WITH RankedSales AS ( + SELECT + ws.ws_order_number, + ws.ws_item_sk, + ws.ws_ext_sales_price, + ROW_NUMBER() OVER (PARTITION BY ws.ws_item_sk ORDER BY ws.ws_sold_date_sk DESC) AS Rank + FROM + web_sales ws + WHERE + ws.ws_net_profit > ( + SELECT AVG(ws_net_profit) + FROM web_sales + WHERE ws_item_sk = ws.ws_item_sk + ) +), +InventoryCheck AS ( + SELECT + inv.inv_item_sk, + SUM(inv.inv_quantity_on_hand) AS total_quantity + FROM + inventory inv + GROUP BY + inv.inv_item_sk + HAVING + SUM(inv.inv_quantity_on_hand) < (SELECT AVG(inv_quantity_on_hand) FROM inventory) +), +CustomerDemographics AS ( + SELECT + cd.cd_demo_sk, + cd.cd_gender, + cd.cd_marital_status, + COUNT(DISTINCT c.c_customer_sk) AS customer_count + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + GROUP BY + cd.cd_demo_sk, cd.cd_gender, cd.cd_marital_status +), +PromotionAnalysis AS ( + SELECT + p.p_promo_id, + SUM(ws.ws_net_paid_inc_tax) AS total_revenue, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + SUM(CASE WHEN ws.ws_ship_date_sk IS 
NULL THEN 1 ELSE 0 END) AS pending_shipments + FROM + promotion p + LEFT JOIN + web_sales ws ON p.p_promo_sk = ws.ws_promo_sk + WHERE + p.p_discount_active = 'Y' + GROUP BY + p.p_promo_id +) +SELECT + cd.cd_gender, + cd.cd_marital_status, + SUM(RS.ws_ext_sales_price) AS total_sales, + COUNT(DISTINCT RS.ws_order_number) AS order_count, + I.total_quantity AS inventory_quantity, + PA.total_revenue AS promotion_revenue, + PA.pending_shipments +FROM + RankedSales RS +JOIN + CustomerDemographics cd ON RS.ws_item_sk IN (SELECT ic.inv_item_sk FROM InventoryCheck ic) +LEFT JOIN + InventoryCheck I ON I.inv_item_sk = RS.ws_item_sk +LEFT JOIN + PromotionAnalysis PA ON PA.p_promo_id IN ( + SELECT DISTINCT p.p_promo_id + FROM promotion p + WHERE p.p_start_date_sk < (SELECT MAX(d.d_date_sk) FROM date_dim d) + AND p.p_end_date_sk > (SELECT MIN(d.d_date_sk) FROM date_dim d) + ) +WHERE + RS.Rank = 1 +GROUP BY + cd.cd_gender, cd.cd_marital_status, I.total_quantity, PA.total_revenue, PA.pending_shipments +HAVING + (SUM(RS.ws_ext_sales_price) IS NOT NULL OR COUNT(DISTINCT RS.ws_order_number) > 0) +ORDER BY + cd.cd_gender, cd.cd_marital_status; diff --git a/vortex-bench/sqlstorm/tpcds/22956.sql b/vortex-bench/sqlstorm/tpcds/22956.sql new file mode 100644 index 00000000000..e8d31a052f7 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/22956.sql @@ -0,0 +1,78 @@ + +WITH customer_info AS ( + SELECT c.c_customer_sk, + c.c_customer_id, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_purchase_estimate, + ca.ca_city, + ca.ca_state + FROM customer c + LEFT JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +), + +address_stats AS ( + SELECT ca_state, + COUNT(DISTINCT c_customer_sk) AS customer_count, + AVG(cd_purchase_estimate) AS avg_purchase_estimate + FROM customer_info + GROUP BY ca_state +), + +ranked_customers AS ( + SELECT ci.c_customer_id, + ci.cd_gender, + ci.ca_city, + ci.ca_state, + RANK() OVER 
(PARTITION BY ci.ca_state ORDER BY ci.cd_purchase_estimate DESC) AS purchase_rank + FROM customer_info ci +), + +sales_data AS ( + SELECT ws_bill_customer_sk, + SUM(ws_sales_price) AS total_sales + FROM web_sales + GROUP BY ws_bill_customer_sk +), + +return_data AS ( + SELECT sr_customer_sk, + SUM(sr_return_amt_inc_tax) AS total_returns + FROM store_returns + GROUP BY sr_customer_sk +), + +final_analysis AS ( + SELECT ci.c_customer_id, + ci.cd_gender, + ci.ca_city, + ci.ca_state, + COALESCE(sd.total_sales, 0) AS total_sales, + COALESCE(rd.total_returns, 0) AS total_returns, + (COALESCE(sd.total_sales, 0) - COALESCE(rd.total_returns, 0)) AS net_revenue, + (SELECT AVG(avg_purchase_estimate) FROM address_stats AS a WHERE a.ca_state = ci.ca_state) AS state_avg_purchase, + (CASE + WHEN (COALESCE(sd.total_sales, 0) - COALESCE(rd.total_returns, 0)) < 0 THEN 'Negative Revenue' + WHEN (COALESCE(sd.total_sales, 0) - COALESCE(rd.total_returns, 0)) = 0 THEN 'Break Even' + ELSE 'Positive Revenue' + END) AS revenue_status + FROM customer_info ci + LEFT JOIN sales_data sd ON ci.c_customer_sk = sd.ws_bill_customer_sk + LEFT JOIN return_data rd ON ci.c_customer_sk = rd.sr_customer_sk +) + +SELECT fa.c_customer_id, + fa.cd_gender, + fa.ca_city, + fa.ca_state, + fa.total_sales, + fa.total_returns, + fa.net_revenue, + fa.state_avg_purchase, + fa.revenue_status, + rk.purchase_rank +FROM final_analysis fa +JOIN ranked_customers rk ON fa.c_customer_id = rk.c_customer_id +WHERE rk.purchase_rank <= 10 OR fa.net_revenue < 0 +ORDER BY fa.ca_state, fa.net_revenue DESC, rk.purchase_rank; diff --git a/vortex-bench/sqlstorm/tpcds/23039.sql b/vortex-bench/sqlstorm/tpcds/23039.sql new file mode 100644 index 00000000000..8ad2e28c5dd --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/23039.sql @@ -0,0 +1,59 @@ + +WITH RankedSales AS ( + SELECT + ws.ws_item_sk, + ws.ws_order_number, + ws.ws_net_profit, + ROW_NUMBER() OVER (PARTITION BY ws.ws_item_sk ORDER BY ws.ws_net_profit DESC) AS rank_profit, + 
RANK() OVER (ORDER BY ws.ws_net_profit ASC) AS rank_all_profit + FROM + web_sales ws + WHERE + ws.ws_net_paid > 0 + AND (ws.ws_ext_discount_amt IS NULL OR ws.ws_ext_discount_amt >= 0) +), +HighProfitItems AS ( + SELECT + rs.ws_item_sk, + rs.ws_order_number, + rs.ws_net_profit + FROM + RankedSales rs + WHERE + rs.rank_profit <= 10 +), +SalesSummary AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_quantity) AS total_quantity, + SUM(ws.ws_net_paid) AS total_net_paid, + AVG(ws.ws_net_profit) AS avg_profit + FROM + web_sales ws + JOIN + HighProfitItems hpi ON ws.ws_item_sk = hpi.ws_item_sk + GROUP BY + ws.ws_item_sk +) +SELECT + ci.i_item_desc, + COALESCE(ss.total_quantity, 0) AS total_quantity, + COALESCE(ss.total_net_paid, 0.00) AS total_net_paid, + CASE + WHEN ss.avg_profit IS NOT NULL THEN ROUND(ss.avg_profit, 2) + ELSE (SELECT AVG(ws.ws_net_profit) + FROM web_sales ws + WHERE ws.ws_item_sk = ci.i_item_sk + AND ws.ws_net_profit IS NOT NULL) + END AS avg_net_profit, + CASE + WHEN (SELECT COUNT(*) FROM SalesSummary WHERE total_net_paid > 100) >= 1 THEN 'High Sellers' + ELSE 'Low Sellers' + END AS seller_category +FROM + item ci +LEFT JOIN + SalesSummary ss ON ci.i_item_sk = ss.ws_item_sk +ORDER BY + avg_net_profit DESC +LIMIT 20; diff --git a/vortex-bench/sqlstorm/tpcds/23978.sql b/vortex-bench/sqlstorm/tpcds/23978.sql new file mode 100644 index 00000000000..4ea16d05564 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/23978.sql @@ -0,0 +1,53 @@ + +WITH RankedCustomers AS ( + SELECT + c.c_customer_sk, + c.c_customer_id, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_purchase_estimate, + ROW_NUMBER() OVER (PARTITION BY cd.cd_gender ORDER BY cd.cd_purchase_estimate DESC) AS rn + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), +InventoryAnalysis AS ( + SELECT + inv.inv_item_sk, + SUM(inv.inv_quantity_on_hand) AS total_quantity, + COUNT(DISTINCT inv.inv_warehouse_sk) AS warehouse_count + FROM + inventory inv + GROUP BY + 
inv.inv_item_sk +), +SalesData AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_net_profit) AS total_net_profit + FROM + web_sales ws + WHERE + ws.ws_sold_date_sk >= (SELECT MAX(d.d_date_sk) FROM date_dim d WHERE d.d_year = 2022) + GROUP BY + ws.ws_item_sk +) +SELECT + rc.c_customer_id, + rc.cd_gender, + ia.total_quantity, + sd.total_net_profit, + COALESCE(sd.total_net_profit, 0) - COALESCE(ia.total_quantity * 0.5, 0) AS adjusted_profit +FROM + RankedCustomers rc +LEFT JOIN + InventoryAnalysis ia ON ia.inv_item_sk = rc.c_customer_sk +FULL OUTER JOIN + SalesData sd ON sd.ws_item_sk = rc.c_customer_sk +WHERE + rc.rn <= 5 AND + (rc.cd_marital_status = 'M' OR rc.cd_purchase_estimate IS NOT NULL) AND + (ia.total_quantity IS NULL OR ia.warehouse_count > 2) +ORDER BY + adjusted_profit DESC, rc.c_customer_id; diff --git a/vortex-bench/sqlstorm/tpcds/24088.sql b/vortex-bench/sqlstorm/tpcds/24088.sql new file mode 100644 index 00000000000..8835bff455e --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/24088.sql @@ -0,0 +1,46 @@ + +WITH SalesData AS ( + SELECT + ws_item_sk, + SUM(ws_quantity) AS total_quantity, + SUM(ws_ext_sales_price) AS total_sales, + AVG(ws_net_paid) AS average_payment, + COUNT(DISTINCT ws_order_number) AS order_count + FROM web_sales + WHERE ws_sold_date_sk BETWEEN 2450000 AND 2451000 + GROUP BY ws_item_sk +), +FilteredSales AS ( + SELECT + sd.ws_item_sk, + sd.total_quantity, + sd.total_sales, + sd.average_payment + FROM SalesData sd + JOIN item i ON sd.ws_item_sk = i.i_item_sk + WHERE i.i_current_price IS NOT NULL + AND i.i_formulation = 'Liquid' + AND sd.total_quantity > ( + SELECT AVG(total_quantity) + FROM SalesData + ) +), +TopItems AS ( + SELECT + fs.ws_item_sk, + ROW_NUMBER() OVER (ORDER BY fs.total_sales DESC) AS rank + FROM FilteredSales fs +) +SELECT + i.i_item_id, + COALESCE(SUM(ws.ws_net_paid_inc_tax), 0) AS total_net_paid_inc_tax, + COALESCE(COUNT(ws.ws_order_number), 0) AS order_total, + MAX(i.i_current_price) AS max_price, + COUNT(DISTINCT CASE 
WHEN ws.ws_ext_tax > 0 THEN ws.ws_order_number END) AS orders_with_tax, + STRING_AGG(DISTINCT i.i_brand, ', ') AS brands_utilized +FROM item i +LEFT JOIN web_sales ws ON i.i_item_sk = ws.ws_item_sk +JOIN TopItems ti ON i.i_item_sk = ti.ws_item_sk +WHERE ti.rank <= 10 +GROUP BY i.i_item_id +ORDER BY total_net_paid_inc_tax DESC; diff --git a/vortex-bench/sqlstorm/tpcds/24979.sql b/vortex-bench/sqlstorm/tpcds/24979.sql new file mode 100644 index 00000000000..4cc77bcd455 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/24979.sql @@ -0,0 +1,72 @@ + +WITH ranked_customers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + cd.cd_marital_status, + ROW_NUMBER() OVER (PARTITION BY cd.cd_gender ORDER BY cd.cd_purchase_estimate DESC) AS purchase_rank + FROM customer AS c + INNER JOIN customer_demographics AS cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + WHERE cd.cd_credit_rating IS NOT NULL +), +high_value_customers AS ( + SELECT + rc.c_customer_sk, + rc.c_first_name, + rc.c_last_name, + rc.cd_gender, + rc.cd_marital_status, + COALESCE(SUM(ws.ws_net_paid), 0) AS total_spent, + COUNT(ws.ws_order_number) AS purchase_count + FROM ranked_customers AS rc + LEFT JOIN web_sales AS ws ON rc.c_customer_sk = ws.ws_bill_customer_sk + WHERE rc.purchase_rank <= 5 + GROUP BY rc.c_customer_sk, rc.c_first_name, rc.c_last_name, rc.cd_gender, rc.cd_marital_status +), +customer_location AS ( + SELECT + c.c_customer_sk, + ca.ca_city, + ca.ca_state, + ROW_NUMBER() OVER (PARTITION BY c.c_customer_sk ORDER BY ca.ca_city) AS city_rank + FROM customer AS c + JOIN customer_address AS ca ON c.c_current_addr_sk = ca.ca_address_sk +), +customer_with_locations AS ( + SELECT + hvc.c_customer_sk, + hvc.c_first_name, + hvc.c_last_name, + hvc.cd_gender, + hvc.cd_marital_status, + hvc.total_spent, + hvc.purchase_count, + cl.ca_city, + cl.ca_state + FROM high_value_customers AS hvc + LEFT JOIN customer_location AS cl ON hvc.c_customer_sk = cl.c_customer_sk + WHERE cl.city_rank = 
1 +) +SELECT + cwl.c_customer_sk, + CONCAT(cwl.c_first_name, ' ', cwl.c_last_name) AS full_name, + cwl.cd_gender, + cwl.cd_marital_status, + cwl.total_spent, + CASE + WHEN total_spent > 10000 THEN 'High Roller' + WHEN total_spent > 5000 THEN 'Moderate Spender' + ELSE 'Budget Buyer' + END AS customer_type, + cwl.ca_city, + cwl.ca_state, + CASE + WHEN cwl.total_spent >= (SELECT AVG(total_spent) FROM high_value_customers) THEN TRUE + ELSE FALSE + END AS above_average_spender +FROM customer_with_locations AS cwl +WHERE cwl.total_spent IS NOT NULL +ORDER BY cwl.total_spent DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpcds/25020.sql b/vortex-bench/sqlstorm/tpcds/25020.sql new file mode 100644 index 00000000000..52c5d3b9bd4 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25020.sql @@ -0,0 +1,58 @@ + +WITH Address_Enhanced AS ( + SELECT + ca_address_sk, + TRIM(ca_street_number) || ' ' || + UPPER(LEFT(ca_street_name, 1)) || LOWER(SUBSTRING(ca_street_name FROM 2)) || ' ' || + UPPER(ca_street_type) AS Full_Address, + ca_city, + ca_state, + ca_zip, + ca_country + FROM customer_address +), +Customer_Enhanced AS ( + SELECT + c.c_customer_sk, + CONCAT(UPPER(SUBSTRING(c.c_first_name, 1, 1)), LOWER(SUBSTRING(c.c_first_name FROM 2)), ' ', + UPPER(SUBSTRING(c.c_last_name, 1, 1)), LOWER(SUBSTRING(c.c_last_name FROM 2))) AS Full_Name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate, + ae.Full_Address, + ae.ca_city, + ae.ca_state, + ae.ca_zip, + ae.ca_country + FROM customer c + JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN Address_Enhanced ae ON c.c_current_addr_sk = ae.ca_address_sk +), +Sales_Enhanced AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_quantity) AS Total_Quantity, + SUM(ws.ws_net_profit) AS Total_Profit + FROM web_sales ws + GROUP BY ws.ws_item_sk +) +SELECT + ce.Full_Name, + ce.cd_gender, + ce.cd_marital_status, + ce.cd_education_status, + ce.cd_purchase_estimate, + se.Total_Quantity, + 
se.Total_Profit, + ce.Full_Address, + ce.ca_city, + ce.ca_state, + ce.ca_zip, + ce.ca_country +FROM Customer_Enhanced ce +JOIN Sales_Enhanced se ON ce.c_customer_sk = se.ws_item_sk +WHERE UPPER(ce.ca_state) = 'CA' +AND ce.cd_purchase_estimate > 500 +ORDER BY se.Total_Profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/25025.sql b/vortex-bench/sqlstorm/tpcds/25025.sql new file mode 100644 index 00000000000..8044945719a --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25025.sql @@ -0,0 +1,53 @@ + +WITH Address_summary AS ( + SELECT + ca_state, + COUNT(DISTINCT ca_address_id) AS unique_addresses, + SUM(CASE WHEN LENGTH(ca_street_name) > 20 THEN 1 ELSE 0 END) AS long_street_names, + ARRAY_AGG(DISTINCT ca_city || ', ' || ca_street_name) AS city_street_combinations + FROM + customer_address + GROUP BY + ca_state +), +Demographics_summary AS ( + SELECT + cd_gender, + COUNT(*) AS num_customers, + AVG(cd_purchase_estimate) AS avg_purchase_estimate, + STRING_AGG(DISTINCT cd_education_status, ', ') AS education_levels + FROM + customer_demographics + GROUP BY + cd_gender +), +Combined_summary AS ( + SELECT + a.ca_state, + a.unique_addresses, + a.long_street_names, + a.city_street_combinations, + d.cd_gender, + d.num_customers, + d.avg_purchase_estimate, + d.education_levels + FROM + Address_summary a + JOIN + Demographics_summary d + ON + a.unique_addresses > 10 AND d.num_customers > 50 +) +SELECT + ca_state, + unique_addresses, + long_street_names, + city_street_combinations, + cd_gender, + num_customers, + avg_purchase_estimate, + education_levels +FROM + Combined_summary +ORDER BY + unique_addresses DESC, num_customers DESC; diff --git a/vortex-bench/sqlstorm/tpcds/25030.sql b/vortex-bench/sqlstorm/tpcds/25030.sql new file mode 100644 index 00000000000..d9a96755259 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25030.sql @@ -0,0 +1,61 @@ +WITH AddressInfo AS ( + SELECT + ca_state, + ca_city, + UPPER(ca_street_name) AS upper_street_name, + LOWER(ca_city) AS 
lower_city, + CONCAT_WS(',', ca_street_number, ca_street_name, ca_city, ca_state) AS full_address, + LENGTH(ca_street_name) AS street_name_length, + LENGTH(ca_city) AS city_length + FROM + customer_address +), +DemographicInfo AS ( + SELECT + cd_gender, + cd_marital_status, + cd_education_status, + COUNT(cd_demo_sk) AS demographic_count + FROM + customer_demographics + WHERE + cd_purchase_estimate > 100 + GROUP BY + cd_gender, + cd_marital_status, + cd_education_status +), +AggregateInfo AS ( + SELECT + ai.ca_state, + di.cd_gender, + di.cd_marital_status, + di.cd_education_status, + AVG(ai.street_name_length) AS avg_street_name_length, + SUM(di.demographic_count) AS total_demographics + FROM + AddressInfo ai + JOIN + DemographicInfo di ON ai.ca_city = di.cd_gender + GROUP BY + ai.ca_state, + di.cd_gender, + di.cd_marital_status, + di.cd_education_status +) +SELECT + a.ca_state, + d.cd_gender, + d.cd_marital_status, + d.cd_education_status, + a.avg_street_name_length, + a.total_demographics +FROM + AggregateInfo a +JOIN + DemographicInfo d ON a.ca_state = d.cd_gender +WHERE + a.total_demographics > 10 +ORDER BY + a.avg_street_name_length DESC, + a.total_demographics ASC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/25083.sql b/vortex-bench/sqlstorm/tpcds/25083.sql new file mode 100644 index 00000000000..ccc30c323a7 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25083.sql @@ -0,0 +1,69 @@ + +WITH CustomerInfo AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate, + ca.ca_city, + ca.ca_state, + ca.ca_country + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +), +SalesInfo AS ( + SELECT + ws_bill_customer_sk, + SUM(ws_net_paid) AS total_sales + FROM + web_sales + GROUP BY + ws_bill_customer_sk +), 
+ReturnsInfo AS ( + SELECT + wr_returning_customer_sk, + SUM(wr_return_amt) AS total_returns + FROM + web_returns + GROUP BY + wr_returning_customer_sk +), +CombinedInfo AS ( + SELECT + ci.full_name, + ci.ca_city, + ci.ca_state, + ci.ca_country, + COALESCE(si.total_sales, 0) AS total_sales, + COALESCE(ri.total_returns, 0) AS total_returns, + COALESCE(si.total_sales, 0) - COALESCE(ri.total_returns, 0) AS net_profit + FROM + CustomerInfo ci + LEFT JOIN + SalesInfo si ON ci.c_customer_sk = si.ws_bill_customer_sk + LEFT JOIN + ReturnsInfo ri ON ci.c_customer_sk = ri.wr_returning_customer_sk +) +SELECT + full_name, + ca_city, + ca_state, + ca_country, + total_sales, + total_returns, + net_profit, + RANK() OVER (ORDER BY net_profit DESC) AS profit_rank +FROM + CombinedInfo +WHERE + net_profit > 0 +ORDER BY + profit_rank +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/25137.sql b/vortex-bench/sqlstorm/tpcds/25137.sql new file mode 100644 index 00000000000..dbc228f56ed --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25137.sql @@ -0,0 +1,63 @@ + +WITH AddressDetails AS ( + SELECT + ca.ca_address_sk, + CONCAT(ca.ca_street_number, ' ', ca.ca_street_name, ' ', ca.ca_street_type, + CASE WHEN ca.ca_suite_number IS NOT NULL THEN CONCAT(' Ste ', ca.ca_suite_number) ELSE '' END) AS full_address, + CONCAT(ca.ca_city, ', ', ca.ca_state, ' ', ca.ca_zip) AS city_state_zip, + ca.ca_country + FROM + customer_address ca +), +CustomerDemographics AS ( + SELECT + cd.cd_demo_sk, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate, + cd.cd_credit_rating + FROM + customer_demographics cd +), +CustomerDetails AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + c.c_email_address, + ad.full_address, + ad.city_state_zip, + ad.ca_country + FROM + customer c + JOIN AddressDetails ad ON c.c_current_addr_sk = ad.ca_address_sk +), +SalesOverview AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_quantity) AS total_quantity, + SUM(ws.ws_net_paid) AS 
total_sales, + COUNT(DISTINCT ws.ws_order_number) AS total_orders + FROM + web_sales ws + GROUP BY + ws.ws_item_sk +) +SELECT + cd.c_first_name, + cd.c_last_name, + cd.c_email_address, + cd.city_state_zip, + cd.ca_country, + so.total_quantity, + so.total_sales, + so.total_orders +FROM + CustomerDetails cd +JOIN SalesOverview so ON cd.c_customer_sk = so.ws_item_sk +WHERE + cd.c_email_address LIKE '%@example.com' +ORDER BY + so.total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/25285.sql b/vortex-bench/sqlstorm/tpcds/25285.sql new file mode 100644 index 00000000000..12a6d05d878 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25285.sql @@ -0,0 +1,57 @@ + +WITH ranked_customers AS ( + SELECT + c.c_customer_id, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate, + ROW_NUMBER() OVER (PARTITION BY cd.cd_gender ORDER BY cd.cd_purchase_estimate DESC) AS rank + FROM + customer c + JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), +high_value_customers AS ( + SELECT + rc.full_name, + rc.cd_gender, + rc.cd_marital_status, + rc.cd_education_status, + rc.cd_purchase_estimate + FROM + ranked_customers rc + WHERE + rc.rank <= 10 +), +address_summary AS ( + SELECT + ca.ca_city, + ca.ca_state, + COUNT(DISTINCT c.c_customer_id) AS customer_count, + AVG(cd.cd_purchase_estimate) AS avg_purchase_estimate + FROM + customer_address ca + JOIN customer c ON c.c_current_addr_sk = ca.ca_address_sk + JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + GROUP BY + ca.ca_city, + ca.ca_state +) +SELECT + hvc.full_name, + hvc.cd_gender, + hvc.cd_marital_status, + hvc.cd_education_status, + hvc.cd_purchase_estimate, + asu.ca_city, + asu.ca_state, + asu.customer_count, + asu.avg_purchase_estimate +FROM + high_value_customers hvc +JOIN + address_summary asu ON hvc.cd_gender = 'F' AND asu.customer_count > 50 +ORDER BY + 
hvc.cd_purchase_estimate DESC, + asu.customer_count DESC; diff --git a/vortex-bench/sqlstorm/tpcds/25425.sql b/vortex-bench/sqlstorm/tpcds/25425.sql new file mode 100644 index 00000000000..ba9ab493b0a --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25425.sql @@ -0,0 +1,60 @@ + +WITH address_summary AS ( + SELECT + ca_city AS city, + ca_state AS state, + COUNT(DISTINCT ca_address_sk) AS total_addresses, + SUM(LENGTH(ca_street_name) + LENGTH(ca_street_number) + LENGTH(ca_street_type)) AS total_characters, + AVG(LENGTH(ca_street_name) + LENGTH(ca_street_number) + LENGTH(ca_street_type)) AS avg_address_length + FROM + customer_address + GROUP BY + ca_city, ca_state +), +customer_summary AS ( + SELECT + cd_gender, + COUNT(DISTINCT c_customer_sk) AS total_customers, + SUM(cd_dep_count) AS total_dependents, + AVG(cd_purchase_estimate) AS avg_purchase_estimate + FROM + customer + JOIN + customer_demographics ON c_current_cdemo_sk = cd_demo_sk + GROUP BY + cd_gender +), +sales_summary AS ( + SELECT + ws_bill_customer_sk, + SUM(ws_net_profit) AS total_profit, + COUNT(ws_order_number) AS total_orders, + SUM(ws_quantity) AS total_items_sold + FROM + web_sales + GROUP BY + ws_bill_customer_sk +) +SELECT + ca.city AS city, + ca.state AS state, + ca.total_addresses, + ca.total_characters, + ca.avg_address_length, + cu.cd_gender, + cu.total_customers, + cu.total_dependents, + cu.avg_purchase_estimate, + ss.total_profit, + ss.total_orders, + ss.total_items_sold +FROM + address_summary ca +JOIN + customer_summary cu ON ca.total_addresses > 100 +JOIN + sales_summary ss ON ss.ws_bill_customer_sk = cu.total_customers +WHERE + ca.total_characters > 1000 +ORDER BY + ca.city, cu.cd_gender; diff --git a/vortex-bench/sqlstorm/tpcds/25549.sql b/vortex-bench/sqlstorm/tpcds/25549.sql new file mode 100644 index 00000000000..81902250819 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25549.sql @@ -0,0 +1,60 @@ + +WITH AddressComponents AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' 
', ca_street_name, ' ', ca_street_type) AS full_address, + ca_city, + ca_state, + ca_zip + FROM + customer_address +), +CustomerDetails AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_credit_rating, + ac.full_address, + ac.ca_city, + ac.ca_state, + ac.ca_zip + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + AddressComponents ac ON c.c_current_addr_sk = ac.ca_address_sk +), +SalesDetails AS ( + SELECT + ws.ws_order_number, + ws.ws_quantity, + ws.ws_ext_sales_price, + ws.ws_net_paid, + cd.c_customer_sk + FROM + web_sales ws + JOIN + CustomerDetails cd ON ws.ws_bill_customer_sk = cd.c_customer_sk +) +SELECT + cd.c_first_name, + cd.c_last_name, + cd.ca_city, + cd.ca_state, + COUNT(sd.ws_order_number) AS total_orders, + SUM(sd.ws_quantity) AS total_quantity, + SUM(sd.ws_ext_sales_price) AS total_sales, + AVG(sd.ws_net_paid) AS avg_net_paid +FROM + CustomerDetails cd +LEFT JOIN + SalesDetails sd ON cd.c_customer_sk = sd.c_customer_sk +GROUP BY + cd.c_first_name, cd.c_last_name, cd.ca_city, cd.ca_state +ORDER BY + total_sales DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpcds/25666.sql b/vortex-bench/sqlstorm/tpcds/25666.sql new file mode 100644 index 00000000000..8e5738b615c --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25666.sql @@ -0,0 +1,59 @@ + +WITH AddressStats AS ( + SELECT + ca_state, + COUNT(*) AS total_addresses, + AVG(LENGTH(ca_street_name)) AS avg_street_name_length, + SUM(CASE WHEN ca_street_type IS NOT NULL THEN 1 ELSE 0 END) AS street_type_count + FROM + customer_address + GROUP BY + ca_state +), +CustomerStats AS ( + SELECT + cd_gender, + COUNT(*) AS total_customers, + AVG(cd_purchase_estimate) AS avg_purchase_estimate, + MAX(cd_dep_count) AS max_dependents + FROM + customer_demographics + GROUP BY + cd_gender +), +SalesStats AS ( + SELECT + sm.sm_type, + COUNT(ws.ws_order_number) AS total_sales, + SUM(ws.ws_sales_price) AS 
total_revenue, + AVG(ws.ws_net_profit) AS avg_net_profit + FROM + web_sales ws + JOIN + ship_mode sm ON ws.ws_ship_mode_sk = sm.sm_ship_mode_sk + GROUP BY + sm.sm_type +) +SELECT + A.ca_state, + A.total_addresses, + A.avg_street_name_length, + A.street_type_count, + C.cd_gender, + C.total_customers, + C.avg_purchase_estimate, + C.max_dependents, + S.sm_type, + S.total_sales, + S.total_revenue, + S.avg_net_profit +FROM + AddressStats A +JOIN + CustomerStats C ON C.total_customers > 1000 +JOIN + SalesStats S ON S.total_sales > 50 +ORDER BY + A.total_addresses DESC, + C.total_customers DESC, + S.total_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpcds/25743.sql b/vortex-bench/sqlstorm/tpcds/25743.sql new file mode 100644 index 00000000000..755e783d8f2 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25743.sql @@ -0,0 +1,67 @@ + +WITH EnhancedCustomerInfo AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_salutation, ' ', c.c_first_name, ' ', c.c_last_name) AS full_name, + ca.ca_city, + ca.ca_state, + ca.ca_zip, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate, + cd.cd_credit_rating, + cd.cd_dep_count, + cd.cd_dep_employed_count, + cd.cd_dep_college_count + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +), +SalesSummary AS ( + SELECT + ws_bill_customer_sk, + COUNT(ws_order_number) AS total_orders, + SUM(ws_net_paid_inc_tax) AS total_revenue + FROM + web_sales + GROUP BY + ws_bill_customer_sk +), +CustomerBenchmarking AS ( + SELECT + e.c_customer_sk, + e.full_name, + e.ca_city, + e.ca_state, + e.ca_zip, + e.cd_gender, + e.cd_marital_status, + e.cd_education_status, + e.cd_purchase_estimate, + e.cd_credit_rating, + s.total_orders, + s.total_revenue, + CASE + WHEN s.total_orders IS NULL THEN 'No Orders' + ELSE 'Active Customer' + END AS customer_status + FROM + EnhancedCustomerInfo e + LEFT JOIN + SalesSummary s ON 
e.c_customer_sk = s.ws_bill_customer_sk +) +SELECT + *, + CASE + WHEN total_revenue > 1000 THEN 'High Value Customer' + WHEN total_revenue BETWEEN 500 AND 1000 THEN 'Medium Value Customer' + ELSE 'Low Value Customer' + END AS customer_value +FROM + CustomerBenchmarking +ORDER BY + total_revenue DESC, full_name; diff --git a/vortex-bench/sqlstorm/tpcds/25858.sql b/vortex-bench/sqlstorm/tpcds/25858.sql new file mode 100644 index 00000000000..5604f679906 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25858.sql @@ -0,0 +1,49 @@ + +WITH AddressParts AS ( + SELECT + ca_address_sk, + TRIM(ca_street_number) || ' ' || TRIM(ca_street_name) || ' ' || TRIM(ca_street_type) AS full_address, + ca_city, + ca_state, + ca_zip, + ca_country + FROM + customer_address +), +CustomerInfo AS ( + SELECT + c.c_customer_sk, + TRIM(c.c_first_name) || ' ' || TRIM(c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate, + ai.full_address, + ai.ca_city, + ai.ca_state + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + AddressParts ai ON c.c_current_addr_sk = ai.ca_address_sk +) +SELECT + ci.full_name, + ci.cd_gender, + ci.cd_marital_status, + ci.cd_education_status, + COUNT(ws.ws_order_number) AS total_orders, + SUM(ws.ws_sales_price) AS total_spent, + STRING_AGG(DISTINCT ci.full_address || ' ' || ci.ca_city || ', ' || ci.ca_state, '; ') AS addresses +FROM + CustomerInfo ci +LEFT JOIN + web_sales ws ON ci.c_customer_sk = ws.ws_bill_customer_sk +GROUP BY + ci.full_name, ci.cd_gender, ci.cd_marital_status, ci.cd_education_status +HAVING + SUM(ws.ws_sales_price) > 1000 +ORDER BY + total_spent DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpcds/25866.sql b/vortex-bench/sqlstorm/tpcds/25866.sql new file mode 100644 index 00000000000..33c21633927 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/25866.sql @@ -0,0 +1,59 @@ + +WITH CustomerSummary AS ( + SELECT + CONCAT(c.c_first_name, 
' ', c.c_last_name) AS full_name, + CASE + WHEN cd.cd_gender = 'M' THEN 'Male' + WHEN cd.cd_gender = 'F' THEN 'Female' + ELSE 'Other' + END AS gender, + cd.cd_marital_status AS marital_status, + cd.cd_education_status AS education_status, + COUNT(DISTINCT sr.sr_ticket_number) AS total_returns, + SUM(sr.sr_return_amt) AS total_return_amount, + COUNT(DISTINCT sr.sr_item_sk) AS distinct_returned_items, + c.c_customer_sk + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN + store_returns sr ON c.c_customer_sk = sr.sr_customer_sk + GROUP BY + c.c_customer_sk, c.c_first_name, c.c_last_name, cd.cd_gender, cd.cd_marital_status, cd.cd_education_status +), +DateDetails AS ( + SELECT + d.d_date AS return_date, + EXTRACT(MONTH FROM d.d_date) AS return_month, + EXTRACT(YEAR FROM d.d_date) AS return_year, + CASE + WHEN d.d_dow IN (1, 7) THEN 'Weekend' + ELSE 'Weekday' + END AS week_day_category, + d.d_date_sk + FROM + date_dim d +) +SELECT + cs.full_name, + cs.gender, + cs.marital_status, + cs.education_status, + dd.return_month, + dd.return_year, + dd.week_day_category, + cs.total_returns, + cs.total_return_amount, + cs.distinct_returned_items +FROM + CustomerSummary cs +JOIN + store_returns sr ON cs.c_customer_sk = sr.sr_customer_sk +JOIN + DateDetails dd ON sr.sr_returned_date_sk = dd.d_date_sk +WHERE + cs.total_returns > 0 +ORDER BY + cs.total_return_amount DESC, cs.full_name ASC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/26120.sql b/vortex-bench/sqlstorm/tpcds/26120.sql new file mode 100644 index 00000000000..9e49a5c4d92 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/26120.sql @@ -0,0 +1,66 @@ + +WITH customer_full_names AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), 
+item_details AS ( + SELECT + i.i_item_sk, + i.i_item_desc, + i.i_current_price, + i.i_brand, + i.i_category + FROM + item i +), +sales_summary AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_quantity) AS total_quantity_sold, + SUM(ws.ws_ext_sales_price) AS total_sales_amount, + COUNT(DISTINCT ws.ws_order_number) AS total_orders + FROM + web_sales ws + GROUP BY + ws.ws_item_sk +) +SELECT + cf.full_name, + cf.cd_gender, + cf.cd_marital_status, + cf.cd_education_status, + id.i_item_desc, + id.i_brand, + ss.total_quantity_sold, + ss.total_sales_amount, + ss.total_orders +FROM + customer_full_names cf +JOIN + sales_summary ss ON cf.c_customer_sk = ( + SELECT + cs.ss_customer_sk + FROM + store_sales cs + WHERE + cs.ss_item_sk IN (SELECT ws.ws_item_sk FROM sales_summary ws) + LIMIT 1 + ) +JOIN + item_details id ON ss.ws_item_sk = id.i_item_sk +WHERE + cf.cd_gender = 'F' + AND cf.cd_marital_status = 'M' + AND ss.total_quantity_sold > 100 +ORDER BY + ss.total_sales_amount DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpcds/26682.sql b/vortex-bench/sqlstorm/tpcds/26682.sql new file mode 100644 index 00000000000..92b91c9e0f7 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/26682.sql @@ -0,0 +1,74 @@ + +WITH AddressDetails AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type) AS full_address, + ca_city, + ca_state, + ca_zip, + ca_country + FROM + customer_address +), +CustomerInfo AS ( + SELECT + c_customer_sk, + CONCAT(c_first_name, ' ', c_last_name) AS full_name, + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), +SalesSummary AS ( + SELECT + ws_bill_customer_sk AS customer_id, + COUNT(ws_order_number) AS total_orders, + SUM(ws_ext_sales_price) AS total_revenue, + SUM(ws_ext_discount_amt) AS total_discount + FROM + web_sales + GROUP BY + ws_bill_customer_sk +), +CombinedInfo AS ( + 
SELECT + ci.full_name, + ad.full_address, + ad.ca_city, + ad.ca_state, + ad.ca_zip, + ad.ca_country, + si.total_orders, + si.total_revenue, + si.total_discount + FROM + CustomerInfo ci + JOIN + AddressDetails ad ON ci.c_customer_sk = ad.ca_address_sk + JOIN + SalesSummary si ON ci.c_customer_sk = si.customer_id +) +SELECT + full_name, + full_address, + ca_city, + ca_state, + ca_zip, + ca_country, + total_orders, + total_revenue, + total_discount, + CASE + WHEN total_revenue > 1000 THEN 'High Value Customer' + WHEN total_revenue BETWEEN 500 AND 1000 THEN 'Medium Value Customer' + ELSE 'Low Value Customer' + END AS customer_segment +FROM + CombinedInfo +ORDER BY + total_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpcds/2678.sql b/vortex-bench/sqlstorm/tpcds/2678.sql new file mode 100644 index 00000000000..db378b5a023 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/2678.sql @@ -0,0 +1,57 @@ + +WITH CustomerSales AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_net_profit) AS total_profit, + RANK() OVER (PARTITION BY ca.ca_state ORDER BY SUM(ws.ws_net_profit) DESC) AS rank_within_state + FROM + customer c + JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk + WHERE + c.c_current_cdemo_sk IS NOT NULL + GROUP BY + c.c_customer_sk, c.c_first_name, c.c_last_name, ca.ca_state +), +TopCustomers AS ( + SELECT + cs.c_customer_sk, + cs.c_first_name, + cs.c_last_name, + cs.total_profit + FROM + CustomerSales cs + WHERE + cs.rank_within_state <= 5 +), +ProductsSold AS ( + SELECT + ws.ws_item_sk, + COUNT(ws.ws_order_number) AS total_orders, + AVG(ws.ws_sales_price) AS average_price + FROM + web_sales ws + WHERE + ws.ws_sold_date_sk BETWEEN (SELECT MAX(d_date_sk) - 30 FROM date_dim) AND (SELECT MAX(d_date_sk) FROM date_dim) + GROUP BY + ws.ws_item_sk +) +SELECT + tc.c_first_name, + tc.c_last_name, + p.total_orders, + p.average_price, + CASE + WHEN tc.total_profit 
IS NULL THEN 'No sales' + ELSE 'Sales recorded' + END AS sales_status +FROM + TopCustomers tc +LEFT JOIN + ProductsSold p ON tc.c_customer_sk = p.ws_item_sk +ORDER BY + tc.total_profit DESC; diff --git a/vortex-bench/sqlstorm/tpcds/27136.sql b/vortex-bench/sqlstorm/tpcds/27136.sql new file mode 100644 index 00000000000..007098dc380 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/27136.sql @@ -0,0 +1,34 @@ + +WITH ranked_customers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + cd.cd_marital_status, + COUNT(sr.sr_ticket_number) AS total_returns, + SUM(sr.sr_return_amt) AS total_return_value, + SUM(sr.sr_return_quantity) AS total_return_quantity, + ROW_NUMBER() OVER (PARTITION BY cd.cd_gender ORDER BY COUNT(sr.sr_ticket_number) DESC) AS return_rank + FROM + customer c + LEFT JOIN + store_returns sr ON c.c_customer_sk = sr.sr_customer_sk + LEFT JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + GROUP BY + c.c_customer_sk, c.c_first_name, c.c_last_name, cd.cd_gender, cd.cd_marital_status +) +SELECT + CONCAT(c_first_name, ' ', c_last_name) AS full_name, + cd_gender AS gender, + cd_marital_status AS marital_status, + total_returns, + total_return_value, + total_return_quantity +FROM + ranked_customers +WHERE + return_rank <= 10 +ORDER BY + cd_gender, total_returns DESC; diff --git a/vortex-bench/sqlstorm/tpcds/27711.sql b/vortex-bench/sqlstorm/tpcds/27711.sql new file mode 100644 index 00000000000..c3c47a1ee5d --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/27711.sql @@ -0,0 +1,59 @@ +WITH CustomerDemographics AS ( + SELECT cd_demo_sk, + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + FROM customer_demographics + WHERE cd_marital_status = 'M' +), CustomerDetails AS ( + SELECT c.c_customer_sk, + c.c_first_name, + c.c_last_name, + ca.ca_city, + ca.ca_state, + ca.ca_country, + cd.cd_gender, + 
cd.cd_purchase_estimate + FROM customer c + JOIN customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk + JOIN CustomerDemographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), DateFiltered AS ( + SELECT d.d_date, + d.d_year, + COUNT(ws.ws_order_number) AS total_orders, + SUM(ws.ws_sales_price) AS total_sales + FROM web_sales ws + JOIN date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + WHERE d.d_year = 2001 + GROUP BY d.d_date, d.d_year +), SalesByCustomer AS ( + SELECT cd.c_customer_sk, + cd.c_first_name, + cd.c_last_name, + cd.ca_city, + cd.ca_state, + SUM(ws.ws_sales_price) AS customer_sales + FROM CustomerDetails cd + JOIN web_sales ws ON cd.c_customer_sk = ws.ws_ship_customer_sk + GROUP BY cd.c_customer_sk, cd.c_first_name, cd.c_last_name, cd.ca_city, cd.ca_state +), FinalReport AS ( + SELECT dbc.d_date AS Sales_Date, + dbc.total_orders, + dbc.total_sales, + sbc.c_customer_sk, + sbc.c_first_name, + sbc.c_last_name, + sbc.ca_city, + sbc.ca_state, + sbc.customer_sales + FROM DateFiltered dbc + JOIN SalesByCustomer sbc ON dbc.d_date = cast('2002-10-01' as date) +) +SELECT * +FROM FinalReport +ORDER BY total_sales DESC, customer_sales DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/27756.sql b/vortex-bench/sqlstorm/tpcds/27756.sql new file mode 100644 index 00000000000..b852dfb76ad --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/27756.sql @@ -0,0 +1,55 @@ + +WITH AddressDetails AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type) AS full_address, + ca_city, + ca_state, + ca_zip, + ca_country + FROM + customer_address +), +CustomerDetails AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), +SalesData AS ( + SELECT + ws.ws_order_number, + ws.ws_sold_date_sk, + 
SUM(ws.ws_quantity) AS total_quantity, + SUM(ws.ws_ext_sales_price) AS total_sales + FROM + web_sales ws + GROUP BY + ws.ws_order_number, ws.ws_sold_date_sk +) +SELECT + cd.full_name, + cd.cd_gender, + cd.cd_marital_status, + ad.full_address, + ss.total_quantity, + ss.total_sales +FROM + CustomerDetails cd +JOIN + AddressDetails ad ON cd.c_customer_sk = ad.ca_address_sk +JOIN + SalesData ss ON cd.c_customer_sk = ss.ws_order_number +WHERE + cd.cd_gender = 'F' + AND ss.total_sales > 1000 +ORDER BY + ss.total_sales DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpcds/27983.sql b/vortex-bench/sqlstorm/tpcds/27983.sql new file mode 100644 index 00000000000..dba1c8a747b --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/27983.sql @@ -0,0 +1,58 @@ + +WITH AddressData AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type) AS full_address, + TRIM(ca_city) AS city_name, + ca_state, + ca_zip + FROM customer_address + WHERE ca_country = 'USA' +), +DemographicData AS ( + SELECT + cd_demo_sk, + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count + FROM customer_demographics + WHERE cd_purchase_estimate > 1000 +), +SalesData AS ( + SELECT + ws_bill_customer_sk, + SUM(ws_ext_sales_price) AS total_sales + FROM web_sales + GROUP BY ws_bill_customer_sk +), +CustomerAddressSales AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + a.full_address, + a.city_name, + a.ca_state, + a.ca_zip, + d.cd_gender, + d.cd_marital_status, + d.cd_education_status, + s.total_sales + FROM customer c + JOIN AddressData a ON c.c_current_addr_sk = a.ca_address_sk + JOIN DemographicData d ON c.c_current_cdemo_sk = d.cd_demo_sk + LEFT JOIN SalesData s ON c.c_customer_sk = s.ws_bill_customer_sk +) +SELECT + city_name, + ca_state, + COUNT(*) AS customer_count, + AVG(total_sales) AS avg_sales, + SUM(total_sales) AS total_sales +FROM CustomerAddressSales +GROUP BY city_name, ca_state 
+HAVING COUNT(*) > 10 +ORDER BY total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/28029.sql b/vortex-bench/sqlstorm/tpcds/28029.sql new file mode 100644 index 00000000000..ca53b401bfa --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28029.sql @@ -0,0 +1,57 @@ + +WITH AddressInfo AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type) AS full_address, + ca_city, + ca_state, + ca_zip + FROM + customer_address +), +DemoInfo AS ( + SELECT + cd_demo_sk, + cd_gender, + CASE + WHEN cd_marital_status = 'M' THEN 'Married' + WHEN cd_marital_status = 'S' THEN 'Single' + ELSE 'Other' + END AS marital_status, + cd_education_status, + cd_purchase_estimate + FROM + customer_demographics +), +SalesData AS ( + SELECT + ws_bill_customer_sk AS customer_sk, + SUM(ws_sales_price) AS total_sales, + COUNT(ws_order_number) AS order_count + FROM + web_sales + GROUP BY + ws_bill_customer_sk +) +SELECT + c.c_customer_id, + c.c_first_name, + c.c_last_name, + a.full_address, + d.marital_status, + d.cd_gender, + s.total_sales, + s.order_count +FROM + customer c +JOIN + AddressInfo a ON c.c_current_addr_sk = a.ca_address_sk +JOIN + DemoInfo d ON c.c_current_cdemo_sk = d.cd_demo_sk +LEFT JOIN + SalesData s ON c.c_customer_sk = s.customer_sk +WHERE + (a.ca_state = 'CA' AND s.total_sales > 1000) + OR (d.marital_status = 'Married' AND d.cd_gender = 'F') +ORDER BY + total_sales DESC NULLS LAST; diff --git a/vortex-bench/sqlstorm/tpcds/28106.sql b/vortex-bench/sqlstorm/tpcds/28106.sql new file mode 100644 index 00000000000..d90a82882ba --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28106.sql @@ -0,0 +1,39 @@ + +SELECT + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + ca.ca_city, + ca.ca_state, + ca.ca_zip, + SUM(ss.ss_ext_sales_price) AS total_sales, + COUNT(DISTINCT ss.ss_ticket_number) AS number_of_purchases, + CASE + WHEN cd.cd_gender = 'M' THEN 'Male' + WHEN cd.cd_gender = 'F' THEN 'Female' + ELSE 'Other' + END AS gender, + 
cd.cd_marital_status, + cd.cd_education_status, + CASE + WHEN cd.cd_purchase_estimate > 5000 THEN 'High Spender' + WHEN cd.cd_purchase_estimate BETWEEN 1000 AND 5000 THEN 'Medium Spender' + ELSE 'Low Spender' + END AS spending_category +FROM + customer c +JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk +JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk +WHERE + d.d_date BETWEEN '2023-01-01' AND '2023-12-31' +GROUP BY + c.c_first_name, c.c_last_name, ca.ca_city, ca.ca_state, ca.ca_zip, + cd.cd_gender, cd.cd_marital_status, cd.cd_education_status, + cd.cd_purchase_estimate +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/28184.sql b/vortex-bench/sqlstorm/tpcds/28184.sql new file mode 100644 index 00000000000..39ec049ec3d --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28184.sql @@ -0,0 +1,60 @@ + +WITH AddressDetails AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type) AS full_address, + ca_city, + ca_state, + ca_zip + FROM customer_address +), +Demographics AS ( + SELECT + cd_demo_sk, + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count + FROM customer_demographics +), +CustomerInfo AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + ad.full_address, + d.cd_gender, + d.cd_marital_status, + d.cd_education_status + FROM customer c + JOIN AddressDetails ad ON c.c_current_addr_sk = ad.ca_address_sk + JOIN Demographics d ON c.c_current_cdemo_sk = d.cd_demo_sk +), +SalesStats AS ( + SELECT + ws_bill_customer_sk, + SUM(ws_quantity) AS total_quantity, + SUM(ws_net_paid) AS total_sales + FROM web_sales + GROUP BY ws_bill_customer_sk +) +SELECT + ci.c_customer_sk, + ci.c_first_name, + ci.c_last_name, + ci.full_address, + ci.cd_gender, + 
ci.cd_marital_status, + ss.total_quantity, + ss.total_sales, + (CASE + WHEN ss.total_sales IS NULL THEN 'No Sales' + WHEN ss.total_sales > 1000 THEN 'High Value Customer' + ELSE 'Regular Customer' + END) AS customer_status +FROM CustomerInfo ci +LEFT JOIN SalesStats ss ON ci.c_customer_sk = ss.ws_bill_customer_sk +ORDER BY ci.c_last_name, ci.c_first_name; diff --git a/vortex-bench/sqlstorm/tpcds/28216.sql b/vortex-bench/sqlstorm/tpcds/28216.sql new file mode 100644 index 00000000000..77c777be933 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28216.sql @@ -0,0 +1,48 @@ + +WITH RankedCustomers AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + ca.ca_city, + ca.ca_state, + ROW_NUMBER() OVER (PARTITION BY ca.ca_state ORDER BY c.c_customer_sk) AS state_rank + FROM customer c + JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk + WHERE ca.ca_city IS NOT NULL +), +FilteredCustomers AS ( + SELECT + full_name, + cd_gender, + cd_marital_status, + ca_city, + ca_state + FROM RankedCustomers + WHERE state_rank <= 10 +), +CustomerStats AS ( + SELECT + ca_state, + COUNT(*) AS total_customers, + STRING_AGG(full_name, ', ') AS customer_names, + COUNT(CASE WHEN cd_gender = 'F' THEN 1 END) AS female_count, + COUNT(CASE WHEN cd_marital_status = 'M' THEN 1 END) AS married_count + FROM FilteredCustomers + GROUP BY ca_state +) +SELECT + cs.ca_state, + cs.total_customers, + cs.customer_names, + cs.female_count, + cs.married_count, + CASE + WHEN cs.total_customers > 50 THEN 'Large' + WHEN cs.total_customers BETWEEN 20 AND 50 THEN 'Medium' + ELSE 'Small' + END AS customer_segment +FROM CustomerStats cs +ORDER BY cs.total_customers DESC; diff --git a/vortex-bench/sqlstorm/tpcds/28398.sql b/vortex-bench/sqlstorm/tpcds/28398.sql new file mode 100644 index 00000000000..af528277185 --- /dev/null +++ 
b/vortex-bench/sqlstorm/tpcds/28398.sql @@ -0,0 +1,62 @@ + +WITH AddressInfo AS ( + SELECT + ca_city, + ca_state, + COUNT(*) AS address_count, + STRING_AGG(DISTINCT CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type), ', ') AS street_info + FROM customer_address + GROUP BY ca_city, ca_state +), +CustomerGender AS ( + SELECT + cd_gender, + COUNT(*) AS gender_count + FROM customer_demographics + GROUP BY cd_gender +), +DateStats AS ( + SELECT + d_year, + COUNT(*) AS sales_count, + SUM(EXTRACT(DOY FROM d_date)) AS total_days_of_year + FROM date_dim + JOIN web_sales ON d_date_sk = ws_sold_date_sk + GROUP BY d_year +), +WarehouseInfo AS ( + SELECT + w_city, + AVG(w_warehouse_sq_ft) AS avg_warehouse_size + FROM warehouse + GROUP BY w_city +), +FinalBenchmark AS ( + SELECT + ai.ca_city, + ai.ca_state, + ai.address_count, + ai.street_info, + cg.cd_gender, + cg.gender_count, + ds.d_year, + ds.sales_count, + ds.total_days_of_year, + wi.w_city, + wi.avg_warehouse_size + FROM AddressInfo ai + JOIN CustomerGender cg ON TRUE + JOIN DateStats ds ON TRUE + JOIN WarehouseInfo wi ON wi.w_city = ai.ca_city +) +SELECT + CONCAT(ca_city, ', ', ca_state) AS location, + address_count, + street_info, + gender_count, + d_year, + sales_count, + ROUND(total_days_of_year::decimal / NULLIF(sales_count, 0), 2) AS avg_sales_per_day, + ROUND(avg_warehouse_size::decimal, 2) AS average_warehouse_size +FROM FinalBenchmark +ORDER BY location, d_year; diff --git a/vortex-bench/sqlstorm/tpcds/28686.sql b/vortex-bench/sqlstorm/tpcds/28686.sql new file mode 100644 index 00000000000..0afa1274524 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28686.sql @@ -0,0 +1,24 @@ + +SELECT + ca.ca_city, + ca.ca_state, + COUNT(DISTINCT c.c_customer_id) AS customer_count, + AVG(cd.cd_purchase_estimate) AS avg_purchase_estimate, + SUM(CASE WHEN cd.cd_gender = 'F' THEN 1 ELSE 0 END) AS female_customers, + SUM(CASE WHEN cd.cd_gender = 'M' THEN 1 ELSE 0 END) AS male_customers, + STRING_AGG(DISTINCT 
cd.cd_marital_status, ', ') AS marital_statuses, + STRING_AGG(DISTINCT cd.cd_education_status, ', ') AS education_levels +FROM + customer_address ca +JOIN + customer c ON ca.ca_address_sk = c.c_current_addr_sk +JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +WHERE + ca.ca_state IN ('CA', 'NY') +GROUP BY + ca.ca_city, + ca.ca_state +ORDER BY + customer_count DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpcds/28706.sql b/vortex-bench/sqlstorm/tpcds/28706.sql new file mode 100644 index 00000000000..0d3765db45f --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28706.sql @@ -0,0 +1,46 @@ + +WITH Address_Concat AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type, + CASE WHEN ca_suite_number IS NOT NULL THEN CONCAT(', Suite ', ca_suite_number) ELSE '' END) AS full_address, + ca_city, + ca_state, + ca_zip + FROM customer_address +), +Customer_Details AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + d.cd_gender, + d.cd_marital_status, + d.cd_education_status, + d.cd_purchase_estimate + FROM customer c + JOIN customer_demographics d ON c.c_current_cdemo_sk = d.cd_demo_sk +), +Sales_Info AS ( + SELECT + ws_bill_customer_sk, + SUM(ws_ext_sales_price) AS total_sales, + COUNT(DISTINCT ws_order_number) AS order_count + FROM web_sales + GROUP BY ws_bill_customer_sk +) +SELECT + c.full_name, + c.cd_gender, + c.cd_marital_status, + c.cd_education_status, + a.full_address, + a.ca_city, + a.ca_state, + a.ca_zip, + s.total_sales, + s.order_count +FROM Customer_Details c +JOIN Address_Concat a ON c.c_customer_sk = a.ca_address_sk +LEFT JOIN Sales_Info s ON c.c_customer_sk = s.ws_bill_customer_sk +WHERE c.cd_purchase_estimate > 1000 +ORDER BY total_sales DESC, c.full_name; diff --git a/vortex-bench/sqlstorm/tpcds/2874.sql b/vortex-bench/sqlstorm/tpcds/2874.sql new file mode 100644 index 00000000000..666136af436 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/2874.sql 
@@ -0,0 +1,50 @@ + +WITH recent_orders AS ( + SELECT + ws_order_number, + ws_ship_date_sk, + ws_quantity, + ws_net_paid_inc_tax, + ws_ext_sales_price, + ROW_NUMBER() OVER(PARTITION BY ws_order_number ORDER BY ws_ship_date_sk DESC) AS rn + FROM web_sales + WHERE ws_ship_date_sk > ( + SELECT MAX(d_date_sk) - 30 + FROM date_dim + ) +), +refunds AS ( + SELECT + cr_order_number, + SUM(cr_return_quantity) AS total_returned_quantity, + SUM(cr_return_amt_inc_tax) AS total_return_amount + FROM catalog_returns + GROUP BY cr_order_number +), +joined_data AS ( + SELECT + ro.ws_order_number, + ro.ws_net_paid_inc_tax, + ro.ws_quantity, + COALESCE(r.total_returned_quantity, 0) AS total_returned_quantity, + COALESCE(r.total_return_amount, 0) AS total_return_amount + FROM recent_orders ro + LEFT JOIN refunds r ON ro.ws_order_number = r.cr_order_number + WHERE ro.rn = 1 +), +final_summary AS ( + SELECT + SUM(ws_net_paid_inc_tax) AS total_sales, + SUM(total_returned_quantity) AS total_quantity_returned, + AVG(ws_net_paid_inc_tax) AS avg_order_value, + COUNT(ws_order_number) AS total_orders, + SUM(CASE WHEN ws_net_paid_inc_tax IS NULL THEN 1 ELSE 0 END) AS null_sales_count + FROM joined_data +) +SELECT + *, + total_sales - total_quantity_returned AS net_sales, + total_orders * 1.0 / NULLIF(total_sales, 0) AS order_sales_ratio +FROM final_summary +WHERE avg_order_value IS NOT NULL +ORDER BY net_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/28799.sql b/vortex-bench/sqlstorm/tpcds/28799.sql new file mode 100644 index 00000000000..f3d473a7c2f --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28799.sql @@ -0,0 +1,51 @@ + +WITH RankedCustomers AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_purchase_estimate, + ROW_NUMBER() OVER (PARTITION BY cd.cd_gender ORDER BY cd.cd_purchase_estimate DESC) AS rank + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk 
+), +TopCustomers AS ( + SELECT + full_name, + cd_gender, + cd_marital_status, + cd_purchase_estimate, + c_customer_sk + FROM + RankedCustomers + WHERE + rank <= 10 +), +CustomerAddresses AS ( + SELECT + c.c_customer_sk, + ca.ca_street_number, + ca.ca_street_name, + ca.ca_city, + ca.ca_state, + CONCAT(ca.ca_street_number, ' ', ca.ca_street_name, ', ', ca.ca_city, ', ', ca.ca_state) AS full_address + FROM + customer c + JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +) +SELECT + tc.full_name, + tc.cd_gender, + tc.cd_marital_status, + tc.cd_purchase_estimate, + ca.full_address +FROM + TopCustomers tc +JOIN + CustomerAddresses ca ON ca.c_customer_sk = tc.c_customer_sk +ORDER BY + tc.cd_purchase_estimate DESC; diff --git a/vortex-bench/sqlstorm/tpcds/28943.sql b/vortex-bench/sqlstorm/tpcds/28943.sql new file mode 100644 index 00000000000..8c8e5538efb --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/28943.sql @@ -0,0 +1,60 @@ + +WITH CustomerInfo AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_purchase_estimate, + cd.cd_credit_rating, + CONCAT(ca.ca_street_number, ' ', ca.ca_street_name, ' ', ca.ca_street_type) AS full_address, + ca.ca_city, + ca.ca_state, + ca.ca_zip, + ca.ca_country + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +), +AggregateData AS ( + SELECT + COUNT(*) AS total_customers, + COUNT(DISTINCT full_name) AS unique_customers, + AVG(cd_purchase_estimate) AS avg_purchase_estimate, + MAX(cd_purchase_estimate) AS max_purchase_estimate, + MIN(cd_purchase_estimate) AS min_purchase_estimate, + cd_gender + FROM + CustomerInfo + GROUP BY + cd_gender +), +DetailedAddress AS ( + SELECT + full_name, + full_address + FROM + CustomerInfo + WHERE + ca_state = 'CA' + ORDER BY + ca_city +) +SELECT + a.total_customers, + a.unique_customers, + 
a.avg_purchase_estimate, + a.max_purchase_estimate, + a.min_purchase_estimate, + a.cd_gender, + d.full_name, + d.full_address +FROM + AggregateData a +JOIN + DetailedAddress d ON 1 = 1 +ORDER BY + a.cd_gender, d.full_name; diff --git a/vortex-bench/sqlstorm/tpcds/2898.sql b/vortex-bench/sqlstorm/tpcds/2898.sql new file mode 100644 index 00000000000..2eb41350cce --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/2898.sql @@ -0,0 +1,69 @@ +WITH SalesData AS ( + SELECT + ws.ws_item_sk, + ws.ws_sales_price, + ws.ws_quantity, + ws.ws_net_profit, + d.d_year, + d.d_month_seq, + d.d_week_seq, + ROW_NUMBER() OVER (PARTITION BY d.d_year, d.d_month_seq ORDER BY ws.ws_net_profit DESC) AS rank_profit + FROM + web_sales ws + JOIN date_dim d ON ws.ws_sold_date_sk = d.d_date_sk +), +CustomerDemo AS ( + SELECT + cd.cd_demo_sk, + cd.cd_gender, + cd.cd_marital_status, + hd.hd_income_band_sk + FROM + customer_demographics cd + LEFT JOIN household_demographics hd ON cd.cd_demo_sk = hd.hd_demo_sk +), +AggregatedSales AS ( + SELECT + cs.cs_item_sk, + SUM(cs.cs_quantity) AS total_quantity, + SUM(cs.cs_net_profit) AS total_profit + FROM + catalog_sales cs + GROUP BY + cs.cs_item_sk +) + +SELECT + ca.ca_address_id, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + SUM(sd.ws_quantity) AS total_web_sales_quantity, + AVG(sd.ws_sales_price) AS avg_web_sales_price, + COALESCE(SUM(sd.ws_net_profit), 0) AS total_web_profit, + r.r_reason_desc, + CASE + WHEN cd.cd_marital_status = 'M' THEN 'Married' + WHEN cd.cd_marital_status IS NULL THEN 'Unknown' + ELSE 'Single' + END AS marital_status, + CASE + WHEN sd.rank_profit <= 10 THEN 'Top Profit' + ELSE 'Regular Sales' + END AS sale_category +FROM + SalesData sd + LEFT JOIN customer c ON sd.ws_item_sk = c.c_customer_sk + LEFT JOIN customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk + LEFT JOIN CustomerDemo cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN reason r ON r.r_reason_sk = sd.ws_item_sk + LEFT JOIN AggregatedSales ag ON 
ag.cs_item_sk = sd.ws_item_sk +WHERE + ca.ca_country = 'USA' + AND (cd.cd_gender = 'F' OR cd.cd_gender IS NULL) +GROUP BY + ca.ca_address_id, c.c_first_name, c.c_last_name, cd.cd_gender, r.r_reason_desc, cd.cd_marital_status, sd.rank_profit +HAVING + SUM(sd.ws_quantity) > 100 +ORDER BY + total_web_profit DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/29023.sql b/vortex-bench/sqlstorm/tpcds/29023.sql new file mode 100644 index 00000000000..6db1e44fc1a --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/29023.sql @@ -0,0 +1,59 @@ + +WITH CustomerInfo AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_salutation, ' ', c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + ca.ca_city, + ca.ca_state + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +), +SalesInfo AS ( + SELECT + SUM(ss.ss_quantity) AS total_quantity, + SUM(ss.ss_sales_price) AS total_sales, + ss.ss_customer_sk + FROM + store_sales ss + GROUP BY + ss.ss_customer_sk +), +InfoWithSales AS ( + SELECT + ci.full_name, + ci.cd_gender, + ci.cd_marital_status, + ci.cd_education_status, + ci.ca_city, + ci.ca_state, + COALESCE(si.total_quantity, 0) AS total_quantity, + COALESCE(si.total_sales, 0) AS total_sales + FROM + CustomerInfo ci + LEFT JOIN + SalesInfo si ON ci.c_customer_sk = si.ss_customer_sk +) +SELECT + cd_gender, + cd_marital_status, + COUNT(*) AS customer_count, + AVG(total_quantity) AS avg_quantity, + AVG(total_sales) AS avg_sales, + ca_state +FROM + InfoWithSales +GROUP BY + cd_gender, + cd_marital_status, + ca_state +ORDER BY + ca_state, + cd_gender, + cd_marital_status; diff --git a/vortex-bench/sqlstorm/tpcds/29196.sql b/vortex-bench/sqlstorm/tpcds/29196.sql new file mode 100644 index 00000000000..b0172e5a86b --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/29196.sql @@ -0,0 +1,64 @@ + +WITH 
CustomerDetails AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS customer_name, + addr.ca_city, + addr.ca_state, + dem.cd_gender, + dem.cd_marital_status, + dem.cd_education_status, + dem.cd_purchase_estimate + FROM + customer c + JOIN + customer_address addr ON c.c_current_addr_sk = addr.ca_address_sk + JOIN + customer_demographics dem ON c.c_current_cdemo_sk = dem.cd_demo_sk +), +SalesSummary AS ( + SELECT + cd.c_customer_sk, + SUM(ws.ws_sales_price) AS total_sales, + COUNT(ws.ws_order_number) AS total_orders + FROM + web_sales ws + JOIN + CustomerDetails cd ON ws.ws_bill_customer_sk = cd.c_customer_sk + GROUP BY + cd.c_customer_sk +), +FinalReport AS ( + SELECT + cd.customer_name, + cd.ca_city, + cd.ca_state, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + ss.total_sales, + ss.total_orders + FROM + CustomerDetails cd + LEFT JOIN + SalesSummary ss ON cd.c_customer_sk = ss.c_customer_sk +) +SELECT + customer_name, + ca_city, + ca_state, + cd_gender, + cd_marital_status, + cd_education_status, + COALESCE(total_sales, 0) AS total_sales, + COALESCE(total_orders, 0) AS total_orders, + CASE + WHEN COALESCE(total_sales, 0) = 0 THEN 'No Sales' + WHEN COALESCE(total_sales, 0) < 100 THEN 'Low Sales' + WHEN COALESCE(total_sales, 0) BETWEEN 100 AND 500 THEN 'Moderate Sales' + ELSE 'High Sales' + END AS sales_category +FROM + FinalReport +ORDER BY + total_sales DESC, customer_name; diff --git a/vortex-bench/sqlstorm/tpcds/29246.sql b/vortex-bench/sqlstorm/tpcds/29246.sql new file mode 100644 index 00000000000..97b57641a4b --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/29246.sql @@ -0,0 +1,56 @@ + +WITH StringBenchmark AS ( + SELECT + c.c_customer_id, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + LENGTH(c.c_first_name) AS first_name_length, + LENGTH(c.c_last_name) AS last_name_length, + UPPER(c.c_first_name) AS upper_first_name, + LOWER(c.c_last_name) AS lower_last_name, + SUBSTRING(c.c_email_address, 
POSITION('@' IN c.c_email_address) + 1) AS email_domain, + REPLACE(c.c_email_address, '.', '-') AS modified_email, + REGEXP_REPLACE(c.c_email_address, '^[^@]+', 'user') AS anonymized_email, + CASE + WHEN LENGTH(c.c_email_address) > 40 THEN 'Long Email' + ELSE 'Short Email' + END AS email_length_category, + ROW_NUMBER() OVER (PARTITION BY c.c_customer_id ORDER BY c.c_customer_sk) AS customer_rank + FROM + customer c + WHERE + c.c_birth_year BETWEEN 1980 AND 2000 +), +StringCounts AS ( + SELECT + full_name, + COUNT(*) AS name_count, + MAX(first_name_length) AS max_first_name_length, + MIN(last_name_length) AS min_last_name_length, + SUM(CASE WHEN email_length_category = 'Long Email' THEN 1 ELSE 0 END) AS long_email_count + FROM + StringBenchmark + GROUP BY + full_name +) +SELECT + sb.full_name, + sb.first_name_length, + sb.last_name_length, + sc.name_count, + sc.max_first_name_length, + sc.min_last_name_length, + sc.long_email_count, + sb.upper_first_name, + sb.lower_last_name, + sb.email_domain, + sb.modified_email, + sb.anonymized_email, + sb.customer_rank +FROM + StringBenchmark sb +JOIN + StringCounts sc ON sb.full_name = sc.full_name +ORDER BY + sb.first_name_length DESC, + sb.last_name_length ASC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/29332.sql b/vortex-bench/sqlstorm/tpcds/29332.sql new file mode 100644 index 00000000000..18532ff3338 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/29332.sql @@ -0,0 +1,55 @@ + +WITH AddressDetails AS ( + SELECT + ca_address_sk, + CONCAT(ca_street_number, ' ', ca_street_name, ' ', ca_street_type, + CASE WHEN ca_suite_number IS NOT NULL THEN CONCAT(' Suite ', ca_suite_number) ELSE '' END) AS Full_Address, + ca_city, + ca_state, + ca_zip, + ca_country + FROM customer_address +), +CustomerAggregates AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS Full_Name, + cd.cd_gender, + cd.cd_marital_status, + COUNT(hd.hd_income_band_sk) AS Income_Band_Count + FROM customer c + JOIN 
customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN household_demographics hd ON hd.hd_demo_sk = c.c_current_hdemo_sk + GROUP BY c.c_customer_sk, c.c_first_name, c.c_last_name, cd.cd_gender, cd.cd_marital_status +), +DateStatistics AS ( + SELECT + d.d_year, + COUNT(ws.ws_order_number) AS Total_Orders, + SUM(ws.ws_ext_sales_price) AS Total_Sales + FROM web_sales ws + JOIN date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + GROUP BY d.d_year +) +SELECT + ca.Full_Address, + ca.ca_city, + ca.ca_state, + ca.ca_zip, + ca.ca_country, + cu.Full_Name, + cu.cd_gender, + cu.cd_marital_status, + da.d_year, + da.Total_Orders, + da.Total_Sales, + CASE + WHEN da.Total_Sales > 100000 THEN 'High Revenue' + WHEN da.Total_Sales BETWEEN 50000 AND 100000 THEN 'Moderate Revenue' + ELSE 'Low Revenue' + END AS Revenue_Category +FROM AddressDetails ca +JOIN CustomerAggregates cu ON cu.c_customer_sk = ca.ca_address_sk +JOIN DateStatistics da ON da.d_year = EXTRACT(YEAR FROM DATE '2002-10-01') +WHERE ca.ca_state = 'CA' +ORDER BY da.Total_Sales DESC, cu.Full_Name; diff --git a/vortex-bench/sqlstorm/tpcds/29896.sql b/vortex-bench/sqlstorm/tpcds/29896.sql new file mode 100644 index 00000000000..b17ef519969 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/29896.sql @@ -0,0 +1,66 @@ + +WITH customer_info AS ( + SELECT + c.c_customer_sk, + CONCAT(c.c_first_name, ' ', c.c_last_name) AS full_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate, + ca.ca_city, + ca.ca_state, + ca.ca_country + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +), +sales_info AS ( + SELECT + ws.ws_bill_customer_sk AS customer_sk, + SUM(ws.ws_ext_sales_price) AS total_sales, + COUNT(ws.ws_order_number) AS total_orders + FROM + web_sales ws + GROUP BY + ws.ws_bill_customer_sk +), +combined_info AS ( + SELECT + ci.full_name, + ci.cd_gender, + 
ci.cd_marital_status, + ci.cd_education_status, + ci.cd_purchase_estimate, + ci.ca_city, + ci.ca_state, + ci.ca_country, + si.total_sales, + si.total_orders + FROM + customer_info ci + LEFT JOIN + sales_info si ON ci.c_customer_sk = si.customer_sk +) +SELECT + full_name, + cd_gender, + cd_marital_status, + cd_education_status, + COALESCE(total_sales, 0) AS total_sales, + COALESCE(total_orders, 0) AS total_orders, + CONCAT('Location: ', ca_city, ', ', ca_state, ', ', ca_country) AS location_summary, + CASE + WHEN total_sales > 10000 THEN 'High Value Customer' + WHEN total_sales BETWEEN 5000 AND 10000 THEN 'Medium Value Customer' + ELSE 'Low Value Customer' + END AS customer_segment +FROM + combined_info +WHERE + cd_gender = 'F' +ORDER BY + total_sales DESC, + full_name; diff --git a/vortex-bench/sqlstorm/tpcds/30175.sql b/vortex-bench/sqlstorm/tpcds/30175.sql new file mode 100644 index 00000000000..98739b733a1 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/30175.sql @@ -0,0 +1,63 @@ + +WITH RECURSIVE sales_totals AS ( + SELECT + ws_sold_date_sk, + ws_item_sk, + SUM(ws_ext_sales_price) AS total_sales, + ROW_NUMBER() OVER (PARTITION BY ws_item_sk ORDER BY ws_sold_date_sk DESC) AS rnk + FROM + web_sales + GROUP BY + ws_sold_date_sk, ws_item_sk +), +customer_sales AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + COUNT(DISTINCT ws.ws_order_number) AS web_order_count, + AVG(COALESCE(ws.ws_net_paid, 0)) AS avg_net_paid + FROM + customer c + LEFT JOIN web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + GROUP BY + c.c_customer_sk, c.c_first_name, c.c_last_name +), +latest_shipping AS ( + SELECT + ws_ship_customer_sk, + sm.sm_type, + COUNT(DISTINCT ws_order_number) AS order_count + FROM + web_sales + JOIN ship_mode sm ON ws_ship_mode_sk = sm.sm_ship_mode_sk + GROUP BY + ws_ship_customer_sk, sm.sm_type +), +inventory_summary AS ( + SELECT + inv_item_sk, + SUM(inv_quantity_on_hand) AS total_inventory + FROM + inventory + GROUP BY + inv_item_sk +) 
+SELECT + cs.c_customer_sk, + cs.c_first_name, + cs.c_last_name, + lt.sm_type, + lt.order_count, + COALESCE(st.total_sales, 0) AS web_sales_total, + inv.total_inventory +FROM + customer_sales cs +LEFT JOIN latest_shipping lt ON cs.c_customer_sk = lt.ws_ship_customer_sk +LEFT JOIN sales_totals st ON cs.c_customer_sk = st.ws_item_sk +LEFT JOIN inventory_summary inv ON cs.c_customer_sk = inv.inv_item_sk +WHERE + cs.web_order_count > 5 + AND (lt.order_count IS NULL OR lt.order_count > 2) +ORDER BY + cs.c_last_name, cs.c_first_name; diff --git a/vortex-bench/sqlstorm/tpcds/30217.sql b/vortex-bench/sqlstorm/tpcds/30217.sql new file mode 100644 index 00000000000..6bad031ffcb --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/30217.sql @@ -0,0 +1,74 @@ + +WITH RECURSIVE sales_hierarchy AS ( + SELECT + c.c_customer_id, + c.c_current_cdemo_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + cd.cd_marital_status, + SUM(ss.ss_net_profit) AS total_sales + FROM + customer AS c + LEFT JOIN + customer_demographics AS cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN + store_sales AS ss ON c.c_customer_sk = ss.ss_customer_sk + GROUP BY + c.c_customer_id, c.c_current_cdemo_sk, c.c_first_name, c.c_last_name, cd.cd_gender, cd.cd_marital_status + HAVING + SUM(ss.ss_net_profit) > 1000 +), +recent_sales AS ( + SELECT + ws.ws_order_number, + ws.ws_sold_date_sk, + ws.ws_item_sk, + ws.ws_net_paid, + w.w_warehouse_name + FROM + web_sales AS ws + JOIN + warehouse AS w ON ws.ws_warehouse_sk = w.w_warehouse_sk + WHERE + ws.ws_sold_date_sk >= ( + SELECT MAX(d.d_date_sk) - 30 + FROM date_dim AS d + ) +), +promotions AS ( + SELECT + p.p_promo_name, + p.p_discount_active, + COUNT(DISTINCT ws.ws_order_number) AS order_count + FROM + promotion AS p + JOIN + web_sales AS ws ON p.p_promo_sk = ws.ws_promo_sk + WHERE + p.p_discount_active = 'Y' + GROUP BY + p.p_promo_name, p.p_discount_active + ORDER BY + order_count DESC +) +SELECT + sh.c_first_name, + sh.c_last_name, + sh.cd_gender, + 
sh.total_sales, + rs.ws_order_number, + rs.ws_net_paid, + pr.p_promo_name, + pr.order_count +FROM + sales_hierarchy AS sh +LEFT JOIN + recent_sales AS rs ON sh.c_current_cdemo_sk = rs.ws_item_sk +LEFT JOIN + promotions AS pr ON rs.ws_order_number = pr.order_count +WHERE + sh.total_sales IS NOT NULL +ORDER BY + sh.total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/30348.sql b/vortex-bench/sqlstorm/tpcds/30348.sql new file mode 100644 index 00000000000..eabacf2dae4 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/30348.sql @@ -0,0 +1,68 @@ + +WITH RECURSIVE month_sales AS ( + SELECT + d.d_year, + d.d_month_seq, + SUM(ws.ws_ext_sales_price) AS total_sales + FROM + date_dim d + JOIN + web_sales ws ON d.d_date_sk = ws.ws_sold_date_sk + GROUP BY + d.d_year, d.d_month_seq + UNION ALL + SELECT + d.d_year, + ms.d_month_seq + 1, + SUM(ws.ws_ext_sales_price) AS total_sales + FROM + month_sales ms + JOIN + date_dim d ON ms.d_year = d.d_year AND ms.d_month_seq + 1 = d.d_month_seq + JOIN + web_sales ws ON d.d_date_sk = ws.ws_sold_date_sk + WHERE + ms.d_month_seq < 12 + GROUP BY + d.d_year, ms.d_month_seq + 1 +), +customer_analysis AS ( + SELECT + c.c_customer_id, + cd.cd_gender, + COUNT(DISTINCT ws.ws_order_number) AS order_count, + SUM(ws.ws_net_profit) AS total_profit + FROM + customer c + LEFT JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + GROUP BY + c.c_customer_id, cd.cd_gender +), +top_customers AS ( + SELECT + c.c_customer_id, + ca.total_profit, + ROW_NUMBER() OVER (ORDER BY ca.total_profit DESC) AS rank + FROM + customer_analysis ca + JOIN + customer c ON ca.c_customer_id = c.c_customer_id + WHERE + ca.total_profit > 1000 +) +SELECT + ms.d_year, + ms.d_month_seq, + SUM(ms.total_sales) AS monthly_sales, + SUM(tc.total_profit) AS top_customer_profit +FROM + month_sales ms +JOIN + top_customers tc ON ms.d_year = 2023 +GROUP BY + ms.d_year, ms.d_month_seq 
+ORDER BY + ms.d_month_seq; diff --git a/vortex-bench/sqlstorm/tpcds/3107.sql b/vortex-bench/sqlstorm/tpcds/3107.sql new file mode 100644 index 00000000000..b5caab00e97 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/3107.sql @@ -0,0 +1,49 @@ + +WITH RECURSIVE Revenue_CTE AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_ext_sales_price) AS total_revenue, + COUNT(DISTINCT ws.ws_order_number) AS total_sales, + DENSE_RANK() OVER (ORDER BY SUM(ws.ws_ext_sales_price) DESC) AS rank + FROM + web_sales ws + JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + WHERE + d.d_year = 2023 + GROUP BY + ws.ws_item_sk +), +Customer_Stats AS ( + SELECT + c.c_customer_sk, + cd.cd_gender, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + COALESCE(SUM(ws.ws_net_paid), 0) AS total_spent, + SUM(CASE WHEN ws.ws_ship_mode_sk IS NOT NULL THEN ws.ws_quantity ELSE 0 END) AS shipped_quantity + FROM + customer c + LEFT JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + LEFT JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + GROUP BY + c.c_customer_sk, cd.cd_gender +) +SELECT + cs.c_customer_sk, + cs.cd_gender, + cs.total_orders, + cs.total_spent, + rc.total_revenue, + rc.total_sales +FROM + Customer_Stats cs +LEFT JOIN + Revenue_CTE rc ON cs.c_customer_sk = rc.ws_item_sk +WHERE + cs.total_orders > 0 + AND rc.total_revenue IS NOT NULL +ORDER BY + cs.total_spent DESC, rc.total_revenue DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/31149.sql b/vortex-bench/sqlstorm/tpcds/31149.sql new file mode 100644 index 00000000000..b469dc87b58 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/31149.sql @@ -0,0 +1,46 @@ + +WITH RECURSIVE sales_summary AS ( + SELECT + ws_bill_customer_sk, + SUM(ws_net_profit) AS total_net_profit, + COUNT(ws_order_number) AS order_count, + ROW_NUMBER() OVER (PARTITION BY ws_bill_customer_sk ORDER BY SUM(ws_net_profit) DESC) AS rn + FROM + web_sales + GROUP BY + ws_bill_customer_sk +), +customer_details AS ( + SELECT + 
c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_purchase_estimate, + a.ca_state + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN + customer_address a ON c.c_current_addr_sk = a.ca_address_sk +) +SELECT + cd.c_first_name, + cd.c_last_name, + cd.cd_gender, + cd.cd_marital_status, + COALESCE(ss.total_net_profit, 0) AS total_net_profit, + COALESCE(ss.order_count, 0) AS order_count, + cd.ca_state +FROM + customer_details cd +LEFT JOIN + sales_summary ss ON cd.c_customer_sk = ss.ws_bill_customer_sk AND ss.rn = 1 +WHERE + (cd.cd_purchase_estimate > 1000 OR cd.cd_marital_status = 'M') + AND cd.ca_state IS NOT NULL +ORDER BY + total_net_profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/31465.sql b/vortex-bench/sqlstorm/tpcds/31465.sql new file mode 100644 index 00000000000..260bdc57d8b --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/31465.sql @@ -0,0 +1,73 @@ + +WITH RECURSIVE SalesCTE AS ( + SELECT + ws_order_number, + ws_item_sk, + ws_quantity, + ws_sales_price, + ws_net_profit, + 1 AS Level + FROM + web_sales + WHERE + ws_sold_date_sk BETWEEN 1 AND 1000 + + UNION ALL + + SELECT + cs_order_number, + cs_item_sk, + cs_quantity, + cs_sales_price, + cs_net_profit, + Level + 1 + FROM + catalog_sales cs + JOIN + SalesCTE s ON cs.cs_order_number = s.ws_order_number + WHERE + cs_order_number IS NOT NULL +), +TopCustomers AS ( + SELECT + c.c_customer_id, + SUM(s.ws_net_profit) AS total_profit, + COUNT(s.ws_order_number) AS order_count + FROM + customer c + LEFT JOIN + web_sales s ON c.c_customer_sk = s.ws_bill_customer_sk + GROUP BY + c.c_customer_id + HAVING + SUM(s.ws_net_profit) > 1000 +), +SalesSummary AS ( + SELECT + d.d_year, + SUM(s.ws_quantity) AS total_quantity, + AVG(s.ws_sales_price) AS avg_sales_price, + SUM(s.ws_net_profit) AS total_net_profit + FROM + date_dim d + JOIN + web_sales s ON d.d_date_sk = s.ws_sold_date_sk + GROUP BY + d.d_year +) 
+SELECT + t.c_customer_id, + t.total_profit, + COALESCE(s.total_quantity, 0) AS total_quantity, + COALESCE(s.avg_sales_price, 0) AS avg_sales_price, + s.total_net_profit, + ROW_NUMBER() OVER (ORDER BY t.total_profit DESC) AS rank +FROM + TopCustomers t +LEFT JOIN + SalesSummary s ON t.order_count = s.total_quantity +WHERE + t.order_count > 5 +ORDER BY + t.total_profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/31517.sql b/vortex-bench/sqlstorm/tpcds/31517.sql new file mode 100644 index 00000000000..491b7b8c07c --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/31517.sql @@ -0,0 +1,39 @@ + +WITH RECURSIVE SalesCTE AS ( + SELECT ss_sold_date_sk, ss_item_sk, ss_quantity, ss_net_paid, ss_store_sk, + ROW_NUMBER() OVER (PARTITION BY ss_item_sk ORDER BY ss_sold_date_sk DESC) AS rn + FROM store_sales + WHERE ss_sold_date_sk IN (SELECT d_date_sk FROM date_dim WHERE d_year = 2022) +), +TopSales AS ( + SELECT ss_item_sk, SUM(ss_quantity) AS total_quantity, SUM(ss_net_paid) AS total_revenue + FROM SalesCTE + WHERE rn = 1 + GROUP BY ss_item_sk +), +CustomerReturns AS ( + SELECT sr_item_sk, SUM(sr_return_quantity) AS return_quantity, SUM(sr_return_amt) AS total_return_amt + FROM store_returns + WHERE sr_returned_date_sk IN (SELECT d_date_sk FROM date_dim WHERE d_year = 2022) + GROUP BY sr_item_sk +), +CombinedSales AS ( + SELECT ts.ss_item_sk, + ts.total_quantity, + ts.total_revenue, + COALESCE(cr.return_quantity, 0) AS total_return_quantity, + COALESCE(cr.total_return_amt, 0) AS total_return_amount, + (ts.total_revenue - COALESCE(cr.total_return_amt, 0)) AS net_revenue + FROM TopSales ts + LEFT JOIN CustomerReturns cr ON ts.ss_item_sk = cr.sr_item_sk +), +RankedSales AS ( + SELECT *, + RANK() OVER (ORDER BY net_revenue DESC) AS revenue_rank + FROM CombinedSales +) +SELECT i.i_item_id, i.i_item_desc, cs.total_quantity, cs.total_revenue, + cs.total_return_quantity, cs.total_return_amount, cs.net_revenue, cs.revenue_rank +FROM RankedSales cs +JOIN item i ON 
cs.ss_item_sk = i.i_item_sk +WHERE cs.revenue_rank <= 10; diff --git a/vortex-bench/sqlstorm/tpcds/32267.sql b/vortex-bench/sqlstorm/tpcds/32267.sql new file mode 100644 index 00000000000..faad6671d80 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/32267.sql @@ -0,0 +1,75 @@ + +WITH RECURSIVE SalesCTE AS ( + SELECT + ss_customer_sk, + SUM(ss_net_paid) AS total_sales, + COUNT(ss_ticket_number) AS sales_count, + RANK() OVER (ORDER BY SUM(ss_net_paid) DESC) AS sales_rank + FROM + store_sales + WHERE + ss_sold_date_sk >= (SELECT MIN(d_date_sk) FROM date_dim WHERE d_year = 2023) + AND ss_sold_date_sk <= (SELECT MAX(d_date_sk) FROM date_dim WHERE d_year = 2023) + GROUP BY + ss_customer_sk +), +AddressCTE AS ( + SELECT + ca_address_sk, + ca_city, + ca_state, + ca_country, + ROW_NUMBER() OVER (PARTITION BY ca_state ORDER BY ca_city) AS city_rank + FROM + customer_address +), +HighValueCustomers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + sd.total_sales, + sd.sales_count, + a.ca_city, + a.ca_state, + a.ca_country + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + SalesCTE sd ON c.c_customer_sk = sd.ss_customer_sk + LEFT JOIN + AddressCTE a ON c.c_current_addr_sk = a.ca_address_sk + WHERE + sd.total_sales > (SELECT AVG(total_sales) FROM SalesCTE) + AND cd.cd_marital_status = 'M' + AND cd.cd_gender = 'F' +), +TopCities AS ( + SELECT + ca_city, + COUNT(DISTINCT c_customer_sk) AS customer_count, + SUM(total_sales) AS city_sales + FROM + HighValueCustomers + GROUP BY + ca_city + HAVING + COUNT(DISTINCT c_customer_sk) > 10 +) +SELECT + tc.ca_city, + tc.customer_count, + tc.city_sales, + CASE + WHEN tc.city_sales > 10000 THEN 'High' + WHEN tc.city_sales BETWEEN 1000 AND 10000 THEN 'Medium' + ELSE 'Low' + END AS sales_category +FROM + TopCities tc +ORDER BY + tc.city_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/3258.sql b/vortex-bench/sqlstorm/tpcds/3258.sql new file mode 
100644 index 00000000000..926bd5f6dff --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/3258.sql @@ -0,0 +1,33 @@ +WITH CustomerStats AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cd.cd_gender, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + SUM(ws.ws_net_paid_inc_tax) AS total_spent, + DENSE_RANK() OVER (PARTITION BY cd.cd_gender ORDER BY SUM(ws.ws_net_paid_inc_tax) DESC) AS spent_rank + FROM customer c + JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + GROUP BY c.c_customer_sk, c.c_first_name, c.c_last_name, cd.cd_gender +), +HighSpenders AS ( + SELECT + cs.*, + CASE + WHEN cs.spent_rank <= 5 THEN 'Top 5' + ELSE 'Not Top 5' + END AS spender_category + FROM CustomerStats cs +) +SELECT + h.c_first_name, + h.c_last_name, + h.total_orders, + h.total_spent, + COALESCE(h.spender_category, 'Unknown') AS spender_category, + REPLACE(h.c_first_name || ' ' || h.c_last_name, ' ', '_') AS formatted_name +FROM HighSpenders h +WHERE h.spent_rank <= 10 +ORDER BY h.total_spent DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/32971.sql b/vortex-bench/sqlstorm/tpcds/32971.sql new file mode 100644 index 00000000000..4c8e820ce40 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/32971.sql @@ -0,0 +1,59 @@ + +WITH RECURSIVE SalesCTE AS ( + SELECT + ws_item_sk, + SUM(ws_quantity) AS total_quantity, + SUM(ws_net_paid) AS total_sales + FROM web_sales + WHERE ws_sold_date_sk BETWEEN 2458849 AND 2458879 + GROUP BY ws_item_sk + + UNION ALL + + SELECT + s.ss_item_sk, + SUM(s.ss_quantity) AS total_quantity, + SUM(s.ss_net_paid) AS total_sales + FROM store_sales s + JOIN SalesCTE cte ON s.ss_item_sk = cte.ws_item_sk + WHERE s.ss_sold_date_sk BETWEEN 2458849 AND 2458879 + GROUP BY s.ss_item_sk +), + +TotalCustomerReturns AS ( + SELECT + sr_item_sk, + SUM(sr_return_quantity) AS total_return_quantity, + SUM(sr_return_amt_inc_tax) AS total_return_amount + 
FROM store_returns + GROUP BY sr_item_sk +), + +RankedSales AS ( + SELECT + cte.ws_item_sk, + cte.total_quantity, + cte.total_sales, + COALESCE(tr.total_return_quantity, 0) AS total_return_quantity, + COALESCE(tr.total_return_amount, 0) AS total_return_amount, + ROW_NUMBER() OVER (ORDER BY cte.total_sales DESC) AS sales_rank + FROM SalesCTE cte + LEFT JOIN TotalCustomerReturns tr ON cte.ws_item_sk = tr.sr_item_sk +) + +SELECT + i.i_item_id, + i.i_item_desc, + rs.total_quantity, + rs.total_sales, + rs.total_return_quantity, + rs.total_return_amount, + CASE + WHEN rs.total_sales > 1000 THEN 'High' + WHEN rs.total_sales > 500 THEN 'Medium' + ELSE 'Low' + END AS sales_category +FROM RankedSales rs +JOIN item i ON rs.ws_item_sk = i.i_item_sk +WHERE rs.sales_rank <= 10 +ORDER BY rs.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/33517.sql b/vortex-bench/sqlstorm/tpcds/33517.sql new file mode 100644 index 00000000000..3d28e0b40fc --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/33517.sql @@ -0,0 +1,84 @@ + +WITH RECURSIVE SalesCTE AS ( + SELECT + ws_item_sk, + COUNT(ws_order_number) AS total_sales, + SUM(ws_ext_sales_price) AS total_revenue + FROM + web_sales + GROUP BY + ws_item_sk + UNION ALL + SELECT + cs_item_sk, + COUNT(cs_order_number) AS total_sales, + SUM(cs_ext_sales_price) AS total_revenue + FROM + catalog_sales + GROUP BY + cs_item_sk +), +SalesSummary AS ( + SELECT + item.i_item_id, + item.i_item_desc, + COALESCE(SUM(s.total_sales), 0) AS total_sales, + COALESCE(SUM(s.total_revenue), 0) AS total_revenue + FROM + item + LEFT JOIN + SalesCTE s ON item.i_item_sk = s.ws_item_sk + GROUP BY + item.i_item_id, + item.i_item_desc +), +CustomerData AS ( + SELECT + c.c_customer_sk, + d.d_year, + cd.cd_gender, + SUM(ws.ws_net_paid) AS total_spending + FROM + customer c + JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk 
+ WHERE + d.d_year >= 2021 + GROUP BY + c.c_customer_sk, + d.d_year, + cd.cd_gender +), +HighValueCustomers AS ( + SELECT + cd.d_year, + cd.cd_gender, + COUNT(DISTINCT cd.c_customer_sk) AS high_value_count + FROM + CustomerData cd + WHERE + cd.total_spending > (SELECT AVG(total_spending) FROM CustomerData) + GROUP BY + cd.d_year, + cd.cd_gender +) +SELECT + ss.i_item_id, + ss.i_item_desc, + ss.total_sales, + ss.total_revenue, + hvc.d_year, + hvc.cd_gender, + hvc.high_value_count +FROM + SalesSummary ss +LEFT JOIN + HighValueCustomers hvc ON ss.total_sales > 100 +ORDER BY + ss.total_revenue DESC, + hvc.high_value_count DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpcds/33618.sql b/vortex-bench/sqlstorm/tpcds/33618.sql new file mode 100644 index 00000000000..cfd9df76bce --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/33618.sql @@ -0,0 +1,82 @@ + +WITH RECURSIVE CustomerReturns AS ( + SELECT + cr_returning_customer_sk, + SUM(cr_return_quantity) AS total_return_quantity, + SUM(cr_return_amount) AS total_return_amount + FROM + catalog_returns + WHERE + cr_returned_date_sk IN (SELECT d_date_sk FROM date_dim WHERE d_year = 2023) + GROUP BY + cr_returning_customer_sk +), InventoryLevels AS ( + SELECT + inv_warehouse_sk, + SUM(inv_quantity_on_hand) AS total_quantity_on_hand + FROM + inventory + GROUP BY + inv_warehouse_sk +), WeeklySales AS ( + SELECT + ws_bill_customer_sk, + SUM(ws_net_paid) AS total_spent, + EXTRACT(week FROM d_date) AS week_number + FROM + web_sales + JOIN + date_dim ON ws_sold_date_sk = d_date_sk + WHERE + d_year = 2023 + GROUP BY + ws_bill_customer_sk, week_number +), PromotionStats AS ( + SELECT + p.p_promo_sk, + COUNT(ws_order_number) AS total_orders, + SUM(ws_ext_discount_amt) AS total_discount + FROM + web_sales ws + JOIN + promotion p ON ws.ws_promo_sk = p.p_promo_sk + GROUP BY + p.p_promo_sk +) +SELECT + c.c_first_name, + c.c_last_name, + cd.cd_gender, + ABS(COALESCE(cr.total_return_quantity, 0)) AS total_return_quantity, + 
COALESCE(ws.total_spent, 0) AS total_spent, + COALESCE(il.total_quantity_on_hand, 0) AS current_inventory, + p.total_orders, + p.total_discount +FROM + customer c +LEFT JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +LEFT JOIN + CustomerReturns cr ON c.c_customer_sk = cr.cr_returning_customer_sk +LEFT JOIN + WeeklySales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +LEFT JOIN + InventoryLevels il ON il.inv_warehouse_sk = (SELECT w.w_warehouse_sk FROM warehouse w LIMIT 1) +LEFT JOIN + PromotionStats p ON p.p_promo_sk = (SELECT MIN(promo.p_promo_sk) FROM promotion promo) +WHERE + (cd.cd_gender = 'F' AND c.c_birth_month = 5) OR + (cd.cd_gender = 'M' AND c.c_birth_month != 5) +GROUP BY + c.c_first_name, + c.c_last_name, + cd.cd_gender, + cr.total_return_quantity, + ws.total_spent, + il.total_quantity_on_hand, + p.total_orders, + p.total_discount +ORDER BY + total_spent DESC, + total_return_quantity ASC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/33817.sql b/vortex-bench/sqlstorm/tpcds/33817.sql new file mode 100644 index 00000000000..916d5299ad5 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/33817.sql @@ -0,0 +1,36 @@ + +WITH RECURSIVE SalesData AS ( + SELECT ws_sold_date_sk, ws_item_sk, SUM(ws_quantity) AS total_sales, SUM(ws_net_profit) AS total_profit + FROM web_sales + WHERE ws_sold_date_sk IN ( + SELECT d_date_sk + FROM date_dim + WHERE d_year = 2023 + ) + GROUP BY ws_sold_date_sk, ws_item_sk + UNION ALL + SELECT cs_sold_date_sk, cs_item_sk, SUM(cs_quantity) AS total_sales, SUM(cs_net_profit) AS total_profit + FROM catalog_sales + WHERE cs_sold_date_sk IN ( + SELECT d_date_sk + FROM date_dim + WHERE d_year = 2023 + ) + GROUP BY cs_sold_date_sk, cs_item_sk +), +AggregatedSales AS ( + SELECT sd.ws_item_sk, SUM(sd.total_sales) AS yearly_sales, SUM(sd.total_profit) AS yearly_profit + FROM SalesData sd + GROUP BY sd.ws_item_sk +), +TopSellingItems AS ( + SELECT i.i_item_sk, i.i_item_desc, ag.yearly_sales, ag.yearly_profit, + RANK() 
OVER (ORDER BY ag.yearly_sales DESC) AS sales_rank + FROM AggregatedSales ag + JOIN item i ON ag.ws_item_sk = i.i_item_sk + WHERE ag.yearly_sales > 1000 +) +SELECT tsi.i_item_desc, tsi.yearly_sales, tsi.yearly_profit +FROM TopSellingItems tsi +WHERE tsi.sales_rank <= 10 +ORDER BY tsi.yearly_sales DESC, tsi.yearly_profit DESC; diff --git a/vortex-bench/sqlstorm/tpcds/34000.sql b/vortex-bench/sqlstorm/tpcds/34000.sql new file mode 100644 index 00000000000..45cf7b30e15 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/34000.sql @@ -0,0 +1,55 @@ + +WITH RECURSIVE sales_hierarchy AS ( + SELECT + ws_bill_customer_sk AS customer_sk, + SUM(ws_ext_sales_price) AS total_sales, + COUNT(ws_order_number) AS order_count, + 0 AS level + FROM web_sales + GROUP BY ws_bill_customer_sk + + UNION ALL + + SELECT + sr_customer_sk AS customer_sk, + SUM(sr_return_amt) AS total_sales, + COUNT(sr_ticket_number) AS order_count, + 1 AS level + FROM store_returns + GROUP BY sr_customer_sk +), +total_sales AS ( + SELECT + c.c_customer_sk, + COALESCE(SUM(sh.total_sales), 0) AS total_sales, + COUNT(DISTINCT sh.order_count) AS total_orders + FROM customer c + LEFT JOIN sales_hierarchy sh ON c.c_customer_sk = sh.customer_sk + GROUP BY c.c_customer_sk +), +ranked_customers AS ( + SELECT + c.c_customer_id, + ts.total_sales, + ts.total_orders, + DENSE_RANK() OVER (ORDER BY ts.total_sales DESC) AS sales_rank + FROM total_sales ts + JOIN customer c ON ts.c_customer_sk = c.c_customer_sk +) +SELECT + rc.c_customer_id, + rc.total_sales, + rc.total_orders, + CASE + WHEN rc.sales_rank <= 10 THEN 'Top 10%' + WHEN rc.sales_rank <= 30 THEN 'Top 30%' + ELSE 'Others' + END AS customer_segment +FROM ranked_customers rc +WHERE rc.total_orders > 5 +AND rc.total_sales > ( + SELECT AVG(total_sales) + FROM total_sales +) OR rc.total_sales IS NULL +ORDER BY rc.total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/34489.sql b/vortex-bench/sqlstorm/tpcds/34489.sql new file mode 100644 index 
00000000000..bb617ff7366 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/34489.sql @@ -0,0 +1,62 @@ + +WITH RECURSIVE sales_cte AS ( + SELECT + ws_sold_date_sk, + ws_item_sk, + SUM(ws_quantity) AS total_quantity, + SUM(ws_net_paid) AS total_net_paid, + ROW_NUMBER() OVER (PARTITION BY ws_item_sk ORDER BY ws_sold_date_sk DESC) AS rn + FROM web_sales + GROUP BY ws_sold_date_sk, ws_item_sk +), +total_sales AS ( + SELECT + ws_item_sk, + SUM(total_quantity) AS quantity_sold, + SUM(total_net_paid) AS net_sales + FROM sales_cte + WHERE rn <= 10 + GROUP BY ws_item_sk +), +high_demand_items AS ( + SELECT + i.i_item_id, + i.i_product_name, + t.quantity_sold, + t.net_sales + FROM total_sales t + JOIN item i ON t.ws_item_sk = i.i_item_sk + WHERE t.quantity_sold > ( + SELECT AVG(quantity_sold) FROM total_sales + ) +), +customer_data AS ( + SELECT + c.c_customer_id, + cd.cd_gender, + hd.hd_income_band_sk, + COUNT(DISTINCT s.ss_ticket_number) AS purchases_count + FROM customer c + LEFT JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN household_demographics hd ON cd.cd_demo_sk = hd.hd_demo_sk + LEFT JOIN store_sales s ON c.c_customer_sk = s.ss_customer_sk + GROUP BY c.c_customer_id, cd.cd_gender, hd.hd_income_band_sk +), +top_customers AS ( + SELECT + c.c_customer_id AS customer_id, + SUM(cd.purchases_count) AS total_purchases + FROM customer_data cd + JOIN customer c ON cd.c_customer_id = c.c_customer_id + GROUP BY c.c_customer_id + ORDER BY total_purchases DESC + LIMIT 5 +) +SELECT hv.i_product_name, + hv.quantity_sold, + hv.net_sales, + tc.customer_id, + tc.total_purchases +FROM high_demand_items hv +JOIN top_customers tc ON hv.quantity_sold > 100 +ORDER BY hv.net_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/34677.sql b/vortex-bench/sqlstorm/tpcds/34677.sql new file mode 100644 index 00000000000..a542345af20 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/34677.sql @@ -0,0 +1,64 @@ + +WITH RECURSIVE sales_data AS ( + SELECT + 
ws_order_number, + ws_item_sk, + ws_sold_date_sk, + ws_quantity, + ws_sales_price, + ROW_NUMBER() OVER (PARTITION BY ws_item_sk ORDER BY ws_sold_date_sk DESC) AS rn + FROM + web_sales + WHERE + ws_sold_date_sk >= 20210101 +), +inventory_data AS ( + SELECT + inv_date_sk, + inv_item_sk, + SUM(inv_quantity_on_hand) AS total_quantity + FROM + inventory + WHERE + inv_date_sk BETWEEN 20210101 AND 20220331 + GROUP BY + inv_date_sk, inv_item_sk +), +customer_segments AS ( + SELECT + cd_demo_sk, + COUNT(DISTINCT c_customer_sk) AS customer_count, + MAX(cd_purchase_estimate) AS max_purchase_estimate + FROM + customer_demographics + JOIN + customer ON c_current_cdemo_sk = cd_demo_sk + GROUP BY + cd_demo_sk +) +SELECT + c.c_first_name, + c.c_last_name, + sa.ws_order_number, + sa.ws_item_sk, + sa.ws_quantity, + sa.ws_sales_price, + case + when (sa.ws_quantity > id.total_quantity) then 'Exceeds Inventory' + else 'Within Inventory' + end as inventory_status, + cs.customer_count AS segment_customer_count, + cs.max_purchase_estimate +FROM + customer c +JOIN + sales_data sa ON c.c_customer_sk = sa.ws_order_number +LEFT JOIN + inventory_data id ON sa.ws_item_sk = id.inv_item_sk +LEFT JOIN + customer_segments cs ON c.c_current_cdemo_sk = cs.cd_demo_sk +WHERE + sa.rn = 1 + AND c.c_birth_year IS NOT NULL +ORDER BY + c.c_last_name, c.c_first_name; diff --git a/vortex-bench/sqlstorm/tpcds/4013.sql b/vortex-bench/sqlstorm/tpcds/4013.sql new file mode 100644 index 00000000000..241ced73fab --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/4013.sql @@ -0,0 +1,43 @@ +WITH sales_data AS ( + SELECT + ws.ws_item_sk, + SUM(ws.ws_quantity) AS total_quantity, + SUM(ws.ws_net_profit) AS total_net_profit, + DENSE_RANK() OVER (PARTITION BY ws.ws_item_sk ORDER BY SUM(ws.ws_net_profit) DESC) AS profit_rank + FROM + web_sales ws + JOIN + item i ON ws.ws_item_sk = i.i_item_sk + WHERE + ws.ws_sold_date_sk BETWEEN 2400 AND 2470 + GROUP BY + ws.ws_item_sk +), +top_items AS ( + SELECT + sd.ws_item_sk, + 
sd.total_quantity, + sd.total_net_profit, + i.i_item_desc, + ROW_NUMBER() OVER (ORDER BY sd.total_net_profit DESC) AS row_num + FROM + sales_data sd + JOIN + item i ON sd.ws_item_sk = i.i_item_sk + WHERE + sd.profit_rank <= 10 + ORDER BY + sd.total_net_profit DESC +) +SELECT + ti.row_num, + ti.i_item_desc, + ti.total_quantity, + COALESCE(pr.p_promo_name, 'No Promotion') AS promotion_name, + ti.total_net_profit +FROM + top_items ti +LEFT JOIN + promotion pr ON ti.ws_item_sk = pr.p_item_sk AND pr.p_start_date_sk <= 2470 AND pr.p_end_date_sk >= 2400 +ORDER BY + ti.row_num; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/4646.sql b/vortex-bench/sqlstorm/tpcds/4646.sql new file mode 100644 index 00000000000..77f1d1cf215 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/4646.sql @@ -0,0 +1,68 @@ + +WITH SalesData AS ( + SELECT + ws_sold_date_sk, + ws_item_sk, + SUM(ws_quantity) AS total_quantity, + SUM(ws_net_paid_inc_tax) AS total_sales, + COUNT(DISTINCT ws_order_number) AS total_orders + FROM + web_sales + WHERE + ws_sold_date_sk BETWEEN (SELECT MAX(d_date_sk) FROM date_dim WHERE d_year = 2023) - 30 + AND (SELECT MAX(d_date_sk) FROM date_dim WHERE d_year = 2023) + GROUP BY + ws_sold_date_sk, ws_item_sk +), +ItemDetails AS ( + SELECT + i_item_sk, + i_item_desc, + i_product_name, + i_current_price, + i_brand, + i_class, + i_category + FROM + item +), +CustomerStats AS ( + SELECT + cd_demo_sk, + COUNT(c_customer_sk) AS customer_count, + AVG(cd_purchase_estimate) AS average_purchase_estimate + FROM + customer_demographics cd + JOIN + customer c ON cd.cd_demo_sk = c.c_current_cdemo_sk + GROUP BY + cd_demo_sk +), +RankedSales AS ( + SELECT + sd.ws_item_sk, + sd.total_quantity, + sd.total_sales, + ROW_NUMBER() OVER (ORDER BY sd.total_sales DESC) AS sales_rank + FROM + SalesData sd +) +SELECT + rs.sales_rank, + id.i_item_desc, + id.i_product_name, + id.i_current_price, + rs.total_quantity, + rs.total_sales, + cs.customer_count, + cs.average_purchase_estimate 
+FROM + RankedSales rs +JOIN + ItemDetails id ON rs.ws_item_sk = id.i_item_sk +LEFT JOIN + CustomerStats cs ON id.i_item_sk = cs.cd_demo_sk +WHERE + rs.sales_rank <= 10 +ORDER BY + rs.sales_rank; diff --git a/vortex-bench/sqlstorm/tpcds/4975.sql b/vortex-bench/sqlstorm/tpcds/4975.sql new file mode 100644 index 00000000000..1018c5b10a5 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/4975.sql @@ -0,0 +1,59 @@ + +WITH CustomerPurchaseData AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_ext_sales_price) AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS order_count, + COUNT(DISTINCT ws.ws_web_page_sk) AS unique_pages_visited, + ROW_NUMBER() OVER (PARTITION BY c.c_customer_sk ORDER BY SUM(ws.ws_ext_sales_price) DESC) AS rank_sales + FROM + customer c + LEFT JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + JOIN + date_dim dd ON ws.ws_sold_date_sk = dd.d_date_sk + WHERE + dd.d_year = 2022 + GROUP BY + c.c_customer_sk, c.c_first_name, c.c_last_name +), +TopCustomers AS ( + SELECT + cpd.c_customer_sk, + cpd.c_first_name, + cpd.c_last_name, + cpd.total_sales, + cpd.order_count, + cpd.unique_pages_visited + FROM + CustomerPurchaseData cpd + WHERE + cpd.rank_sales <= 10 +), +ReturnData AS ( + SELECT + sr.sr_customer_sk, + SUM(sr.sr_return_amt_inc_tax) AS total_return_amt, + COUNT(sr.sr_ticket_number) AS returns_count + FROM + store_returns sr + GROUP BY + sr.sr_customer_sk +) +SELECT + tc.c_first_name, + tc.c_last_name, + tc.total_sales AS customer_total_sales, + COALESCE(rd.total_return_amt, 0) AS total_returns, + tc.order_count, + rd.returns_count, + (tc.total_sales - COALESCE(rd.total_return_amt, 0)) AS net_revenue, + ROUND(COALESCE(rd.total_return_amt, 0) * 100 / NULLIF(tc.total_sales, 0), 2) AS return_percentage +FROM + TopCustomers tc +LEFT JOIN + ReturnData rd ON tc.c_customer_sk = rd.sr_customer_sk +ORDER BY + net_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpcds/5281.sql b/vortex-bench/sqlstorm/tpcds/5281.sql 
new file mode 100644 index 00000000000..1d1cf2d1542 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/5281.sql @@ -0,0 +1,52 @@ + +WITH sales_summary AS ( + SELECT + d.d_year, + d.d_month_seq, + d.d_quarter_seq, + SUM(ws.ws_net_paid) AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + COUNT(DISTINCT ws.ws_bill_customer_sk) AS unique_customers + FROM + web_sales AS ws + JOIN + date_dim AS d ON ws.ws_sold_date_sk = d.d_date_sk + JOIN + customer AS c ON ws.ws_bill_customer_sk = c.c_customer_sk + JOIN + customer_demographics AS cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + WHERE + d.d_year = 2023 + AND cd.cd_gender = 'F' + AND cd.cd_marital_status = 'M' + GROUP BY + d.d_year, d.d_month_seq, d.d_quarter_seq +), avg_sales AS ( + SELECT + d_year, + d_month_seq, + d_quarter_seq, + total_sales, + total_orders, + unique_customers, + total_sales / NULLIF(total_orders, 0) AS avg_order_value, + unique_customers / NULLIF(total_orders, 0) AS avg_customers_per_order + FROM + sales_summary +) + +SELECT + d_year, + d_month_seq, + d_quarter_seq, + total_sales, + total_orders, + unique_customers, + avg_order_value, + avg_customers_per_order +FROM + avg_sales +ORDER BY + d_year, + d_quarter_seq, + d_month_seq; diff --git a/vortex-bench/sqlstorm/tpcds/5391.sql b/vortex-bench/sqlstorm/tpcds/5391.sql new file mode 100644 index 00000000000..dd2f06ece1f --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/5391.sql @@ -0,0 +1,54 @@ + +WITH sales_summary AS ( + SELECT + ws.ws_sold_date_sk, + ws.ws_item_sk, + SUM(ws.ws_quantity) AS total_quantity, + SUM(ws.ws_net_profit) AS total_profit, + SUM(ws.ws_ext_sales_price) AS total_sales + FROM + web_sales ws + JOIN + date_dim dd ON ws.ws_sold_date_sk = dd.d_date_sk + WHERE + dd.d_year = 2023 AND dd.d_month_seq BETWEEN 1 AND 6 + GROUP BY + ws.ws_sold_date_sk, ws.ws_item_sk +), +customer_summary AS ( + SELECT + cd.cd_demo_sk, + COUNT(DISTINCT c.c_customer_sk) AS total_customers, + SUM(cd.cd_purchase_estimate) AS total_purchase_estimate + 
FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + WHERE + cd.cd_gender = 'M' AND cd.cd_marital_status = 'M' + GROUP BY + cd.cd_demo_sk +) +SELECT + ss.ws_item_sk, + ss.total_quantity, + ss.total_profit, + ss.total_sales, + cs.total_customers, + cs.total_purchase_estimate +FROM + sales_summary ss +JOIN + customer_summary cs ON cs.cd_demo_sk IN ( + SELECT + DISTINCT i.i_item_sk + FROM + item i + JOIN + store_sales s ON i.i_item_sk = s.ss_item_sk + WHERE + s.ss_sold_date_sk BETWEEN 20230101 AND 20230630 + ) +ORDER BY + ss.total_profit DESC, ss.total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/5543.sql b/vortex-bench/sqlstorm/tpcds/5543.sql new file mode 100644 index 00000000000..ac726757646 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/5543.sql @@ -0,0 +1,60 @@ + +WITH sales_summary AS ( + SELECT + c.c_customer_id, + SUM(ws.ws_net_profit) AS total_net_profit, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + AVG(ws.ws_quantity) AS avg_quantity_per_order, + MAX(ws.ws_sales_price) AS max_sales_price, + MIN(ws.ws_sales_price) AS min_sales_price + FROM + customer c + JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + WHERE + d.d_year = 2023 + AND c.c_current_addr_sk IS NOT NULL + GROUP BY + c.c_customer_id +), +customer_demographics AS ( + SELECT + cd.cd_demo_sk, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + SUM(ss.total_net_profit) AS total_profit_by_demo + FROM + customer_demographics cd + JOIN + customer c ON cd.cd_demo_sk = c.c_current_cdemo_sk + JOIN + sales_summary ss ON c.c_customer_id = ss.c_customer_id + GROUP BY + cd.cd_demo_sk, cd.cd_gender, cd.cd_marital_status, cd.cd_education_status +), +top_customers AS ( + SELECT + cd.cd_demo_sk, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.total_profit_by_demo, + RANK() OVER (ORDER BY cd.total_profit_by_demo DESC) AS rank + FROM + 
customer_demographics cd +) +SELECT + tc.rank, + tc.cd_gender, + tc.cd_marital_status, + tc.cd_education_status, + tc.total_profit_by_demo +FROM + top_customers tc +WHERE + tc.rank <= 10 +ORDER BY + tc.rank; diff --git a/vortex-bench/sqlstorm/tpcds/5605.sql b/vortex-bench/sqlstorm/tpcds/5605.sql new file mode 100644 index 00000000000..c3f155eb820 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/5605.sql @@ -0,0 +1,36 @@ + +WITH CustomerInfo AS ( + SELECT + c.c_customer_id, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + SUM(ss.ss_sales_price) AS total_sales + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + JOIN + store_sales ss ON c.c_customer_sk = ss.ss_customer_sk + GROUP BY + c.c_customer_id, cd.cd_gender, cd.cd_marital_status, cd.cd_education_status +), +TopCustomers AS ( + SELECT + c.*, + RANK() OVER (PARTITION BY c.cd_gender ORDER BY c.total_sales DESC) AS sales_rank + FROM + CustomerInfo c +) +SELECT + tc.c_customer_id, + tc.cd_gender, + tc.cd_marital_status, + tc.cd_education_status, + tc.total_sales +FROM + TopCustomers tc +WHERE + tc.sales_rank <= 10 +ORDER BY + tc.cd_gender, tc.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/5933.sql b/vortex-bench/sqlstorm/tpcds/5933.sql new file mode 100644 index 00000000000..ea2c3e61cad --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/5933.sql @@ -0,0 +1,51 @@ + +WITH RankedSales AS ( + SELECT + ws_item_sk, + SUM(ws_quantity) AS total_quantity, + SUM(ws_ext_sales_price) AS total_sales, + ROW_NUMBER() OVER (PARTITION BY ws_item_sk ORDER BY SUM(ws_ext_sales_price) DESC) AS sales_rank + FROM + web_sales + GROUP BY + ws_item_sk +), +TopSellingItems AS ( + SELECT + item.i_item_id, + item.i_item_desc, + RankedSales.total_quantity, + RankedSales.total_sales + FROM + RankedSales + JOIN + item ON RankedSales.ws_item_sk = item.i_item_sk + WHERE + RankedSales.sales_rank <= 10 +), +CustomerDemographics AS ( + SELECT + cd_gender, + cd_marital_status, + 
AVG(cd_purchase_estimate) AS avg_purchase_estimate, + COUNT(DISTINCT c_customer_sk) AS customer_count + FROM + customer + JOIN + customer_demographics ON c_current_cdemo_sk = cd_demo_sk + GROUP BY + cd_gender, cd_marital_status +) +SELECT + tbi.i_item_id, + tbi.i_item_desc, + tbi.total_quantity, + tbi.total_sales, + cd.avg_purchase_estimate, + cd.customer_count +FROM + TopSellingItems tbi +JOIN + CustomerDemographics cd ON cd.cd_marital_status = 'M' +ORDER BY + tbi.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/6120.sql b/vortex-bench/sqlstorm/tpcds/6120.sql new file mode 100644 index 00000000000..7997e96577f --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/6120.sql @@ -0,0 +1,49 @@ + +WITH SalesSummary AS ( + SELECT + ws.ws_bill_customer_sk, + SUM(ws.ws_ext_sales_price) AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS order_count, + AVG(ws.ws_net_profit) AS avg_net_profit, + MAX(ws.ws_sales_price) AS max_sales_price, + MIN(ws.ws_sales_price) AS min_sales_price, + COUNT(DISTINCT ws.ws_ship_mode_sk) AS distinct_shipping_methods, + cd.cd_gender, + cd.cd_marital_status + FROM + web_sales ws + JOIN + customer c ON ws.ws_bill_customer_sk = c.c_customer_sk + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + WHERE + EXISTS (SELECT 1 FROM store s WHERE s.s_store_sk = ws.ws_warehouse_sk AND s.s_state = 'CA') + AND + ws.ws_sold_date_sk BETWEEN (SELECT d_date_sk FROM date_dim WHERE d_date = '2023-01-01') + AND (SELECT d_date_sk FROM date_dim WHERE d_date = '2023-12-31') + GROUP BY + ws.ws_bill_customer_sk, cd.cd_gender, cd.cd_marital_status +), +RankedSales AS ( + SELECT + *, + RANK() OVER (PARTITION BY cd_gender ORDER BY total_sales DESC) AS sales_rank + FROM + SalesSummary +) +SELECT + s.ws_bill_customer_sk, + s.total_sales, + s.order_count, + s.avg_net_profit, + s.max_sales_price, + s.min_sales_price, + s.distinct_shipping_methods, + s.cd_gender, + s.cd_marital_status +FROM + RankedSales s +WHERE + s.sales_rank <= 10 +ORDER BY + 
s.cd_gender, s.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/6481.sql b/vortex-bench/sqlstorm/tpcds/6481.sql new file mode 100644 index 00000000000..a345c72b774 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/6481.sql @@ -0,0 +1,56 @@ + +WITH RankedCustomers AS ( + SELECT + c.c_customer_id, + cd.cd_gender, + cd.cd_marital_status, + COUNT(ss.ss_ticket_number) AS total_sales, + SUM(ss.ss_net_paid) AS total_spent, + RANK() OVER (PARTITION BY cd.cd_gender ORDER BY SUM(ss.ss_net_paid) DESC) AS rank_spending + FROM + customer AS c + JOIN + customer_demographics AS cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + LEFT JOIN + store_sales AS ss ON ss.ss_customer_sk = c.c_customer_sk + GROUP BY + c.c_customer_id, cd.cd_gender, cd.cd_marital_status +), +TopCustomers AS ( + SELECT + rc.c_customer_id, + rc.cd_gender, + rc.cd_marital_status, + rc.total_sales, + rc.total_spent + FROM + RankedCustomers rc + WHERE + rc.rank_spending <= 10 +), +SalesSummary AS ( + SELECT + d.d_year, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + SUM(ws.ws_net_profit) AS total_profit, + SUM(ws.ws_ext_sales_price) AS total_sales_value + FROM + web_sales AS ws + JOIN + date_dim AS d ON ws.ws_sold_date_sk = d.d_date_sk + GROUP BY + d.d_year +) +SELECT + tc.c_customer_id, + tc.cd_gender, + tc.cd_marital_status, + ss.total_orders, + ss.total_profit, + ss.total_sales_value +FROM + TopCustomers tc +JOIN + SalesSummary ss ON ss.total_orders > 100 +ORDER BY + ss.total_sales_value DESC; diff --git a/vortex-bench/sqlstorm/tpcds/6948.sql b/vortex-bench/sqlstorm/tpcds/6948.sql new file mode 100644 index 00000000000..33f18f04648 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/6948.sql @@ -0,0 +1,44 @@ + +WITH CustomerSales AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_ext_sales_price) AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS order_count + FROM + customer c + JOIN + web_sales ws ON c.c_customer_sk = ws.ws_ship_customer_sk + JOIN + date_dim d ON 
ws.ws_sold_date_sk = d.d_date_sk + WHERE + d.d_year = 2023 + GROUP BY + c.c_customer_sk, c.c_first_name, c.c_last_name +), +TopCustomers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cs.total_sales, + cs.order_count, + RANK() OVER (ORDER BY cs.total_sales DESC) AS sales_rank + FROM + CustomerSales cs + JOIN + customer c ON cs.c_customer_sk = c.c_customer_sk +) +SELECT + tc.c_customer_sk, + tc.c_first_name, + tc.c_last_name, + tc.total_sales, + tc.order_count +FROM + TopCustomers tc +WHERE + tc.sales_rank <= 10 +ORDER BY + tc.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/7069.sql b/vortex-bench/sqlstorm/tpcds/7069.sql new file mode 100644 index 00000000000..c00605cc649 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/7069.sql @@ -0,0 +1,53 @@ + +WITH SalesSummary AS ( + SELECT + ws_bill_cdemo_sk AS CustomerDemoSK, + SUM(ws_ext_sales_price) AS TotalSales, + COUNT(DISTINCT ws_order_number) AS TotalOrders, + AVG(ws_sales_price) AS AvgPrice, + MAX(ws_sales_price) AS MaxPrice, + MIN(ws_sales_price) AS MinPrice + FROM + web_sales + WHERE + ws_sold_date_sk BETWEEN (SELECT d_date_sk FROM date_dim WHERE d_date = '2023-10-01') AND + (SELECT d_date_sk FROM date_dim WHERE d_date = '2023-10-31') + GROUP BY + ws_bill_cdemo_sk +), +CustomerDemographics AS ( + SELECT + cd_demo_sk, + cd_gender, + cd_marital_status, + cd_education_status + FROM + customer_demographics +), +HighValueCustomers AS ( + SELECT + s.CustomerDemoSK, + c.cd_gender, + c.cd_marital_status, + c.cd_education_status + FROM + SalesSummary s + JOIN + CustomerDemographics c ON s.CustomerDemoSK = c.cd_demo_sk + WHERE + s.TotalSales > (SELECT AVG(TotalSales) FROM SalesSummary) +) +SELECT + hvc.cd_gender, + hvc.cd_marital_status, + hvc.cd_education_status, + COUNT(*) AS HighValueCustomerCount, + SUM(ss.TotalSales) AS TotalHighValueSales +FROM + HighValueCustomers hvc +JOIN + SalesSummary ss ON hvc.CustomerDemoSK = ss.CustomerDemoSK +GROUP BY + hvc.cd_gender, hvc.cd_marital_status, 
hvc.cd_education_status +ORDER BY + TotalHighValueSales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/715.sql b/vortex-bench/sqlstorm/tpcds/715.sql new file mode 100644 index 00000000000..68252f89a85 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/715.sql @@ -0,0 +1,58 @@ + +WITH CustomerReturns AS ( + SELECT + sr_customer_sk, + SUM(sr_return_amt) AS total_return_amt, + COUNT(sr_ticket_number) AS return_count + FROM + store_returns + GROUP BY + sr_customer_sk +), +ItemSales AS ( + SELECT + ws_ship_customer_sk, + SUM(ws_net_profit) AS total_net_profit, + COUNT(ws_order_number) AS order_count + FROM + web_sales + GROUP BY + ws_ship_customer_sk +), +SalesSummary AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + COALESCE(cr.total_return_amt, 0) AS total_return_amt, + COALESCE(cr.return_count, 0) AS return_count, + COALESCE(isales.total_net_profit, 0) AS total_net_profit, + COALESCE(isales.order_count, 0) AS order_count + FROM + customer c + LEFT JOIN + CustomerReturns cr ON c.c_customer_sk = cr.sr_customer_sk + LEFT JOIN + ItemSales isales ON c.c_customer_sk = isales.ws_ship_customer_sk +) +SELECT + s.c_customer_sk, + s.c_first_name, + s.c_last_name, + s.total_return_amt, + s.return_count, + s.total_net_profit, + s.order_count, + CASE + WHEN s.total_net_profit > s.total_return_amt THEN 'Profitable' + WHEN s.total_net_profit < s.total_return_amt THEN 'Unprofitable' + ELSE 'Break Even' + END AS profitability_status +FROM + SalesSummary s +WHERE + (s.total_return_amt > 1000 OR s.total_net_profit > 5000) +ORDER BY + profitability_status DESC, + total_net_profit DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpcds/7155.sql b/vortex-bench/sqlstorm/tpcds/7155.sql new file mode 100644 index 00000000000..9e7c2483bf8 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/7155.sql @@ -0,0 +1,44 @@ + +WITH SalesData AS ( + SELECT + ws.ws_sold_date_sk, + ws.ws_item_sk, + SUM(ws.ws_quantity) AS total_quantity, + SUM(ws.ws_net_paid) AS total_sales, + 
AVG(ws.ws_sales_price) AS avg_price + FROM + web_sales ws + JOIN date_dim dd ON ws.ws_sold_date_sk = dd.d_date_sk + WHERE + dd.d_year = 2022 + GROUP BY + ws.ws_sold_date_sk, + ws.ws_item_sk +), +TopItems AS ( + SELECT + sd.ws_item_sk, + sd.total_quantity, + sd.total_sales, + ROW_NUMBER() OVER (ORDER BY sd.total_sales DESC) AS sales_rank + FROM + SalesData sd +) +SELECT + ti.ws_item_sk, + ti.total_quantity, + ti.total_sales, + i.i_item_desc, + i.i_current_price, + cd.cd_gender, + cd.cd_marital_status +FROM + TopItems ti +JOIN item i ON ti.ws_item_sk = i.i_item_sk +JOIN store_sales ss ON ti.ws_item_sk = ss.ss_item_sk +JOIN customer c ON ss.ss_customer_sk = c.c_customer_sk +JOIN customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +WHERE + ti.sales_rank <= 10 +ORDER BY + ti.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/7274.sql b/vortex-bench/sqlstorm/tpcds/7274.sql new file mode 100644 index 00000000000..e2dd91dfba2 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/7274.sql @@ -0,0 +1,42 @@ + +WITH SalesData AS ( + SELECT + ws_item_sk, + SUM(ws_ext_sales_price) AS total_sales, + SUM(ws_net_profit) AS total_profit, + COUNT(DISTINCT ws_order_number) AS order_count + FROM + web_sales + WHERE + ws_sold_date_sk BETWEEN (SELECT MIN(d_date_sk) FROM date_dim WHERE d_year = 2023) AND + (SELECT MAX(d_date_sk) FROM date_dim WHERE d_year = 2023) + GROUP BY + ws_item_sk +), +TopItems AS ( + SELECT + i.i_item_sk, + i.i_item_desc, + dd.d_year, + RANK() OVER (PARTITION BY dd.d_year ORDER BY sd.total_sales DESC) AS sales_rank + FROM + Item i + JOIN + SalesData sd ON i.i_item_sk = sd.ws_item_sk + JOIN + date_dim dd ON dd.d_date_sk = sd.ws_item_sk -- Fixed the join condition +) +SELECT + ti.i_item_desc, + ti.sales_rank, + sd.total_sales, + sd.total_profit, + sd.order_count +FROM + TopItems ti +JOIN + SalesData sd ON ti.i_item_sk = sd.ws_item_sk +WHERE + ti.sales_rank <= 10 +ORDER BY + sd.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/7416.sql 
b/vortex-bench/sqlstorm/tpcds/7416.sql new file mode 100644 index 00000000000..0114cb78e2e --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/7416.sql @@ -0,0 +1,63 @@ +WITH sales_summary AS ( + SELECT + d.d_year, + SUM(ws.ws_net_paid) AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + SUM(ws.ws_quantity) AS total_quantity, + COUNT(DISTINCT ws.ws_ship_customer_sk) AS unique_customers + FROM + web_sales ws + JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + GROUP BY + d.d_year +), +customer_summary AS ( + SELECT + cd.cd_gender, + COUNT(DISTINCT c.c_customer_sk) AS total_customers, + AVG(cd.cd_purchase_estimate) AS avg_purchase_estimate, + SUM(cd.cd_dep_count) AS total_dependents + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk + GROUP BY + cd.cd_gender +), +top_products AS ( + SELECT + i.i_item_id, + i.i_item_desc, + SUM(ws.ws_quantity) AS total_quantity_sold + FROM + item i + JOIN + web_sales ws ON i.i_item_sk = ws.ws_item_sk + GROUP BY + i.i_item_id, i.i_item_desc + ORDER BY + total_quantity_sold DESC + LIMIT 10 +) +SELECT + ss.d_year, + ss.total_sales, + ss.total_orders, + ss.total_quantity, + ss.unique_customers, + cs.cd_gender, + cs.total_customers, + cs.avg_purchase_estimate, + cs.total_dependents, + tp.i_item_id, + tp.i_item_desc, + tp.total_quantity_sold +FROM + sales_summary ss +JOIN + customer_summary cs ON TRUE +JOIN + top_products tp ON TRUE +ORDER BY + ss.d_year, cs.cd_gender, tp.total_quantity_sold DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/7572.sql b/vortex-bench/sqlstorm/tpcds/7572.sql new file mode 100644 index 00000000000..34bb075f1fc --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/7572.sql @@ -0,0 +1,63 @@ + +WITH CustomerReturns AS ( + SELECT + wr_returning_customer_sk AS customer_sk, + SUM(wr_return_quantity) AS total_returned_items, + SUM(wr_return_amt) AS total_return_amount, + COUNT(DISTINCT wr_order_number) AS return_count + FROM + 
web_returns + GROUP BY + wr_returning_customer_sk +), +CustomerDemographics AS ( + SELECT + c.c_customer_sk, + cd.cd_gender, + cd.cd_marital_status, + cd.cd_education_status, + cd.cd_purchase_estimate + FROM + customer c + JOIN + customer_demographics cd ON c.c_current_cdemo_sk = cd.cd_demo_sk +), +ReturnStatistics AS ( + SELECT + cd.c_customer_sk, + cd.cd_gender, + cd.cd_marital_status, + COUNT(CASE WHEN cr.return_count > 1 THEN 1 END) AS repeat_returns, + AVG(cr.total_return_amount) AS avg_return_amount, + SUM(cr.total_returned_items) AS total_returned + FROM + CustomerDemographics cd + LEFT JOIN + CustomerReturns cr ON cd.c_customer_sk = cr.customer_sk + GROUP BY + cd.c_customer_sk, cd.cd_gender, cd.cd_marital_status +), +TopReturners AS ( + SELECT + r.cd_gender, + r.cd_marital_status, + COUNT(*) AS customer_count, + SUM(r.total_returned) AS total_items_returned + FROM + ReturnStatistics r + GROUP BY + r.cd_gender, r.cd_marital_status + ORDER BY + total_items_returned DESC +) +SELECT + t.cd_gender, + t.cd_marital_status, + t.customer_count, + t.total_items_returned +FROM + TopReturners t +WHERE + t.customer_count > 10 +ORDER BY + t.total_items_returned DESC; diff --git a/vortex-bench/sqlstorm/tpcds/8082.sql b/vortex-bench/sqlstorm/tpcds/8082.sql new file mode 100644 index 00000000000..86e185f1cb9 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/8082.sql @@ -0,0 +1,47 @@ +WITH SalesData AS ( + SELECT + ss_store_sk, + SUM(ss_quantity) AS total_quantity, + SUM(ss_sales_price) AS total_sales, + AVG(ss_sales_price) AS avg_sales_price, + COUNT(DISTINCT ss_ticket_number) AS total_transactions + FROM + store_sales + WHERE + ss_sold_date_sk BETWEEN 2451545 AND 2451549 + GROUP BY + ss_store_sk +), +CustomerData AS ( + SELECT + c.c_customer_sk, + COUNT(DISTINCT sr_ticket_number) AS total_returns, + SUM(sr_return_amt) AS total_return_amount, + AVG(sr_return_quantity) AS avg_return_quantity + FROM + customer c + LEFT JOIN + store_returns sr ON c.c_customer_sk = 
sr.sr_customer_sk + GROUP BY + c.c_customer_sk +) +SELECT + w.w_warehouse_name, + s.s_store_name, + sd.total_quantity, + sd.total_sales, + sd.avg_sales_price, + cd.total_returns, + cd.total_return_amount, + cd.avg_return_quantity +FROM + SalesData sd +JOIN + store s ON sd.ss_store_sk = s.s_store_sk +JOIN + warehouse w ON s.s_store_sk = w.w_warehouse_sk +LEFT JOIN + CustomerData cd ON sd.ss_store_sk = cd.c_customer_sk +ORDER BY + total_sales DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/8436.sql b/vortex-bench/sqlstorm/tpcds/8436.sql new file mode 100644 index 00000000000..85772b000e0 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/8436.sql @@ -0,0 +1,47 @@ + +WITH SalesSummary AS ( + SELECT + c.c_customer_id AS customer_id, + SUM(ws.ws_ext_sales_price) AS total_sales, + AVG(ws.ws_net_profit) AS avg_profit, + COUNT(ws.ws_order_number) AS order_count, + d.d_year, + d.d_month_seq + FROM + web_sales ws + JOIN + customer c ON ws.ws_bill_customer_sk = c.c_customer_sk + JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + WHERE + d.d_year = 2022 + GROUP BY + c.c_customer_id, d.d_year, d.d_month_seq +), +RankedSales AS ( + SELECT + customer_id, + total_sales, + avg_profit, + order_count, + ROW_NUMBER() OVER (PARTITION BY d_year, d_month_seq ORDER BY total_sales DESC) AS sales_rank, + d_year, + d_month_seq + FROM + SalesSummary +) +SELECT + r.customer_id, + r.total_sales, + r.avg_profit, + r.order_count, + d.d_month_seq, + d.d_year +FROM + RankedSales r +JOIN + date_dim d ON r.d_year = d.d_year AND r.d_month_seq = d.d_month_seq +WHERE + r.sales_rank <= 10 +ORDER BY + d.d_year, d.d_month_seq, r.total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/858.sql b/vortex-bench/sqlstorm/tpcds/858.sql new file mode 100644 index 00000000000..c246ddfc861 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/858.sql @@ -0,0 +1,53 @@ + +WITH CustomerSales AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_net_profit) 
AS total_sales, + COUNT(DISTINCT ws.ws_order_number) AS order_count + FROM customer c + LEFT JOIN web_sales ws ON c.c_customer_sk = ws.ws_ship_customer_sk + WHERE ws.ws_sold_date_sk BETWEEN 2450000 AND 2450600 + GROUP BY c.c_customer_sk, c.c_first_name, c.c_last_name +), + +TopCustomers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cs.total_sales, + cs.order_count, + RANK() OVER (ORDER BY cs.total_sales DESC) AS sales_rank + FROM CustomerSales cs + JOIN customer c ON cs.c_customer_sk = c.c_customer_sk +) + +SELECT + tc.c_customer_sk, + tc.c_first_name, + tc.c_last_name, + COALESCE(tc.total_sales, 0) AS total_sales, + COALESCE(tc.order_count, 0) AS order_count, + CASE + WHEN tc.sales_rank <= 10 THEN 'Top 10' + ELSE 'Others' + END AS customer_category +FROM TopCustomers tc +WHERE tc.order_count > 0 + +UNION ALL + +SELECT + ca.ca_address_sk, + 'N/A' AS c_first_name, + 'N/A' AS c_last_name, + SUM(sr.sr_return_amt) AS total_sales, + COUNT(sr.sr_ticket_number) AS order_count, + 'Returns' AS customer_category +FROM store_returns sr +LEFT JOIN customer_address ca ON sr.sr_addr_sk = ca.ca_address_sk +WHERE sr.sr_returned_date_sk BETWEEN 2450000 AND 2450600 +GROUP BY ca.ca_address_sk +ORDER BY total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpcds/8788.sql b/vortex-bench/sqlstorm/tpcds/8788.sql new file mode 100644 index 00000000000..ef4f74d3d83 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/8788.sql @@ -0,0 +1,53 @@ +WITH CustomerSales AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_ext_sales_price) AS total_sales, + COUNT(ws.ws_order_number) AS order_count + FROM + customer c + JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk + WHERE + c.c_birth_year >= 1980 + GROUP BY + c.c_customer_sk, c.c_first_name, c.c_last_name +), +TopCustomers AS ( + SELECT + c.c_customer_sk, + c.c_first_name, + c.c_last_name, + cs.total_sales, + cs.order_count, + RANK() OVER (ORDER BY cs.total_sales DESC) AS sales_rank + 
FROM + CustomerSales cs + JOIN + customer c ON cs.c_customer_sk = c.c_customer_sk +) +SELECT + tc.c_customer_sk, + tc.c_first_name, + tc.c_last_name, + tc.total_sales, + tc.order_count, + d.d_date AS last_order_date, + i.i_item_desc, + s.s_store_name +FROM + TopCustomers tc +JOIN + store_sales ss ON ss.ss_customer_sk = tc.c_customer_sk +JOIN + date_dim d ON d.d_date_sk = ss.ss_sold_date_sk +JOIN + item i ON i.i_item_sk = ss.ss_item_sk +JOIN + store s ON s.s_store_sk = ss.ss_store_sk +WHERE + tc.sales_rank <= 10 + AND d.d_year = 2000 +ORDER BY + tc.total_sales DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpcds/8934.sql b/vortex-bench/sqlstorm/tpcds/8934.sql new file mode 100644 index 00000000000..3aa996fa6db --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/8934.sql @@ -0,0 +1,27 @@ + +SELECT + c.c_first_name, + c.c_last_name, + ca.ca_city, + ca.ca_state, + SUM(ws.ws_quantity) AS total_quantity_sold, + SUM(ws.ws_ext_sales_price) AS total_sales, + d.d_year, + d.d_month_seq +FROM + customer c +JOIN + customer_address ca ON c.c_current_addr_sk = ca.ca_address_sk +JOIN + web_sales ws ON c.c_customer_sk = ws.ws_bill_customer_sk +JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk +WHERE + d.d_year = 2023 AND + d.d_month_seq IN (1, 2, 3) AND + ca.ca_state = 'CA' +GROUP BY + c.c_first_name, c.c_last_name, ca.ca_city, ca.ca_state, d.d_year, d.d_month_seq +ORDER BY + total_sales DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpcds/9708.sql b/vortex-bench/sqlstorm/tpcds/9708.sql new file mode 100644 index 00000000000..ba3fff46113 --- /dev/null +++ b/vortex-bench/sqlstorm/tpcds/9708.sql @@ -0,0 +1,35 @@ + +SELECT + c.c_customer_id, + c.c_first_name, + c.c_last_name, + SUM(ws.ws_ext_sales_price) AS total_sales, + SUM(ws.ws_ext_tax) AS total_tax, + MAX(d.d_date) AS last_purchase_date, + COUNT(DISTINCT ws.ws_order_number) AS total_orders, + ce.cc_call_center_id, + COUNT(DISTINCT sr.sr_ticket_number) AS total_returns, + AVG(inv.inv_quantity_on_hand) AS 
avg_inventory, + p.p_promo_name +FROM + customer AS c +JOIN + web_sales AS ws ON c.c_customer_sk = ws.ws_bill_customer_sk +JOIN + call_center AS ce ON ws.ws_ship_customer_sk = ce.cc_call_center_sk +JOIN + date_dim AS d ON ws.ws_sold_date_sk = d.d_date_sk +LEFT JOIN + store_returns AS sr ON c.c_customer_sk = sr.sr_customer_sk +LEFT JOIN + inventory AS inv ON ws.ws_item_sk = inv.inv_item_sk AND inv.inv_warehouse_sk = ws.ws_warehouse_sk +LEFT JOIN + promotion AS p ON ws.ws_promo_sk = p.p_promo_sk +WHERE + d.d_year = 2023 + AND c.c_current_cdemo_sk IS NOT NULL +GROUP BY + c.c_customer_id, c.c_first_name, c.c_last_name, ce.cc_call_center_id, p.p_promo_name +ORDER BY + total_sales DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpch/10021.sql b/vortex-bench/sqlstorm/tpch/10021.sql new file mode 100644 index 00000000000..cf062cd611e --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10021.sql @@ -0,0 +1,23 @@ +SELECT + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'ASIA' +GROUP BY + p.p_name +ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/10084.sql b/vortex-bench/sqlstorm/tpch/10084.sql new file mode 100644 index 00000000000..4481679c866 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10084.sql @@ -0,0 +1,21 @@ +SELECT + n.n_name AS nation, + r.r_name AS region, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON c.c_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + o.o_orderdate >= DATE '1997-01-01' AND o.o_orderdate < DATE '1998-01-01' +GROUP 
BY + n.n_name, r.r_name +ORDER BY + revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/10163.sql b/vortex-bench/sqlstorm/tpch/10163.sql new file mode 100644 index 00000000000..0ea22cba17f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10163.sql @@ -0,0 +1,18 @@ +SELECT + p.p_partkey, + p.p_name, + s.s_suppkey, + s.s_name, + SUM(ps.ps_availqty) AS total_available_quantity, + SUM(ps.ps_supplycost) AS total_supply_cost +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +GROUP BY + p.p_partkey, p.p_name, s.s_suppkey, s.s_name +ORDER BY + total_available_quantity DESC, total_supply_cost DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpch/10179.sql b/vortex-bench/sqlstorm/tpch/10179.sql new file mode 100644 index 00000000000..389a230628a --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10179.sql @@ -0,0 +1,23 @@ +SELECT + n.n_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey AND s.s_suppkey = ps.ps_suppkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +WHERE + o.o_orderdate >= '1995-01-01' AND o.o_orderdate < '1996-01-01' +GROUP BY + n.n_name +ORDER BY + revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/10241.sql b/vortex-bench/sqlstorm/tpch/10241.sql new file mode 100644 index 00000000000..44c94479479 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10241.sql @@ -0,0 +1,6 @@ +SELECT p_name, SUM(ps_supplycost * ps_availqty) AS total_cost +FROM part +JOIN partsupp ON part.p_partkey = partsupp.ps_partkey +GROUP BY p_name +ORDER BY total_cost DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/10287.sql b/vortex-bench/sqlstorm/tpch/10287.sql new file mode 100644 index 
00000000000..b42e3e83093 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10287.sql @@ -0,0 +1,18 @@ +SELECT + n.n_name AS nation_name, + sum(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON c.c_nationkey = n.n_nationkey +WHERE + l.l_shipdate >= DATE '1995-01-01' + AND l.l_shipdate < DATE '1995-02-01' +GROUP BY + n.n_name +ORDER BY + total_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/10326.sql b/vortex-bench/sqlstorm/tpch/10326.sql new file mode 100644 index 00000000000..29b6cca7fcc --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10326.sql @@ -0,0 +1,24 @@ +SELECT + p.p_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + lineitem l +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'ASIA' + AND l.l_shipdate >= DATE '1993-01-01' + AND l.l_shipdate < DATE '1994-01-01' +GROUP BY + p.p_name +ORDER BY + revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/1036.sql b/vortex-bench/sqlstorm/tpch/1036.sql new file mode 100644 index 00000000000..bbfa00b2c47 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/1036.sql @@ -0,0 +1,64 @@ + +WITH ranked_customers AS ( + SELECT + c.c_custkey, + c.c_name, + c.c_acctbal, + RANK() OVER (PARTITION BY c.c_nationkey ORDER BY c.c_acctbal DESC) AS rank, + n.n_name + FROM + customer c + JOIN + nation n ON c.c_nationkey = n.n_nationkey +), +high_value_orders AS ( + SELECT + o.o_orderkey, + o.o_custkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_value + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + GROUP BY + o.o_orderkey, o.o_custkey + HAVING + SUM(l.l_extendedprice * (1 - l.l_discount)) > 10000 
+), +supplier_part_info AS ( + SELECT + s.s_suppkey, + p.p_partkey, + p.p_name, + ps.ps_supplycost, + ROW_NUMBER() OVER (PARTITION BY p.p_partkey ORDER BY ps.ps_supplycost DESC) AS supply_rank + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + part p ON ps.ps_partkey = p.p_partkey +) +SELECT + r.n_name, + rc.c_name, + rc.c_acctbal, + HO.o_orderkey, + HO.total_value, + STRING_AGG(DISTINCT CONCAT(sp.p_name, ': ', sp.ps_supplycost), ', ') AS part_supplier_info +FROM + ranked_customers rc +LEFT JOIN + high_value_orders HO ON rc.c_custkey = HO.o_custkey +LEFT JOIN + supplier_part_info sp ON rc.c_custkey = sp.s_suppkey +LEFT JOIN + nation r ON rc.n_name = r.n_name +WHERE + rc.rank = 1 +AND + HO.total_value IS NOT NULL +GROUP BY + r.n_name, rc.c_name, rc.c_acctbal, HO.o_orderkey, HO.total_value +ORDER BY + rc.c_acctbal DESC, r.n_name; diff --git a/vortex-bench/sqlstorm/tpch/10493.sql b/vortex-bench/sqlstorm/tpch/10493.sql new file mode 100644 index 00000000000..f17bc1d1b7f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10493.sql @@ -0,0 +1,20 @@ +SELECT + n.n_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + customer c +JOIN + orders o ON c.c_custkey = o.o_custkey +JOIN + lineitem l ON o.o_orderkey = l.l_orderkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +WHERE + l.l_shipdate >= DATE '1995-01-01' AND + l.l_shipdate < DATE '1996-01-01' +GROUP BY + n.n_name +ORDER BY + total_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/10495.sql b/vortex-bench/sqlstorm/tpch/10495.sql new file mode 100644 index 00000000000..be453ab08d0 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10495.sql @@ -0,0 +1,16 @@ +SELECT + n.n_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON c.c_nationkey = n.n_nationkey +GROUP BY + n.n_name 
+ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/10547.sql b/vortex-bench/sqlstorm/tpch/10547.sql new file mode 100644 index 00000000000..78dfd491dc9 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10547.sql @@ -0,0 +1,21 @@ +SELECT + n.n_name AS nation_name, + r.r_name AS region_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON c.c_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + o.o_orderdate >= DATE '1996-01-01' + AND o.o_orderdate < DATE '1997-01-01' +GROUP BY + n.n_name, r.r_name +ORDER BY + revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/10595.sql b/vortex-bench/sqlstorm/tpch/10595.sql new file mode 100644 index 00000000000..93b429a0f13 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10595.sql @@ -0,0 +1,23 @@ +SELECT + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'EUROPE' +GROUP BY + p.p_name +ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/10617.sql b/vortex-bench/sqlstorm/tpch/10617.sql new file mode 100644 index 00000000000..41d7df3ae12 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10617.sql @@ -0,0 +1,21 @@ +SELECT + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 
'ASIA' +GROUP BY + p.p_name +ORDER BY + revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/10688.sql b/vortex-bench/sqlstorm/tpch/10688.sql new file mode 100644 index 00000000000..4fd142d5be7 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/10688.sql @@ -0,0 +1,17 @@ +SELECT + n_name AS nation, + sum(l_extendedprice * (1 - l_discount)) AS total_revenue +FROM + lineitem +JOIN + orders ON l_orderkey = o_orderkey +JOIN + customer ON o_custkey = c_custkey +JOIN + nation ON c_nationkey = n_nationkey +WHERE + l_shipdate >= '1997-01-01' AND l_shipdate < '1998-01-01' +GROUP BY + n_name +ORDER BY + total_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/11176.sql b/vortex-bench/sqlstorm/tpch/11176.sql new file mode 100644 index 00000000000..e816091b6fe --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/11176.sql @@ -0,0 +1,18 @@ +SELECT + l_returnflag, + l_linestatus, + SUM(l_quantity) AS sum_quantity, + SUM(l_extendedprice) AS sum_extendedprice, + SUM(l_extendedprice * (1 - l_discount)) AS sum_discounted_price, + AVG(l_tax) AS avg_tax, + COUNT(*) AS total_lineitems +FROM + lineitem +WHERE + l_shipdate BETWEEN '1994-01-01' AND '1994-12-31' +GROUP BY + l_returnflag, + l_linestatus +ORDER BY + l_returnflag, + l_linestatus; diff --git a/vortex-bench/sqlstorm/tpch/11227.sql b/vortex-bench/sqlstorm/tpch/11227.sql new file mode 100644 index 00000000000..6047c61239d --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/11227.sql @@ -0,0 +1,18 @@ +SELECT + n.n_name, + o.o_orderstatus, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON c.c_nationkey = n.n_nationkey +GROUP BY + n.n_name, o.o_orderstatus +HAVING + SUM(l.l_extendedprice * (1 - l.l_discount)) > 10000 +ORDER BY + total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpch/11518.sql b/vortex-bench/sqlstorm/tpch/11518.sql new file mode 100644 index 
00000000000..bb4e7dfe927 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/11518.sql @@ -0,0 +1,24 @@ +SELECT + p.p_brand, + p.p_type, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'ASIA' + AND l.l_shipdate >= DATE '1994-01-01' + AND l.l_shipdate < DATE '1995-01-01' +GROUP BY + p.p_brand, p.p_type +ORDER BY + revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/11729.sql b/vortex-bench/sqlstorm/tpch/11729.sql new file mode 100644 index 00000000000..2818fed26d0 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/11729.sql @@ -0,0 +1,18 @@ +SELECT + n.n_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + partsupp ps ON ps.ps_partkey = p.p_partkey +JOIN + supplier s ON s.s_suppkey = ps.ps_suppkey +JOIN + nation n ON n.n_nationkey = s.s_nationkey +GROUP BY + n.n_name +ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/11851.sql b/vortex-bench/sqlstorm/tpch/11851.sql new file mode 100644 index 00000000000..4db6c253e45 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/11851.sql @@ -0,0 +1,15 @@ +SELECT + p.p_name, + s.s_name, + SUM(ps.ps_availqty) AS total_available, + SUM(ps.ps_supplycost) AS total_supply_cost +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +GROUP BY + p.p_name, s.s_name +ORDER BY + total_available DESC, total_supply_cost ASC; diff --git a/vortex-bench/sqlstorm/tpch/11916.sql b/vortex-bench/sqlstorm/tpch/11916.sql new file mode 100644 index 00000000000..be6eff1a25d --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/11916.sql @@ -0,0 +1,6 @@ +SELECT l_orderkey, SUM(l_extendedprice * (1 - 
l_discount)) AS revenue +FROM lineitem +WHERE l_shipdate >= DATE '1994-01-01' AND l_shipdate < DATE '1995-01-01' +GROUP BY l_orderkey +ORDER BY revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/12238.sql b/vortex-bench/sqlstorm/tpch/12238.sql new file mode 100644 index 00000000000..e2565462cb2 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/12238.sql @@ -0,0 +1,16 @@ +SELECT + n.n_name, + SUM(ps.ps_supplycost * l.l_quantity) AS total_cost +FROM + lineitem l +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +GROUP BY + n.n_name +ORDER BY + total_cost DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/12431.sql b/vortex-bench/sqlstorm/tpch/12431.sql new file mode 100644 index 00000000000..d004d96a6fd --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/12431.sql @@ -0,0 +1,24 @@ +SELECT + p.p_partkey, + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey AND s.s_suppkey = ps.ps_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'Europe' +GROUP BY + p.p_partkey, p.p_name +ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/1261.sql b/vortex-bench/sqlstorm/tpch/1261.sql new file mode 100644 index 00000000000..61b9874135a --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/1261.sql @@ -0,0 +1,48 @@ +WITH RankedOrders AS ( + SELECT o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + o.o_orderstatus, + ROW_NUMBER() OVER (PARTITION BY o.o_orderstatus ORDER BY o.o_orderdate DESC) AS rn + FROM orders o +), +SupplierSummary AS ( + SELECT ps.ps_partkey, + s.s_nationkey, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supply_cost, + 
AVG(s.s_acctbal) AS avg_supplier_balance + FROM partsupp ps + JOIN supplier s ON ps.ps_suppkey = s.s_suppkey + GROUP BY ps.ps_partkey, s.s_nationkey +), +CustomerRegion AS ( + SELECT c.c_custkey, + c.c_name, + n.n_regionkey, + r.r_name, + ROW_NUMBER() OVER (PARTITION BY r.r_name ORDER BY c.c_acctbal DESC) AS region_rank + FROM customer c + JOIN nation n ON c.c_nationkey = n.n_nationkey + JOIN region r ON n.n_regionkey = r.r_regionkey +), +LineItemDetails AS ( + SELECT l.l_orderkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue, + COUNT(*) AS item_count + FROM lineitem l + WHERE l.l_shipdate >= '1997-01-01' AND l.l_shipdate < '1998-01-01' + GROUP BY l.l_orderkey +) +SELECT cr.r_name, + COUNT(DISTINCT co.o_orderkey) AS total_orders, + SUM(ss.total_supply_cost) AS total_supply_cost, + AVG(ss.avg_supplier_balance) AS average_supplier_balance, + SUM(ld.revenue) AS total_revenue, + SUM(ld.item_count) AS total_items +FROM CustomerRegion cr +LEFT JOIN RankedOrders co ON cr.c_custkey = co.o_orderkey +LEFT JOIN SupplierSummary ss ON cr.n_regionkey = ss.s_nationkey +LEFT JOIN LineItemDetails ld ON co.o_orderkey = ld.l_orderkey +WHERE cr.region_rank <= 10 +GROUP BY cr.r_name +ORDER BY total_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/12634.sql b/vortex-bench/sqlstorm/tpch/12634.sql new file mode 100644 index 00000000000..247bd57712e --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/12634.sql @@ -0,0 +1,12 @@ +SELECT + l_orderkey, + SUM(l_extendedprice * (1 - l_discount)) AS total_revenue +FROM + lineitem +WHERE + l_shipdate >= '1996-01-01' AND l_shipdate < '1996-02-01' +GROUP BY + l_orderkey +ORDER BY + total_revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/13096.sql b/vortex-bench/sqlstorm/tpch/13096.sql new file mode 100644 index 00000000000..0b8910c859a --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/13096.sql @@ -0,0 +1,19 @@ +SELECT + l_returnflag, + l_linestatus, + SUM(l_quantity) AS 
sum_qty, + SUM(l_extendedprice) AS sum_base_price, + SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price, + SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge, + COUNT(*) AS count_order +FROM + lineitem +WHERE + l_shipdate >= DATE '1995-01-01' + AND l_shipdate < DATE '1995-01-01' + INTERVAL '1' YEAR +GROUP BY + l_returnflag, + l_linestatus +ORDER BY + l_returnflag, + l_linestatus; diff --git a/vortex-bench/sqlstorm/tpch/13114.sql b/vortex-bench/sqlstorm/tpch/13114.sql new file mode 100644 index 00000000000..f7507e4549f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/13114.sql @@ -0,0 +1,17 @@ +SELECT + n_name AS nation, + SUM(l_extendedprice * (1 - l_discount)) AS total_revenue +FROM + lineitem +JOIN + orders ON l_orderkey = o_orderkey +JOIN + customer ON o_custkey = c_custkey +JOIN + nation ON c_nationkey = n_nationkey +WHERE + o_orderdate >= DATE '1996-01-01' AND o_orderdate < DATE '1997-01-01' +GROUP BY + n_name +ORDER BY + total_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/13394.sql b/vortex-bench/sqlstorm/tpch/13394.sql new file mode 100644 index 00000000000..3e604444921 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/13394.sql @@ -0,0 +1,24 @@ +SELECT + p.p_partkey, + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'EUROPE' +GROUP BY + p.p_partkey, p.p_name +ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/13610.sql b/vortex-bench/sqlstorm/tpch/13610.sql new file mode 100644 index 00000000000..bf4555c8c2d --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/13610.sql @@ -0,0 +1,20 @@ +SELECT + n.n_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS 
total_revenue +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON c.c_nationkey = n.n_nationkey +WHERE + l.l_shipdate >= DATE '1997-01-01' +AND + l.l_shipdate < DATE '1997-12-31' +GROUP BY + n.n_name +ORDER BY + total_revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/13618.sql b/vortex-bench/sqlstorm/tpch/13618.sql new file mode 100644 index 00000000000..a3d5e14adb6 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/13618.sql @@ -0,0 +1,12 @@ + +SELECT COUNT(*) AS total_orders, + SUM(o.o_totalprice) AS total_revenue, + AVG(l.l_extendedprice) AS avg_lineitem_price, + COUNT(DISTINCT c.c_custkey) AS total_customers +FROM orders o +JOIN lineitem l ON o.o_orderkey = l.l_orderkey +JOIN customer c ON o.o_custkey = c.c_custkey +WHERE o.o_orderstatus = 'O' + AND l.l_shipdate BETWEEN DATE '1997-01-01' AND DATE '1997-12-31' +GROUP BY o.o_orderpriority +ORDER BY total_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/13828.sql b/vortex-bench/sqlstorm/tpch/13828.sql new file mode 100644 index 00000000000..28b83afc9d6 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/13828.sql @@ -0,0 +1,17 @@ +SELECT + p.p_partkey, + p.p_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +WHERE + o.o_orderdate BETWEEN DATE '1997-01-01' AND DATE '1997-12-31' +GROUP BY + p.p_partkey, p.p_name +ORDER BY + total_revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/14313.sql b/vortex-bench/sqlstorm/tpch/14313.sql new file mode 100644 index 00000000000..6fedd575699 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14313.sql @@ -0,0 +1,14 @@ +SELECT + p.p_partkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +WHERE + l.l_shipdate >= DATE 
'1997-01-01' AND l.l_shipdate < DATE '1997-12-31' +GROUP BY + p.p_partkey +ORDER BY + total_revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/14416.sql b/vortex-bench/sqlstorm/tpch/14416.sql new file mode 100644 index 00000000000..18d458f45f0 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14416.sql @@ -0,0 +1,13 @@ +SELECT + l.l_shipmode, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +WHERE + o.o_orderdate >= '1997-01-01' AND o.o_orderdate < '1997-02-01' +GROUP BY + l.l_shipmode +ORDER BY + revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/14578.sql b/vortex-bench/sqlstorm/tpch/14578.sql new file mode 100644 index 00000000000..bb6b6e31b9b --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14578.sql @@ -0,0 +1,17 @@ +SELECT + p.p_partkey, + p.p_name, + SUM(ps.ps_availqty) AS total_availqty, + AVG(ps.ps_supplycost) AS avg_supplycost, + COUNT(DISTINCT s.s_suppkey) AS supplier_count +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +GROUP BY + p.p_partkey, p.p_name +ORDER BY + total_availqty DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpch/14662.sql b/vortex-bench/sqlstorm/tpch/14662.sql new file mode 100644 index 00000000000..89a5bc6bac3 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14662.sql @@ -0,0 +1,20 @@ +SELECT + l_returnflag, + l_linestatus, + SUM(l_quantity) AS sum_qty, + SUM(l_extendedprice) AS sum_base_price, + SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price, + SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge, + AVG(l_quantity) AS avg_qty, + AVG(l_extendedprice) AS avg_price, + AVG(l_discount) AS avg_disc, + COUNT(*) AS count_order +FROM + lineitem +WHERE + l_shipdate >= DATE '1994-01-01' + AND l_shipdate < DATE '1995-01-01' +GROUP BY + l_returnflag, l_linestatus +ORDER BY + l_returnflag, l_linestatus; diff 
--git a/vortex-bench/sqlstorm/tpch/14804.sql b/vortex-bench/sqlstorm/tpch/14804.sql new file mode 100644 index 00000000000..b44d092139a --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14804.sql @@ -0,0 +1,29 @@ +SELECT + l.l_orderkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue, + SUM(l.l_quantity) AS quantity_sold, + AVG(l.l_tax) AS average_tax +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey AND s.s_suppkey = ps.ps_suppkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'ASIA' + AND o.o_orderdate BETWEEN DATE '1994-01-01' AND DATE '1994-12-31' +GROUP BY + l.l_orderkey +ORDER BY + revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/14879.sql b/vortex-bench/sqlstorm/tpch/14879.sql new file mode 100644 index 00000000000..6d212a3fc05 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14879.sql @@ -0,0 +1,17 @@ +SELECT + p.p_partkey, + p.p_name, + s.s_name, + SUM(ps.ps_availqty) AS total_available_quantity, + AVG(ps.ps_supplycost) AS average_supply_cost +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +GROUP BY + p.p_partkey, p.p_name, s.s_name +ORDER BY + total_available_quantity DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/14908.sql b/vortex-bench/sqlstorm/tpch/14908.sql new file mode 100644 index 00000000000..8e20a7a3340 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14908.sql @@ -0,0 +1,22 @@ +SELECT + n.n_name AS nation, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + nation n +JOIN + supplier s ON n.n_nationkey = s.s_nationkey +JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +JOIN + lineitem l ON p.p_partkey = 
l.l_partkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +WHERE + o.o_orderdate >= '1996-01-01' + AND o.o_orderdate < '1997-01-01' +GROUP BY + n.n_name +ORDER BY + total_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/14923.sql b/vortex-bench/sqlstorm/tpch/14923.sql new file mode 100644 index 00000000000..192dbe49b7f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/14923.sql @@ -0,0 +1,22 @@ +SELECT + p.p_partkey, + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + r.r_name = 'ASIA' +GROUP BY + p.p_partkey, p.p_name +ORDER BY + total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/15163.sql b/vortex-bench/sqlstorm/tpch/15163.sql new file mode 100644 index 00000000000..d5b7f23647e --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/15163.sql @@ -0,0 +1,7 @@ +SELECT p.p_name, s.s_name, ps.ps_supplycost +FROM part p +JOIN partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN supplier s ON ps.ps_suppkey = s.s_suppkey +WHERE p.p_size > 10 +ORDER BY ps.ps_supplycost DESC +LIMIT 5; diff --git a/vortex-bench/sqlstorm/tpch/15211.sql b/vortex-bench/sqlstorm/tpch/15211.sql new file mode 100644 index 00000000000..028a25de89e --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/15211.sql @@ -0,0 +1,15 @@ +SELECT + p.p_partkey, + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice) AS total_extended_price +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +GROUP BY + p.p_partkey, + p.p_name +ORDER BY + total_extended_price DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/15267.sql b/vortex-bench/sqlstorm/tpch/15267.sql new file mode 100644 index 00000000000..31c504e9077 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/15267.sql @@ -0,0 +1,6 @@ +SELECT 
p.p_name, s.s_name, ps.ps_supplycost +FROM part p +JOIN partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN supplier s ON ps.ps_suppkey = s.s_suppkey +WHERE ps.ps_availqty > 100 +ORDER BY ps.ps_supplycost DESC; diff --git a/vortex-bench/sqlstorm/tpch/15283.sql b/vortex-bench/sqlstorm/tpch/15283.sql new file mode 100644 index 00000000000..5f250845265 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/15283.sql @@ -0,0 +1,6 @@ +SELECT p.p_name, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM part p +JOIN lineitem l ON p.p_partkey = l.l_partkey +GROUP BY p.p_name +ORDER BY total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/15535.sql b/vortex-bench/sqlstorm/tpch/15535.sql new file mode 100644 index 00000000000..5d6c8d62f01 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/15535.sql @@ -0,0 +1,12 @@ +SELECT + p_brand, + COUNT(p_partkey) AS part_count, + AVG(p_retailprice) AS avg_price +FROM + part +GROUP BY + p_brand +HAVING + COUNT(p_partkey) > 10 +ORDER BY + avg_price DESC; diff --git a/vortex-bench/sqlstorm/tpch/15940.sql b/vortex-bench/sqlstorm/tpch/15940.sql new file mode 100644 index 00000000000..3446380518f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/15940.sql @@ -0,0 +1,17 @@ +SELECT + p.p_partkey, + p.p_name, + s.s_suppkey, + s.s_name, + ps.ps_supplycost +FROM + part AS p +JOIN + partsupp AS ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier AS s ON ps.ps_suppkey = s.s_suppkey +WHERE + s.s_acctbal > 1000 +ORDER BY + ps.ps_supplycost DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/16740.sql b/vortex-bench/sqlstorm/tpch/16740.sql new file mode 100644 index 00000000000..d23e8b59dbc --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/16740.sql @@ -0,0 +1,5 @@ +SELECT p_brand, SUM(l_extendedprice) AS total_revenue +FROM part +JOIN lineitem ON p_partkey = l_partkey +GROUP BY p_brand +ORDER BY total_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/17019.sql b/vortex-bench/sqlstorm/tpch/17019.sql new file mode 100644 index 
00000000000..383cf522a84 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/17019.sql @@ -0,0 +1,18 @@ +SELECT + p.p_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_price +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +WHERE + n.n_name = 'FRANCE' +GROUP BY + p.p_name +ORDER BY + total_price DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/17442.sql b/vortex-bench/sqlstorm/tpch/17442.sql new file mode 100644 index 00000000000..6924a5f8f98 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/17442.sql @@ -0,0 +1,14 @@ +SELECT + p.p_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +WHERE + l.l_shipdate >= '1997-01-01' AND l.l_shipdate < '1997-12-31' +GROUP BY + p.p_name +ORDER BY + total_revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/17567.sql b/vortex-bench/sqlstorm/tpch/17567.sql new file mode 100644 index 00000000000..c493e73abfa --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/17567.sql @@ -0,0 +1,6 @@ +SELECT p_brand, SUM(l_extendedprice * (1 - l_discount)) AS total_revenue +FROM part +JOIN lineitem ON p_partkey = l_partkey +GROUP BY p_brand +ORDER BY total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/17781.sql b/vortex-bench/sqlstorm/tpch/17781.sql new file mode 100644 index 00000000000..9bc24e58a0c --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/17781.sql @@ -0,0 +1,6 @@ +SELECT l_orderkey, SUM(l_extendedprice * (1 - l_discount)) AS total_revenue +FROM lineitem +WHERE l_shipdate >= DATE '1995-01-01' +GROUP BY l_orderkey +ORDER BY total_revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/17999.sql b/vortex-bench/sqlstorm/tpch/17999.sql new file mode 100644 index 00000000000..16454533b49 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/17999.sql @@ -0,0 +1,10 @@ +SELECT p.p_name, 
SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM part p +JOIN lineitem l ON p.p_partkey = l.l_partkey +JOIN supplier s ON l.l_suppkey = s.s_suppkey +JOIN nation n ON s.s_nationkey = n.n_nationkey +JOIN region r ON n.n_regionkey = r.r_regionkey +WHERE r.r_name = 'ASIA' +GROUP BY p.p_name +ORDER BY revenue DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/18133.sql b/vortex-bench/sqlstorm/tpch/18133.sql new file mode 100644 index 00000000000..f53643ae3d8 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/18133.sql @@ -0,0 +1,15 @@ +SELECT + p.p_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +WHERE + l.l_shipdate >= '1997-01-01' + AND l.l_shipdate < '1998-01-01' +GROUP BY + p.p_name +ORDER BY + revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/18498.sql b/vortex-bench/sqlstorm/tpch/18498.sql new file mode 100644 index 00000000000..60ac1796789 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/18498.sql @@ -0,0 +1,5 @@ +SELECT p_partkey, p_name, p_retailprice +FROM part +WHERE p_size > 10 +ORDER BY p_retailprice DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/19300.sql b/vortex-bench/sqlstorm/tpch/19300.sql new file mode 100644 index 00000000000..c13f2b96bf6 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/19300.sql @@ -0,0 +1,5 @@ +SELECT p_brand, COUNT(*) AS supplier_count +FROM part +JOIN partsupp ON part.p_partkey = partsupp.ps_partkey +JOIN supplier ON partsupp.ps_suppkey = supplier.s_suppkey +GROUP BY p_brand; diff --git a/vortex-bench/sqlstorm/tpch/19410.sql b/vortex-bench/sqlstorm/tpch/19410.sql new file mode 100644 index 00000000000..42c71fbb8c7 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/19410.sql @@ -0,0 +1,13 @@ +SELECT + p_brand, + COUNT(DISTINCT ps_suppkey) AS supplier_count, + AVG(ps_supplycost) AS avg_supplycost +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +GROUP BY + p_brand +ORDER BY + 
supplier_count DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/19935.sql b/vortex-bench/sqlstorm/tpch/19935.sql new file mode 100644 index 00000000000..44038781392 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/19935.sql @@ -0,0 +1,14 @@ +SELECT + p.p_partkey, + p.p_name, + p.p_retailprice, + SUM(l.l_quantity) AS total_quantity +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +GROUP BY + p.p_partkey, p.p_name, p.p_retailprice +ORDER BY + total_quantity DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/2156.sql b/vortex-bench/sqlstorm/tpch/2156.sql new file mode 100644 index 00000000000..af465a1d772 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/2156.sql @@ -0,0 +1,67 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + c.c_mktsegment, + RANK() OVER (PARTITION BY c.c_mktsegment ORDER BY o.o_totalprice DESC) AS segment_rank + FROM + orders o + JOIN + customer c ON o.o_custkey = c.c_custkey + WHERE + o.o_orderdate >= DATE '1997-01-01' + AND o.o_orderdate < DATE '1998-01-01' +), +SupplierPartSummary AS ( + SELECT + ps.ps_partkey, + s.s_nationkey, + SUM(ps.ps_availqty) AS total_avail_qty, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_cost + FROM + partsupp ps + JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey + GROUP BY + ps.ps_partkey, s.s_nationkey + HAVING + SUM(ps.ps_availqty) > 100 +), +HighValueOrders AS ( + SELECT + ro.o_orderkey, + ro.o_totalprice, + ro.o_orderdate, + ro.c_mktsegment + FROM + RankedOrders ro + WHERE + ro.segment_rank <= 5 +) +SELECT + hvo.o_orderkey, + hvo.o_totalprice, + hvo.o_orderdate, + n.n_name AS nation, + pp.p_name AS part_name, + COALESCE(sp.total_avail_qty, 0) AS available_quantity, + COALESCE(sp.total_cost, 0) AS total_cost +FROM + HighValueOrders hvo +LEFT JOIN + lineitem l ON hvo.o_orderkey = l.l_orderkey +LEFT JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +LEFT JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +LEFT JOIN + nation n ON s.s_nationkey = 
n.n_nationkey +LEFT JOIN + part pp ON ps.ps_partkey = pp.p_partkey +LEFT JOIN + SupplierPartSummary sp ON ps.ps_partkey = sp.ps_partkey AND s.s_nationkey = sp.s_nationkey +WHERE + hvo.o_totalprice > (SELECT AVG(o_totalprice) FROM orders) +ORDER BY + hvo.o_orderdate DESC, hvo.o_totalprice DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/22041.sql b/vortex-bench/sqlstorm/tpch/22041.sql new file mode 100644 index 00000000000..6687c24ad47 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/22041.sql @@ -0,0 +1,53 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_acctbal, + ROW_NUMBER() OVER (PARTITION BY s.s_nationkey ORDER BY s.s_acctbal DESC) AS rnk, + SUM(ps.ps_supplycost) OVER (PARTITION BY s.s_nationkey) AS total_supply_cost + FROM + supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey +), +SubQueryCustomer AS ( + SELECT + c.c_custkey, + COUNT(o.o_orderkey) AS order_count + FROM + customer c + LEFT JOIN orders o ON c.c_custkey = o.o_custkey + GROUP BY c.c_custkey + HAVING COUNT(o.o_orderkey) > (SELECT AVG(order_count) FROM (SELECT COUNT(o.o_orderkey) AS order_count FROM orders o GROUP BY o.o_custkey) AS sub_avg) +), +FilteredParts AS ( + SELECT + p.p_partkey, + p.p_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue + FROM + part p + JOIN lineitem l ON p.p_partkey = l.l_partkey + GROUP BY p.p_partkey, p.p_name + HAVING SUM(l.l_extendedprice * (1 - l.l_discount)) > 50000 +) +SELECT + r.r_name, + COUNT(DISTINCT cs.c_custkey) AS customer_count, + SUM(fp.revenue) AS total_revenue, + MAX(rs.total_supply_cost) AS max_supply_cost +FROM + region r +LEFT JOIN nation n ON r.r_regionkey = n.n_regionkey +LEFT JOIN RankedSuppliers rs ON n.n_nationkey = rs.s_suppkey +LEFT JOIN SubQueryCustomer cs ON cs.c_custkey = rs.s_suppkey +LEFT JOIN FilteredParts fp ON fp.p_partkey = rs.s_suppkey +WHERE + r.r_name IS NOT NULL + AND (rs.rnk = 1 OR rs.total_supply_cost IS NULL) +GROUP BY + r.r_name +HAVING + SUM(fp.revenue) IS NOT 
NULL + AND COUNT(DISTINCT cs.c_custkey) > 0 +ORDER BY + customer_count DESC, total_revenue ASC; diff --git a/vortex-bench/sqlstorm/tpch/2281.sql b/vortex-bench/sqlstorm/tpch/2281.sql new file mode 100644 index 00000000000..562e51b2203 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/2281.sql @@ -0,0 +1,65 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_acctbal, + ROW_NUMBER() OVER (PARTITION BY s.s_nationkey ORDER BY s.s_acctbal DESC) AS rn + FROM + supplier s +), +AggregatedOrders AS ( + SELECT + o.o_orderkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + o.o_orderdate + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + WHERE + l.l_shipdate >= DATE '1997-01-01' + GROUP BY + o.o_orderkey, o.o_orderdate +), +TotalRevenueByNation AS ( + SELECT + n.n_name, + SUM(a.total_revenue) AS total_revenue + FROM + nation n + JOIN + customer c ON n.n_nationkey = c.c_nationkey + JOIN + orders o ON c.c_custkey = o.o_custkey + JOIN + AggregatedOrders a ON o.o_orderkey = a.o_orderkey + GROUP BY + n.n_name +), +HighestRevenueRegion AS ( + SELECT + r.r_name, + SUM(t.total_revenue) AS region_revenue + FROM + region r + JOIN + nation n ON r.r_regionkey = n.n_regionkey + JOIN + TotalRevenueByNation t ON n.n_name = t.n_name + GROUP BY + r.r_name +) +SELECT + r.r_name, + COALESCE(h.region_revenue, 0) AS total_revenue, + COUNT(DISTINCT s.s_suppkey) AS supplier_count +FROM + region r +LEFT JOIN + HighestRevenueRegion h ON r.r_name = h.r_name +LEFT JOIN + RankedSuppliers s ON s.rn <= 5 +GROUP BY + r.r_name, h.region_revenue +ORDER BY + total_revenue DESC, supplier_count DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/2294.sql b/vortex-bench/sqlstorm/tpch/2294.sql new file mode 100644 index 00000000000..8e20ee66f5f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/2294.sql @@ -0,0 +1,68 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_acctbal, + s.s_nationkey, + ROW_NUMBER() OVER 
(PARTITION BY s.s_nationkey ORDER BY s.s_acctbal DESC) as rn + FROM + supplier s +), +AvailableParts AS ( + SELECT + ps.ps_partkey, + SUM(ps.ps_availqty) AS total_available + FROM + partsupp ps + GROUP BY + ps.ps_partkey +), +HighValueOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_value + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + WHERE + o.o_orderstatus = 'O' + GROUP BY + o.o_orderkey, o.o_orderdate + HAVING + SUM(l.l_extendedprice * (1 - l.l_discount)) > 10000 +) +SELECT + n.n_name, + p.p_name, + COALESCE(SUM(hvo.total_value), 0) AS total_order_value, + AVG(COALESCE(rs.s_acctbal, 0)) AS avg_supplier_balance, + COUNT(DISTINCT rs.s_suppkey) AS supplier_count, + pot.total_available, + CASE + WHEN COUNT(DISTINCT rs.s_suppkey) > 0 THEN 'Suppliers Available' + ELSE 'No Suppliers Available' + END AS supplier_availability +FROM + part p +LEFT JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +LEFT JOIN + RankedSuppliers rs ON rs.s_suppkey = ps.ps_suppkey AND rs.rn <= 5 +LEFT JOIN + AvailableParts pot ON pot.ps_partkey = p.p_partkey +LEFT JOIN + HighValueOrders hvo ON hvo.o_orderkey = ps.ps_partkey +JOIN + nation n ON n.n_nationkey = rs.s_nationkey +WHERE + p.p_size = ( + SELECT MAX(p2.p_size) + FROM part p2 + WHERE p2.p_type = p.p_type + ) +GROUP BY + n.n_name, p.p_name, pot.total_available +ORDER BY + total_order_value DESC, avg_supplier_balance DESC; diff --git a/vortex-bench/sqlstorm/tpch/23058.sql b/vortex-bench/sqlstorm/tpch/23058.sql new file mode 100644 index 00000000000..1c883fa3a68 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/23058.sql @@ -0,0 +1,79 @@ + +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_acctbal, + RANK() OVER (PARTITION BY n.n_nationkey ORDER BY s.s_acctbal DESC) AS rank + FROM + supplier s + JOIN + nation n ON s.s_nationkey = n.n_nationkey +), +HighValueParts AS ( + SELECT + p.p_partkey, + p.p_name, + SUM(ps.ps_availqty * 
ps.ps_supplycost) AS total_parts_value + FROM + part p + JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey + GROUP BY + p.p_partkey, p.p_name + HAVING + SUM(ps.ps_availqty) > 1000 +), +OrderDetails AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS order_total + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + GROUP BY + o.o_orderkey, o.o_orderdate + HAVING + SUM(l.l_discount) IS NULL OR SUM(l.l_discount) < 0.10 +), +FinalReport AS ( + SELECT + ns.n_name AS nation_name, + rs.s_name AS supplier_name, + hp.p_name AS part_name, + od.order_total, + ROW_NUMBER() OVER (PARTITION BY ns.n_nationkey ORDER BY od.order_total DESC) AS order_rank + FROM + RankedSuppliers rs + JOIN + nation ns ON rs.s_suppkey = ns.n_nationkey + JOIN + HighValueParts hp ON hp.total_parts_value > 0 + JOIN + OrderDetails od ON od.o_orderkey IN ( + SELECT o.o_orderkey + FROM orders o + WHERE o.o_orderstatus = 'O' AND o.o_totalprice < 5000 + ) + LEFT JOIN + lineitem l ON od.o_orderkey = l.l_orderkey AND l.l_returnflag = 'R' + WHERE + rs.rank = 1 +) +SELECT + fr.nation_name, + fr.supplier_name, + fr.part_name, + fr.order_total, + CASE + WHEN fr.order_rank IS NULL THEN 'No Rank Available' + ELSE CAST(fr.order_rank AS VARCHAR) + END AS order_rank +FROM + FinalReport fr +WHERE + fr.order_total IS NOT NULL +ORDER BY + fr.nation_name, fr.order_total DESC; diff --git a/vortex-bench/sqlstorm/tpch/24117.sql b/vortex-bench/sqlstorm/tpch/24117.sql new file mode 100644 index 00000000000..b9db9e89b49 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/24117.sql @@ -0,0 +1,65 @@ +WITH RankedParts AS ( + SELECT + p.p_partkey, + p.p_name, + p.p_retailprice, + ROW_NUMBER() OVER (PARTITION BY p.p_brand ORDER BY p.p_retailprice DESC) AS brand_rank + FROM + part p + WHERE + p.p_size BETWEEN 1 AND 30 + AND p.p_retailprice IS NOT NULL +), +CustomerOrders AS ( + SELECT + c.c_custkey, + c.c_name, + SUM(o.o_totalprice) AS total_spent, + COUNT(o.o_orderkey) AS 
order_count + FROM + customer c + LEFT JOIN + orders o ON c.c_custkey = o.o_custkey + GROUP BY + c.c_custkey, c.c_name + HAVING + SUM(o.o_totalprice) IS NOT NULL +), +SupplierInfo AS ( + SELECT + s.s_suppkey, + s.s_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS supplier_value + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY + s.s_suppkey, s.s_name + HAVING + COUNT(ps.ps_partkey) > 0 +) +SELECT + r.r_name, + COALESCE(cp.c_name, 'Unknown Customer') AS top_customer, + COALESCE(rp.p_name, 'No Ranked Parts') AS top_part_name, + si.supplier_value, + CASE + WHEN si.supplier_value >= 100000 THEN 'High Value' + WHEN si.supplier_value BETWEEN 50000 AND 99999 THEN 'Medium Value' + ELSE 'Low Value' + END AS supplier_category +FROM + region r +LEFT JOIN + nation n ON r.r_regionkey = n.n_regionkey +LEFT JOIN + CustomerOrders cp ON n.n_nationkey = cp.c_custkey +LEFT JOIN + RankedParts rp ON rp.brand_rank = 1 +LEFT JOIN + SupplierInfo si ON si.supplier_value >= 50000 +WHERE + n.n_name LIKE '%land%' OR r.r_name IS NULL +ORDER BY + r.r_name, si.supplier_value DESC; diff --git a/vortex-bench/sqlstorm/tpch/25133.sql b/vortex-bench/sqlstorm/tpch/25133.sql new file mode 100644 index 00000000000..27f19ea1609 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/25133.sql @@ -0,0 +1,29 @@ +SELECT + CONCAT_WS(' ', c.c_name, s.s_name) AS supplier_customer_name, + LEFT(p.p_name, 15) AS short_part_name, + COUNT(DISTINCT o.o_orderkey) AS total_orders, + SUM(l.l_quantity) AS total_quantity, + AVG(l.l_extendedprice * (1 - l.l_discount)) AS avg_price_after_discount, + MAX(l.l_shipdate) AS last_ship_date, + MIN(l.l_shipdate) AS first_ship_date, + SUM(CASE WHEN l.l_returnflag = 'R' THEN 1 ELSE 0 END) AS total_returns, + SUM(CASE WHEN l.l_linestatus = 'F' THEN l.l_quantity ELSE 0 END) AS fulfilled_quantity +FROM + customer c +JOIN + orders o ON c.c_custkey = o.o_custkey +JOIN + lineitem l ON o.o_orderkey = l.l_orderkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey 
+JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +WHERE + p.p_comment LIKE '%fragile%' + AND c.c_mktsegment = 'BUILDING' +GROUP BY + c.c_name, s.s_name, p.p_name +ORDER BY + total_orders DESC, avg_price_after_discount DESC; diff --git a/vortex-bench/sqlstorm/tpch/25377.sql b/vortex-bench/sqlstorm/tpch/25377.sql new file mode 100644 index 00000000000..f907a24f009 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/25377.sql @@ -0,0 +1,27 @@ + +SELECT + p.p_name, + COUNT(DISTINCT ps.ps_suppkey) AS supplier_count, + SUM(ps.ps_availqty) AS total_available_quantity, + ROUND(AVG(ps.ps_supplycost), 2) AS average_supply_cost, + SUBSTRING(p.p_comment, 1, 10) AS short_comment, + CASE + WHEN CHAR_LENGTH(p.p_name) > 10 THEN 'Long Name' + ELSE 'Short Name' + END AS name_length_category +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +WHERE + s.s_acctbal > 0 + AND p.p_size BETWEEN 1 AND 30 +GROUP BY + p.p_name, p.p_comment, p.p_size +HAVING + COUNT(DISTINCT ps.ps_suppkey) > 5 +ORDER BY + total_available_quantity DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/26346.sql b/vortex-bench/sqlstorm/tpch/26346.sql new file mode 100644 index 00000000000..47271ea8251 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/26346.sql @@ -0,0 +1,46 @@ +WITH RankedParts AS ( + SELECT + p.p_partkey, + p.p_name, + p.p_brand, + p.p_type, + LENGTH(p.p_name) AS name_length, + SUBSTRING(p.p_comment, 1, 10) AS short_comment + FROM + part p + WHERE + p.p_retailprice > 100.00 +), +SupplierCounts AS ( + SELECT + ps.ps_partkey, + COUNT(DISTINCT ps.ps_suppkey) AS supplier_count + FROM + partsupp ps + GROUP BY + ps.ps_partkey +), +FinalResults AS ( + SELECT + r.p_partkey, + r.p_name, + r.p_brand, + r.p_type, + s.supplier_count, + CONCAT('Part: ', r.p_name, ' | Brand: ', r.p_brand) AS description + FROM + RankedParts r + JOIN + SupplierCounts s ON r.p_partkey = s.ps_partkey +) +SELECT + 
f.p_partkey, + f.description, + f.supplier_count +FROM + FinalResults f +WHERE + f.supplier_count > 2 +ORDER BY + f.supplier_count DESC, + f.p_partkey; diff --git a/vortex-bench/sqlstorm/tpch/27030.sql b/vortex-bench/sqlstorm/tpch/27030.sql new file mode 100644 index 00000000000..33cf5d1681a --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/27030.sql @@ -0,0 +1,24 @@ +SELECT + p.p_name, + COUNT(DISTINCT s.s_suppkey) AS supplier_count, + AVG(ps.ps_supplycost) AS avg_supplycost, + STRING_AGG(DISTINCT n.n_name, ', ') AS nations_supplied, + RANK() OVER (ORDER BY AVG(ps.ps_supplycost) DESC) AS supply_rank +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +WHERE + p.p_brand LIKE '%BrandA%' + AND LENGTH(p.p_comment) > 10 + AND n.n_name NOT LIKE 'N%' +GROUP BY + p.p_name +HAVING + COUNT(DISTINCT s.s_suppkey) > 5 +ORDER BY + supply_rank; diff --git a/vortex-bench/sqlstorm/tpch/27153.sql b/vortex-bench/sqlstorm/tpch/27153.sql new file mode 100644 index 00000000000..bc5aae19723 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/27153.sql @@ -0,0 +1,29 @@ +SELECT + p.p_name AS product_name, + s.s_name AS supplier_name, + c.c_name AS customer_name, + o.o_orderkey AS order_id, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + COUNT(DISTINCT o.o_orderkey) AS order_count, + STRING_AGG(DISTINCT CONCAT('OrderID:', o.o_orderkey, ', Product:', p.p_name), '; ') AS order_details +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + part p ON l.l_partkey = p.p_partkey +JOIN + customer c ON o.o_custkey = c.c_custkey +WHERE + p.p_comment LIKE '%green%' + AND s.s_address LIKE '%USA%' + AND o.o_orderdate BETWEEN '1997-01-01' AND '1997-12-31' +GROUP BY + p.p_name, s.s_name, c.c_name, o.o_orderkey +ORDER BY + total_revenue DESC +LIMIT 10; \ No newline 
at end of file diff --git a/vortex-bench/sqlstorm/tpch/27797.sql b/vortex-bench/sqlstorm/tpch/27797.sql new file mode 100644 index 00000000000..9d15172d874 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/27797.sql @@ -0,0 +1,26 @@ +SELECT + p.p_name, + s.s_name, + c.c_name, + SUM(l.l_quantity) AS total_quantity, + AVG(l.l_extendedprice) AS avg_extended_price, + MAX(l.l_discount) AS max_discount, + STRING_AGG(CONCAT(l.l_comment, ' (Order: ', o.o_orderkey, ')'), '; ') AS detailed_comments +FROM + part p +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + supplier s ON l.l_suppkey = s.s_suppkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +WHERE + p.p_name LIKE '%widget%' +GROUP BY + p.p_name, s.s_name, c.c_name +HAVING + SUM(l.l_quantity) > 100 +ORDER BY + total_quantity DESC; diff --git a/vortex-bench/sqlstorm/tpch/27806.sql b/vortex-bench/sqlstorm/tpch/27806.sql new file mode 100644 index 00000000000..2b4ed3547df --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/27806.sql @@ -0,0 +1,22 @@ +WITH String_Benchmark AS ( + SELECT + p.p_name, + LENGTH(p.p_name) AS name_length, + UPPER(p.p_mfgr) AS upper_mfgr, + LOWER(p.p_brand) AS lower_brand, + CONCAT(p.p_type, ' - ', p.p_container) AS type_container, + REPLACE(p.p_comment, 'smokeless', 'flame-free') AS modified_comment, + SUBSTRING(p.p_comment, 1, 10) AS comment_excerpt + FROM part p + WHERE p.p_retailprice > 100.00 +) +SELECT + sb.name_length, + COUNT(sb.upper_mfgr) AS upper_mfgr_count, + COUNT(sb.lower_brand) AS lower_brand_count, + COUNT(sb.type_container) AS type_container_count, + COUNT(sb.modified_comment) AS modified_comment_count, + COUNT(sb.comment_excerpt) AS comment_excerpt_count +FROM String_Benchmark sb +GROUP BY sb.name_length +ORDER BY sb.name_length DESC; diff --git a/vortex-bench/sqlstorm/tpch/279.sql b/vortex-bench/sqlstorm/tpch/279.sql new file mode 100644 index 00000000000..0474df64033 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/279.sql @@ -0,0 
+1,71 @@ +WITH SupplierSales AS ( + SELECT + s.s_suppkey, + s.s_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + lineitem l ON ps.ps_partkey = l.l_partkey + GROUP BY + s.s_suppkey, s.s_name +), +CustomerOrders AS ( + SELECT + c.c_custkey, + c.c_name, + SUM(o.o_totalprice) AS total_order_value + FROM + customer c + JOIN + orders o ON c.c_custkey = o.o_custkey + WHERE + o.o_orderdate >= DATE '1994-01-01' + AND o.o_orderdate < DATE '1995-01-01' + GROUP BY + c.c_custkey, c.c_name +), +NationSummary AS ( + SELECT + n.n_nationkey, + n.n_name, + COUNT(DISTINCT s.s_suppkey) AS supplier_count + FROM + nation n + LEFT JOIN + supplier s ON n.n_nationkey = s.s_nationkey + GROUP BY + n.n_nationkey, n.n_name +), +FinalReport AS ( + SELECT + cs.c_name, + cs.total_order_value, + ss.total_sales, + ns.supplier_count, + RANK() OVER (ORDER BY cs.total_order_value DESC) AS order_rank + FROM + CustomerOrders cs + LEFT JOIN + SupplierSales ss ON cs.c_custkey = ss.s_suppkey + LEFT JOIN + NationSummary ns ON cs.c_custkey = ns.n_nationkey +) + +SELECT + fr.c_name, + fr.total_order_value, + COALESCE(fr.total_sales, 0) AS total_sales, + fr.supplier_count, + CASE + WHEN fr.order_rank <= 10 THEN 'Top Customer' + ELSE 'Regular Customer' + END AS customer_category +FROM + FinalReport fr +WHERE + fr.total_order_value > 10000 OR fr.total_sales > 50000 +ORDER BY + fr.total_order_value DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/28119.sql b/vortex-bench/sqlstorm/tpch/28119.sql new file mode 100644 index 00000000000..50868376e85 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/28119.sql @@ -0,0 +1,35 @@ +WITH Supplier_Count AS ( + SELECT p.p_partkey, COUNT(DISTINCT s.s_suppkey) AS supplier_count + FROM part p + JOIN partsupp ps ON p.p_partkey = ps.ps_partkey + JOIN supplier s ON ps.ps_suppkey = s.s_suppkey + GROUP BY p.p_partkey +), +Customer_Count AS ( + SELECT o.o_orderkey, 
COUNT(DISTINCT c.c_custkey) AS customer_count + FROM orders o + JOIN customer c ON o.o_custkey = c.c_custkey + GROUP BY o.o_orderkey +), +LineItem_Stats AS ( + SELECT l.l_orderkey, + SUM(l.l_extendedprice) AS total_revenue, + AVG(l.l_discount) AS avg_discount, + MAX(l.l_tax) AS max_tax + FROM lineitem l + GROUP BY l.l_orderkey +) +SELECT p.p_name, + sc.supplier_count, + cc.customer_count, + ls.total_revenue, + ls.avg_discount, + ls.max_tax +FROM Supplier_Count sc +JOIN Customer_Count cc ON sc.p_partkey = cc.o_orderkey +JOIN LineItem_Stats ls ON cc.o_orderkey = ls.l_orderkey +JOIN part p ON sc.p_partkey = p.p_partkey +WHERE sc.supplier_count > 5 +AND cc.customer_count > 10 +AND ls.total_revenue > 10000 +ORDER BY ls.total_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/28234.sql b/vortex-bench/sqlstorm/tpch/28234.sql new file mode 100644 index 00000000000..1ed4f1370f5 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/28234.sql @@ -0,0 +1,27 @@ +SELECT + p.p_name, + COUNT(DISTINCT ps.ps_suppkey) AS supplier_count, + SUM(CASE WHEN o.o_orderstatus = 'F' THEN l.l_extendedprice ELSE 0 END) AS total_filled_order_value, + STRING_AGG(DISTINCT s.s_name, ', ') AS supplier_names, + SUBSTRING(p.p_comment, 1, 10) AS short_comment, + CONCAT('Total Count: ', COUNT(DISTINCT ps.ps_suppkey), ' | Filled Value: ', + SUM(CASE WHEN o.o_orderstatus = 'F' THEN l.l_extendedprice ELSE 0 END) + ) AS report_summary +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + lineitem l ON ps.ps_partkey = l.l_partkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +WHERE + p.p_name LIKE 'widget%' +GROUP BY + p.p_name, p.p_comment +HAVING + COUNT(DISTINCT ps.ps_suppkey) > 0 +ORDER BY + total_filled_order_value DESC; diff --git a/vortex-bench/sqlstorm/tpch/28247.sql b/vortex-bench/sqlstorm/tpch/28247.sql new file mode 100644 index 00000000000..39f059b9745 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/28247.sql @@ -0,0 +1,33 @@ 
+SELECT + p.p_name, + s.s_name, + c.c_name, + n.n_name, + r.r_name, + COUNT(DISTINCT o.o_orderkey) AS total_orders, + SUM(l.l_quantity) AS total_quantity, + AVG(l.l_extendedprice) AS avg_price, + STRING_AGG(DISTINCT p.p_comment, '; ') AS aggregated_comments +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + p.p_type LIKE '%brass%' AND + l.l_shipdate BETWEEN '1997-01-01' AND '1997-12-31' +GROUP BY + p.p_name, s.s_name, c.c_name, n.n_name, r.r_name +ORDER BY + total_orders DESC, total_quantity DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/28419.sql b/vortex-bench/sqlstorm/tpch/28419.sql new file mode 100644 index 00000000000..676e96d3223 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/28419.sql @@ -0,0 +1,58 @@ + +WITH SupplierDetails AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_address, + n.n_name AS nation_name, + CONCAT(s.s_name, ' - ', s.s_address) AS supplier_info + FROM + supplier s + JOIN + nation n ON s.s_nationkey = n.n_nationkey +), +PartDetails AS ( + SELECT + p.p_partkey, + p.p_name, + p.p_mfgr, + p.p_brand, + p.p_retailprice, + p.p_comment, + LENGTH(p.p_comment) AS comment_length + FROM + part p +), +CombinedData AS ( + SELECT + s.s_suppkey AS suppkey, + s.supplier_info, + p.p_name, + p.p_brand, + p.comment_length, + p.p_retailprice + FROM + SupplierDetails s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + PartDetails p ON ps.ps_partkey = p.p_partkey +) +SELECT + cdc.supplier_info, + cdc.p_name, + cdc.p_brand, + cdc.p_retailprice, + COUNT(*) OVER (PARTITION BY cdc.supplier_info) AS supplier_part_count +FROM + CombinedData cdc +WHERE + cdc.comment_length > 20 +GROUP BY + cdc.supplier_info, + 
cdc.p_name, + cdc.p_brand, + cdc.p_retailprice, + cdc.comment_length +ORDER BY + cdc.p_retailprice DESC, cdc.supplier_info; diff --git a/vortex-bench/sqlstorm/tpch/29110.sql b/vortex-bench/sqlstorm/tpch/29110.sql new file mode 100644 index 00000000000..c4348f0587d --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29110.sql @@ -0,0 +1,24 @@ +SELECT + CONCAT(c.c_name, ' from ', s.s_name, ' in ', n.n_name, ' supplies ', p.p_name) AS supply_info, + LENGTH(CONCAT(c.c_name, ' from ', s.s_name, ' in ', n.n_name, ' supplies ', p.p_name)) AS info_length, + SUBSTRING(CONCAT(c.c_name, ' from ', s.s_name, ' in ', n.n_name, ' supplies ', p.p_name), 1, 50) AS short_supply_info +FROM + customer c +JOIN + orders o ON c.c_custkey = o.o_custkey +JOIN + lineitem l ON o.o_orderkey = l.l_orderkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +WHERE + LENGTH(s.s_comment) > 50 + AND o.o_orderstatus = 'O' +ORDER BY + info_length DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/29176.sql b/vortex-bench/sqlstorm/tpch/29176.sql new file mode 100644 index 00000000000..5d5ffe255f5 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29176.sql @@ -0,0 +1,29 @@ +SELECT + p.p_name, + COUNT(DISTINCT ps.ps_suppkey) AS supplier_count, + AVG(s.s_acctbal) AS avg_supplier_acctbal, + SUM(l.l_quantity) AS total_ordered_quantity, + STRING_AGG(DISTINCT s.s_comment, '; ') AS supplier_comments, + MAX(CASE + WHEN l.l_returnflag = 'R' THEN l.l_extendedprice * (1 - l.l_discount) + ELSE 0 + END) AS max_returned_value +FROM + part p +JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +WHERE + o.o_orderdate BETWEEN '1997-01-01' AND '1997-12-31' + AND p.p_type LIKE '%rubber%' +GROUP BY + p.p_name +ORDER BY + supplier_count 
DESC, + total_ordered_quantity DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/29299.sql b/vortex-bench/sqlstorm/tpch/29299.sql new file mode 100644 index 00000000000..56c56622f9b --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29299.sql @@ -0,0 +1,43 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_acctbal, + ROW_NUMBER() OVER (PARTITION BY p.p_partkey ORDER BY s.s_acctbal DESC) AS rn, + p.p_name, + p.p_container + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + part p ON ps.ps_partkey = p.p_partkey + WHERE + p.p_size BETWEEN 10 AND 20 + AND s.s_comment LIKE '%reliable%' +), FilteredOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + RANK() OVER (ORDER BY o.o_totalprice DESC) AS order_rank + FROM + orders o + WHERE + o.o_orderdate BETWEEN '1997-01-01' AND '1997-12-31' + AND o.o_orderstatus = 'F' +) +SELECT + fs.s_name, + fs.p_name, + fs.p_container, + fo.o_orderkey, + fo.o_orderdate, + fo.o_totalprice +FROM + RankedSuppliers fs +JOIN + FilteredOrders fo ON fs.rn = 1 +ORDER BY + fo.o_totalprice DESC, + fs.p_name ASC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/29393.sql b/vortex-bench/sqlstorm/tpch/29393.sql new file mode 100644 index 00000000000..eeac2d69490 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29393.sql @@ -0,0 +1,50 @@ + +WITH RankedParts AS ( + SELECT + p.p_partkey, + p.p_name, + p.p_mfgr, + p.p_brand, + p.p_type, + p.p_size, + p.p_container, + p.p_retailprice, + p.p_comment, + ROW_NUMBER() OVER (PARTITION BY p.p_brand ORDER BY p.p_retailprice DESC) AS rank + FROM + part p + WHERE + LENGTH(p.p_name) > 10 + AND p.p_retailprice > 50.00 +), +FilteredSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_address, + s.s_nationkey, + s.s_phone, + s.s_acctbal, + s.s_comment + FROM + supplier s + WHERE + s.s_acctbal > (SELECT AVG(s2.s_acctbal) FROM supplier s2) +) +SELECT + rp.p_name, + rp.p_brand, + rp.p_retailprice, + 
fs.s_name, + fs.s_address, + CONCAT(fs.s_name, ' - ', fs.s_address) AS supplier_info +FROM + RankedParts rp +JOIN + partsupp ps ON rp.p_partkey = ps.ps_partkey +JOIN + FilteredSuppliers fs ON ps.ps_suppkey = fs.s_suppkey +WHERE + rp.rank <= 5 +ORDER BY + rp.p_retailprice DESC, fs.s_name; diff --git a/vortex-bench/sqlstorm/tpch/29497.sql b/vortex-bench/sqlstorm/tpch/29497.sql new file mode 100644 index 00000000000..8ee9890ad7c --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29497.sql @@ -0,0 +1,43 @@ +WITH Supplier_Products AS ( + SELECT + s.s_name AS supplier_name, + p.p_name AS product_name, + p.p_brand AS product_brand, + p.p_container AS product_container, + ps.ps_availqty AS available_quantity, + ps.ps_supplycost AS supply_cost, + p.p_comment AS product_comment + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + part p ON ps.ps_partkey = p.p_partkey +), +Aggregated_Supplier_Products AS ( + SELECT + supplier_name, + COUNT(*) AS total_products, + SUM(available_quantity) AS total_available_quantity, + AVG(supply_cost) AS average_supply_cost, + STRING_AGG(DISTINCT product_brand, ', ') AS brands_offered, + STRING_AGG(DISTINCT product_container, ', ') AS container_types + FROM + Supplier_Products + GROUP BY + supplier_name +) +SELECT + supplier_name, + total_products, + total_available_quantity, + average_supply_cost, + brands_offered, + container_types, + CONCAT('Supplier: ', supplier_name, ' offers ', total_products, ' products, with an average supply cost of $', ROUND(average_supply_cost, 2), '.') AS supplier_summary +FROM + Aggregated_Supplier_Products +WHERE + total_available_quantity > 0 +ORDER BY + total_products DESC; diff --git a/vortex-bench/sqlstorm/tpch/29558.sql b/vortex-bench/sqlstorm/tpch/29558.sql new file mode 100644 index 00000000000..9df2abfc8dc --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29558.sql @@ -0,0 +1,46 @@ +WITH StringMetrics AS ( + SELECT + p.p_partkey, + p.p_name, + LENGTH(p.p_name) AS name_length, + 
SUBSTRING(p.p_name FROM 1 FOR 3) AS name_prefix, + REPLACE(p.p_comment, ' ', '') AS compact_comment, + COUNT(DISTINCT s.s_name) AS supplier_count, + SUM(ps.ps_availqty) AS total_available + FROM + part p + LEFT JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey + LEFT JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey + GROUP BY + p.p_partkey, p.p_name, p.p_comment +), +OrderedMetrics AS ( + SELECT + sm.p_partkey, + sm.p_name, + sm.name_length, + sm.name_prefix, + sm.compact_comment, + sm.supplier_count, + sm.total_available, + ROW_NUMBER() OVER (ORDER BY sm.supplier_count DESC, sm.total_available DESC) AS rank + FROM + StringMetrics sm +) +SELECT + om.p_partkey, + om.p_name, + om.name_length, + om.name_prefix, + om.compact_comment, + om.supplier_count, + om.total_available, + CONCAT('Rank: ', om.rank) AS rank_string +FROM + OrderedMetrics om +WHERE + om.supplier_count > 0 +ORDER BY + om.rank; diff --git a/vortex-bench/sqlstorm/tpch/29701.sql b/vortex-bench/sqlstorm/tpch/29701.sql new file mode 100644 index 00000000000..01e154a7752 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29701.sql @@ -0,0 +1,46 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + SUM(COALESCE(ps.ps_availqty, 0)) AS total_available_qty, + COUNT(DISTINCT p.p_partkey) AS part_count, + ROW_NUMBER() OVER (PARTITION BY r.r_regionkey ORDER BY SUM(COALESCE(ps.ps_supplycost, 0)) DESC) AS rn + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + part p ON ps.ps_partkey = p.p_partkey + JOIN + nation n ON s.s_nationkey = n.n_nationkey + JOIN + region r ON n.n_regionkey = r.r_regionkey + WHERE + p.p_comment LIKE '%special%' + GROUP BY + s.s_suppkey, s.s_name, r.r_regionkey +), +TopSuppliers AS ( + SELECT + rs.s_suppkey, + rs.s_name, + rs.total_available_qty, + rs.part_count + FROM + RankedSuppliers rs + WHERE + rs.rn <= 3 +) +SELECT + ts.s_name, + ts.total_available_qty, + ts.part_count, + (SELECT COUNT(DISTINCT c.c_custkey) + FROM customer c + JOIN orders o ON 
c.c_custkey = o.o_custkey + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE l.l_suppkey = ts.s_suppkey) AS total_customers +FROM + TopSuppliers ts +ORDER BY + ts.total_available_qty DESC; diff --git a/vortex-bench/sqlstorm/tpch/29740.sql b/vortex-bench/sqlstorm/tpch/29740.sql new file mode 100644 index 00000000000..8ef031f8c4f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29740.sql @@ -0,0 +1,25 @@ + +WITH FilteredParts AS ( + SELECT p.p_partkey, p.p_name, p.p_retailprice, p.p_comment + FROM part p + WHERE p.p_retailprice > 50.00 + AND LENGTH(p.p_comment) > 10 +), SupplierDetails AS ( + SELECT s.s_suppkey, s.s_name, s.s_acctbal, s.s_comment + FROM supplier s + WHERE s.s_acctbal < 1000.00 +), CombinedData AS ( + SELECT fp.p_partkey, fp.p_name, fp.p_retailprice, sd.s_name AS supplier_name, sd.s_acctbal + FROM FilteredParts fp + JOIN partsupp ps ON fp.p_partkey = ps.ps_partkey + JOIN SupplierDetails sd ON ps.ps_suppkey = sd.s_suppkey +) +SELECT COUNT(*) AS total_parts, + MIN(p_retailprice) AS min_retail_price, + MAX(p_retailprice) AS max_retail_price, + AVG(p_retailprice) AS avg_retail_price, + STRING_AGG(DISTINCT supplier_name, ', ') AS supplier_names +FROM CombinedData +WHERE p_retailprice BETWEEN 50.00 AND 200.00 +GROUP BY p_partkey, p_name, p_retailprice +ORDER BY total_parts DESC; diff --git a/vortex-bench/sqlstorm/tpch/29827.sql b/vortex-bench/sqlstorm/tpch/29827.sql new file mode 100644 index 00000000000..1e5e26acf1b --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29827.sql @@ -0,0 +1,38 @@ +WITH RECURSIVE string_benchmark AS ( + SELECT + p.p_name AS part_name, + SUBSTRING(p.p_name FROM 1 FOR 5) AS substring_5, + LENGTH(p.p_name) AS name_length, + REGEXP_REPLACE(p.p_comment, '[^a-zA-Z0-9]', '') AS sanitized_comment, + 'Part: ' || p.p_name || + ' | Size: ' || p.p_size || + ' | Price: ' || CAST(p.p_retailprice AS varchar) AS formatted_info + FROM + part p + WHERE + p.p_size > 10 + + UNION ALL + + SELECT + s.s_name AS supplier_name, + SUBSTRING(s.s_name 
FROM 1 FOR 5) AS substring_5, + LENGTH(s.s_name) AS name_length, + REGEXP_REPLACE(s.s_comment, '[^a-zA-Z0-9]', '') AS sanitized_comment, + 'Supplier: ' || s.s_name || + ' | Address: ' || s.s_address || + ' | Phone: ' || s.s_phone AS formatted_info + FROM + supplier s + JOIN + nation n ON n.n_nationkey = s.s_nationkey + WHERE + n.n_name LIKE 'S%' +) + +SELECT + COUNT(*) AS total_records, + AVG(name_length) AS avg_length, + STRING_AGG(formatted_info, ' | ') AS aggregated_info +FROM + string_benchmark; diff --git a/vortex-bench/sqlstorm/tpch/29848.sql b/vortex-bench/sqlstorm/tpch/29848.sql new file mode 100644 index 00000000000..24104f394b6 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29848.sql @@ -0,0 +1,28 @@ + +SELECT + CONCAT('Supplier: ', s.s_name, ' | Nation: ', n.n_name, + ' | Region: ', r.r_name, ' | Total Order Value: ', + SUM(l.l_extendedprice * (1 - l.l_discount))) AS Total_Value, + s.s_name, n.n_name, r.r_name +FROM + supplier s +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +JOIN + lineitem l ON p.p_partkey = l.l_partkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +WHERE + o.o_orderdate BETWEEN '1997-01-01' AND '1997-12-31' +GROUP BY + s.s_name, n.n_name, r.r_name +HAVING + SUM(l.l_extendedprice * (1 - l.l_discount)) > 10000 +ORDER BY + Total_Value DESC; diff --git a/vortex-bench/sqlstorm/tpch/29930.sql b/vortex-bench/sqlstorm/tpch/29930.sql new file mode 100644 index 00000000000..c2b63e39514 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/29930.sql @@ -0,0 +1,18 @@ +WITH RankedSuppliers AS ( + SELECT s.s_suppkey, s.s_name, SUM(ps.ps_supplycost * ps.ps_availqty) AS total_cost, + ROW_NUMBER() OVER (PARTITION BY p.p_type ORDER BY SUM(ps.ps_supplycost * ps.ps_availqty) DESC) AS rank + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN part p ON ps.ps_partkey = p.p_partkey + GROUP BY 
s.s_suppkey, s.s_name, p.p_type +), FilteredSuppliers AS ( + SELECT r.r_name AS region, ns.n_name AS nation, rs.s_name, rs.total_cost + FROM RankedSuppliers rs + JOIN supplier s ON rs.s_suppkey = s.s_suppkey + JOIN nation ns ON s.s_nationkey = ns.n_nationkey + JOIN region r ON ns.n_regionkey = r.r_regionkey + WHERE rs.rank <= 3 +) +SELECT fs.region, fs.nation, fs.s_name, fs.total_cost +FROM FilteredSuppliers fs +ORDER BY fs.region, fs.nation, fs.total_cost DESC; diff --git a/vortex-bench/sqlstorm/tpch/30069.sql b/vortex-bench/sqlstorm/tpch/30069.sql new file mode 100644 index 00000000000..c61ce07908c --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/30069.sql @@ -0,0 +1,43 @@ +WITH RECURSIVE nation_hierarchy AS ( + SELECT n_nationkey, n_name, n_regionkey, 0 AS level + FROM nation + WHERE n_regionkey IS NOT NULL + UNION ALL + SELECT n.n_nationkey, n.n_name, n.n_regionkey, nh.level + 1 + FROM nation n + JOIN nation_hierarchy nh ON n.n_nationkey = nh.n_regionkey +), +supplier_costs AS ( + SELECT s.s_suppkey, SUM(ps.ps_supplycost * ps.ps_availqty) AS total_cost + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY s.s_suppkey +), +order_summary AS ( + SELECT o.o_orderkey, o.o_custkey, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_lineitem_price + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE l.l_shipdate >= '1997-01-01' AND l.l_shipdate < '1998-01-01' + GROUP BY o.o_orderkey, o.o_custkey +), +customer_rank AS ( + SELECT c.c_custkey, CUME_DIST() OVER (ORDER BY SUM(os.total_lineitem_price) DESC) AS customer_rank + FROM customer c + JOIN order_summary os ON c.c_custkey = os.o_custkey + GROUP BY c.c_custkey +) +SELECT r.r_name AS region_name, + n.n_name AS nation_name, + s.s_name AS supplier_name, + ROUND(COALESCE(s_cost.total_cost, 0), 2) AS supplier_total_cost, + ROUND(SUM(os.total_lineitem_price), 2) AS total_order_value, + cr.customer_rank +FROM region r +LEFT JOIN nation n ON r.r_regionkey = n.n_regionkey +LEFT 
JOIN supplier s ON n.n_nationkey = s.s_nationkey +LEFT JOIN supplier_costs s_cost ON s.s_suppkey = s_cost.s_suppkey +LEFT JOIN order_summary os ON s.s_suppkey = os.o_custkey +LEFT JOIN customer_rank cr ON os.o_custkey = cr.c_custkey +GROUP BY r.r_name, n.n_name, s.s_name, s_cost.total_cost, cr.customer_rank +HAVING SUM(os.total_lineitem_price) IS NOT NULL +ORDER BY region_name, nation_name, supplier_total_cost DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/3020.sql b/vortex-bench/sqlstorm/tpch/3020.sql new file mode 100644 index 00000000000..e2c9a77a384 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/3020.sql @@ -0,0 +1,49 @@ +WITH OrderDetails AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales, + COUNT(DISTINCT l.l_linenumber) AS line_item_count + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + WHERE + o.o_orderdate >= DATE '1996-01-01' AND o.o_orderdate < DATE '1997-01-01' + GROUP BY + o.o_orderkey, o.o_orderdate +), +RankedOrders AS ( + SELECT + od.o_orderkey, + od.o_orderdate, + od.total_sales, + od.line_item_count, + RANK() OVER (ORDER BY od.total_sales DESC) AS sales_rank + FROM + OrderDetails od +) +SELECT + coalesce(r.o_orderkey, o.o_orderkey) as order_key, + o.o_orderdate, + r.total_sales, + r.line_item_count, + CASE + WHEN r.sales_rank IS NULL THEN 'No Sales' + ELSE CONCAT('Rank ', r.sales_rank) + END AS sales_rank +FROM + orders o +LEFT JOIN + RankedOrders r ON o.o_orderkey = r.o_orderkey +WHERE + o.o_orderstatus IN ('O', 'P') + AND (r.total_sales IS NULL OR r.total_sales > 1000) + AND EXISTS ( + SELECT 1 + FROM lineitem l + WHERE l.l_orderkey = o.o_orderkey + AND l.l_returnflag = 'R' + ) +ORDER BY + o.o_orderdate DESC, total_sales DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/30388.sql b/vortex-bench/sqlstorm/tpch/30388.sql new file mode 100644 index 00000000000..004df9df9ab --- /dev/null +++ 
b/vortex-bench/sqlstorm/tpch/30388.sql @@ -0,0 +1,51 @@ +/* Lists line items shipped on or after 1997-01-01 for the five suppliers with the largest summed ps_availqty, ranking each customer's rows by l_extendedprice. */ WITH RECURSIVE SupplierHierarchy AS ( /* seed: suppliers whose s_acctbal is above the overall average; each recursive step joins suppliers on the same s_nationkey while level < 3. Not referenced by the final SELECT. */ + SELECT s.s_suppkey, s.s_name, s.s_nationkey, 0 AS level + FROM supplier s + WHERE s.s_acctbal > (SELECT AVG(s_acctbal) FROM supplier) + UNION ALL + SELECT s.s_suppkey, s.s_name, s.s_nationkey, sh.level + 1 + FROM supplier s + JOIN SupplierHierarchy sh ON s.s_nationkey = sh.s_nationkey + WHERE sh.level < 3 +), +CustomerOrders AS ( /* order count per customer with a non-NULL c_acctbal; the LEFT JOIN keeps customers without orders, counted as 0 */ + SELECT c.c_custkey, c.c_name, COUNT(o.o_orderkey) as total_orders + FROM customer c + LEFT JOIN orders o ON c.c_custkey = o.o_custkey + WHERE c.c_acctbal IS NOT NULL + GROUP BY c.c_custkey, c.c_name +), +PartSupplier AS ( /* summed ps_supplycost per part; only p_name is read by the final SELECT */ + SELECT p.p_partkey, p.p_name, SUM(ps.ps_supplycost) AS total_supplycost + FROM part p + JOIN partsupp ps ON p.p_partkey = ps.ps_partkey + GROUP BY p.p_partkey, p.p_name +), +TopNSuppliers AS ( /* the five suppliers with the highest summed ps_availqty */ + SELECT s.s_suppkey, s.s_name, SUM(ps.ps_availqty) AS total_availqty + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY s.s_suppkey, s.s_name + ORDER BY total_availqty DESC + LIMIT 5 +) +SELECT DISTINCT + so.s_name AS supplier_name, + co.c_name AS customer_name, + po.p_name AS part_name, + li.l_quantity, + li.l_extendedprice, + LI.l_discount, /* NOTE(review): upper-case LI presumably resolves to the li alias through case-insensitive unquoted identifiers; confirm on every engine under test */ + RANK() OVER (PARTITION BY co.c_custkey ORDER BY li.l_extendedprice DESC) AS price_rank, + CASE + WHEN li.l_returnflag = 'R' THEN 'Returned' + ELSE 'Not Returned' + END AS return_status +FROM lineitem li +JOIN orders o ON li.l_orderkey = o.o_orderkey +JOIN CustomerOrders co ON o.o_custkey = co.c_custkey +JOIN TopNSuppliers so ON li.l_suppkey = so.s_suppkey +JOIN PartSupplier po ON li.l_partkey = po.p_partkey +WHERE li.l_shipdate >= DATE '1997-01-01' +AND (li.l_discount > 0.1 OR co.total_orders > 5) /* keep a row when it is discounted above 10% or its customer has more than five orders */ +ORDER BY return_status, price_rank; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/304.sql b/vortex-bench/sqlstorm/tpch/304.sql new file mode 100644 index 00000000000..2648aceb644 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/304.sql @@ -0,0 +1,46
@@ +/* Reports the five highest-balance suppliers of each nation together with whatever order revenue the joins below attach to them. */ +WITH SupplierDetails AS ( /* suppliers with s_acctbal > 0, numbered within each nation name by s_acctbal descending */ + SELECT s.s_suppkey, + s.s_name, + s.s_acctbal, + n.n_name AS nation_name, + ROW_NUMBER() OVER (PARTITION BY n.n_name ORDER BY s.s_acctbal DESC) AS rank + FROM supplier s + JOIN nation n ON s.s_nationkey = n.n_nationkey + WHERE s.s_acctbal > 0 +), +OrderSummary AS ( /* per order dated 1996-01-01 or later: discounted revenue and number of distinct parts */ + SELECT o.o_orderkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales, + COUNT(DISTINCT l.l_partkey) AS parts_count, + o.o_orderdate + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE o.o_orderdate >= DATE '1996-01-01' + GROUP BY o.o_orderkey, o.o_orderdate +), +HighValueOrders AS ( /* per order with status 'F' and o_totalprice > 1000: discounted revenue */ + SELECT o.o_orderkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales, + o.o_orderdate + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE o.o_orderstatus = 'F' + AND o.o_totalprice > 1000 + GROUP BY o.o_orderkey, o.o_orderdate +) +SELECT sd.s_name, + d.r_name, + COALESCE(os.total_sales, 0) AS order_sales, + COALESCE(hvo.total_sales, 0) AS high_value_sales, + sd.rank, + CASE + WHEN os.total_sales IS NOT NULL THEN 'Order Exists' + ELSE 'No Order' + END AS order_status +FROM SupplierDetails sd +LEFT JOIN region d ON sd.nation_name = d.r_name /* NOTE(review): compares a nation name with a region name, as written in the corpus; presumably rarely equal, confirm before relying on r_name */ +LEFT JOIN OrderSummary os ON sd.s_suppkey = os.o_orderkey /* NOTE(review): supplier key compared with order key, kept verbatim from the corpus */ +LEFT JOIN HighValueOrders hvo ON os.o_orderkey = hvo.o_orderkey +WHERE sd.rank <= 5 +ORDER BY d.r_name, sd.rank; diff --git a/vortex-bench/sqlstorm/tpch/30427.sql b/vortex-bench/sqlstorm/tpch/30427.sql new file mode 100644 index 00000000000..b31dab4ac6c --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/30427.sql @@ -0,0 +1,34 @@ +/* Returns up to 100 parts ordered by retail price minus average supply cost, each labelled with a profitability status. */ WITH RECURSIVE CTE_Suppliers AS ( /* suppliers joined to their nation, numbered within n_name by s_acctbal descending; none of the CTEs in this statement references itself despite the RECURSIVE keyword */ + SELECT s_suppkey, s_name, n_name, s_acctbal, + ROW_NUMBER() OVER (PARTITION BY n_name ORDER BY s_acctbal DESC) AS rank + FROM supplier + JOIN nation ON supplier.s_nationkey = nation.n_nationkey +), +CTE_OrderSummary AS ( /* summed o_totalprice per o_custkey */ + SELECT o_custkey, SUM(o_totalprice) AS total_spent + FROM orders + GROUP BY o_custkey +), +CTE_PartPricing AS ( /* average ps_supplycost per ps_partkey */ + SELECT ps_partkey, 
AVG(ps_supplycost) AS avg_supplycost + FROM partsupp + GROUP BY ps_partkey +) +SELECT p.p_name, + COALESCE(s.s_name, 'No Supplier') AS supplier_name, + ns.total_spent, + pp.avg_supplycost, + p.p_retailprice - COALESCE(pp.avg_supplycost, 0) AS profit_margin, + CASE + WHEN pp.avg_supplycost IS NULL THEN 'No Cost Data' + WHEN (p.p_retailprice - COALESCE(pp.avg_supplycost, 0)) < 0 THEN 'Loss' /* cannot match a surviving row: the WHERE clause below only keeps rows whose avg_supplycost is NULL or below p_retailprice */ + ELSE 'Profit' + END AS profitability_status +FROM part p +LEFT JOIN CTE_PartPricing pp ON p.p_partkey = pp.ps_partkey +LEFT JOIN CTE_Suppliers s ON s.rank = 1 AND p.p_partkey = s.s_suppkey /* NOTE(review): part key compared with supplier key, kept verbatim from the corpus */ +LEFT JOIN CTE_OrderSummary ns ON ns.o_custkey = p.p_partkey /* NOTE(review): customer key compared with part key, kept verbatim from the corpus */ +WHERE (p.p_size >= 10 OR p.p_comment IS NULL) + AND (pp.avg_supplycost < p.p_retailprice OR pp.avg_supplycost IS NULL) +ORDER BY profit_margin DESC +LIMIT 100; diff --git a/vortex-bench/sqlstorm/tpch/30459.sql b/vortex-bench/sqlstorm/tpch/30459.sql new file mode 100644 index 00000000000..2fec97d7dd6 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/30459.sql @@ -0,0 +1,38 @@ +/* Per region: BUILDING-segment revenue, distinct supplier count and average balance of the recursively collected suppliers, kept only where that average beats the overall supplier average. */ WITH RECURSIVE region_supplier AS ( /* seed: suppliers whose nation is named 'Canada'. NOTE(review): TPC-H nation names are presumably upper-case, which would leave the seed empty; confirm against the generated data */ + SELECT s.s_suppkey, s.s_name, s.s_nationkey, s.s_acctbal, s.s_comment + FROM supplier s + INNER JOIN nation n ON s.s_nationkey = n.n_nationkey + WHERE n.n_name = 'Canada' + + UNION ALL + + SELECT s.s_suppkey, s.s_name, s.s_nationkey, s.s_acctbal, s.s_comment /* recursive arm: every supplier sharing a nation with an already collected row whose s_acctbal > 5000. NOTE(review): there is no depth bound, so with UNION ALL this looks non-terminating once such a row exists; confirm */ + FROM supplier s + INNER JOIN region_supplier rs ON rs.s_nationkey = s.s_nationkey + WHERE rs.s_acctbal > 5000 +), +order_summary AS ( /* discounted revenue per order over line items shipped during 1997 */ + SELECT o.o_orderkey, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE l.l_shipdate >= DATE '1997-01-01' AND l.l_shipdate <= DATE '1997-12-31' + GROUP BY o.o_orderkey +), +market_analysis AS ( /* distinct order count and summed order_summary revenue per customer market segment */ + SELECT c.c_mktsegment, COUNT(DISTINCT o.o_orderkey) AS order_count, + SUM(os.total_revenue) AS segment_revenue + FROM customer c + LEFT JOIN orders o ON c.c_custkey = o.o_custkey + LEFT JOIN order_summary os ON o.o_orderkey = os.o_orderkey + GROUP BY 
c.c_mktsegment +) +SELECT r.r_name, COALESCE(SUM(ma.segment_revenue), 0) AS total_segment_revenue, + COUNT(DISTINCT rs.s_suppkey) AS supplier_count, + AVG(rs.s_acctbal) AS average_acctbal +FROM region r +LEFT JOIN nation n ON r.r_regionkey = n.n_regionkey +LEFT JOIN region_supplier rs ON n.n_nationkey = rs.s_nationkey +LEFT JOIN market_analysis ma ON ma.c_mktsegment = 'BUILDING' /* the join condition names no column of the left side, so the BUILDING row is attached to every joined row */ +GROUP BY r.r_name +HAVING AVG(rs.s_acctbal) > (SELECT AVG(s.s_acctbal) FROM supplier s WHERE s.s_acctbal IS NOT NULL) +ORDER BY total_segment_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/30974.sql b/vortex-bench/sqlstorm/tpch/30974.sql new file mode 100644 index 00000000000..2f0f557a218 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/30974.sql @@ -0,0 +1,37 @@ +/* Top five nations by the summed sales of their customers that rank in the global top 10, limited to nations with more than five distinct customers. */ WITH RECURSIVE SupplierHierarchy AS ( /* seed: suppliers with s_acctbal > 100000; the recursive arm re-joins a supplier to itself on s_suppkey when s_acctbal > 150000. Not referenced by the final SELECT. */ + SELECT s.s_suppkey, s.s_name, s.s_acctbal, 0 AS level + FROM supplier s + WHERE s.s_acctbal > 100000 + UNION ALL + SELECT s.s_suppkey, s.s_name, s.s_acctbal, sh.level + 1 + FROM supplier s + INNER JOIN SupplierHierarchy sh ON s.s_suppkey = sh.s_suppkey + WHERE s.s_acctbal > 150000 +), +OrderStats AS ( /* per customer key: discounted revenue and distinct order count over orders with status 'O' */ + SELECT o.o_custkey, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales, + COUNT(DISTINCT o.o_orderkey) AS order_count + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE o.o_orderstatus = 'O' + GROUP BY o.o_custkey +), +CustomerSales AS ( /* customers with a non-NULL c_acctbal, ranked globally by total_sales descending; customers without OrderStats rows keep NULL sales */ + SELECT c.c_custkey, c.c_name, cs.total_sales, cs.order_count, + RANK() OVER (ORDER BY cs.total_sales DESC) AS sales_rank + FROM customer c + LEFT JOIN OrderStats cs ON c.c_custkey = cs.o_custkey + WHERE c.c_acctbal IS NOT NULL +) +SELECT n.n_name, + COALESCE(SUM(CASE WHEN cs.sales_rank <= 10 THEN cs.total_sales ELSE 0 END), 0) AS top_sales, + COUNT(DISTINCT cs.c_custkey) AS customer_count +FROM nation n +LEFT JOIN customer c ON n.n_nationkey = c.c_nationkey +LEFT JOIN CustomerSales cs ON c.c_custkey = cs.c_custkey +WHERE n.n_name IS NOT NULL +GROUP BY n.n_name +HAVING COUNT(DISTINCT 
cs.c_custkey) > 5 +ORDER BY top_sales DESC +LIMIT 5; + diff --git a/vortex-bench/sqlstorm/tpch/31305.sql b/vortex-bench/sqlstorm/tpch/31305.sql new file mode 100644 index 00000000000..02ccb900c38 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/31305.sql @@ -0,0 +1,51 @@ +/* Ten nation and region pairs with the highest discounted line-item revenue among those with more than five distinct orders. */ +WITH RECURSIVE SupplierHierarchy AS ( /* seed: suppliers with s_acctbal > 10000 at level 1; each step appends ' > ' and the supplier name to path while level < 3. Not referenced by the final SELECT. */ + SELECT s.s_suppkey, s.s_name, s.s_acctbal, + CAST(s.s_name AS VARCHAR) AS path, + 1 AS level + FROM supplier s + WHERE s.s_acctbal > 10000 + + UNION ALL + + SELECT s.s_suppkey, s.s_name, s.s_acctbal, + CONCAT(sh.path, ' > ', s.s_name), + sh.level + 1 + FROM supplier s + JOIN SupplierHierarchy sh ON s.s_nationkey = sh.s_suppkey /* NOTE(review): nation key compared with supplier key, kept verbatim from the corpus */ + WHERE sh.level < 3 +), +RankedOrders AS ( /* orders priced above the overall average, numbered within o_orderstatus by o_totalprice descending. Not referenced by the final SELECT. */ + SELECT o.o_orderkey, o.o_orderdate, o.o_totalprice, + ROW_NUMBER() OVER (PARTITION BY o.o_orderstatus ORDER BY o.o_totalprice DESC) AS rank + FROM orders o + WHERE o.o_totalprice > (SELECT AVG(o_totalprice) FROM orders) +), +SupplierStats AS ( /* per part: summed ps_supplycost and number of distinct suppliers */ + SELECT ps.ps_partkey, SUM(ps.ps_supplycost) AS total_supplycost, + COUNT(DISTINCT ps.ps_suppkey) AS unique_suppliers + FROM partsupp ps + GROUP BY ps.ps_partkey +), +CustomerOrders AS ( /* per customer: order count and summed o_totalprice; the LEFT JOIN keeps customers without orders */ + SELECT c.c_custkey, c.c_name, COUNT(o.o_orderkey) AS order_count, + SUM(o.o_totalprice) AS total_spent + FROM customer c + LEFT JOIN orders o ON c.c_custkey = o.o_custkey + GROUP BY c.c_custkey, c.c_name +) +SELECT n.n_name, r.r_name, + SUM(COALESCE(l.l_extendedprice * (1 - l.l_discount), 0)) AS total_revenue, + AVG(cs.total_spent) AS avg_customer_spent, /* averaged over joined line-item rows, so each customer's total_spent is counted once per line item */ + MAX(ss.unique_suppliers) AS max_unique_suppliers +FROM lineitem l +JOIN orders o ON l.l_orderkey = o.o_orderkey +JOIN customer c ON o.o_custkey = c.c_custkey +JOIN nation n ON c.c_nationkey = n.n_nationkey +JOIN region r ON n.n_regionkey = r.r_regionkey +JOIN SupplierStats ss ON ss.ps_partkey = l.l_partkey +JOIN CustomerOrders cs ON cs.c_custkey = c.c_custkey +GROUP BY n.n_name, r.r_name +HAVING COUNT(DISTINCT o.o_orderkey) > 5 +ORDER BY total_revenue DESC +LIMIT 10; diff --git 
a/vortex-bench/sqlstorm/tpch/32138.sql b/vortex-bench/sqlstorm/tpch/32138.sql new file mode 100644 index 00000000000..af4de8c7ff0 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/32138.sql @@ -0,0 +1,54 @@ +WITH RECURSIVE MonthlyOrders AS ( + SELECT + o_custkey, + DATE_TRUNC('month', o_orderdate) AS order_month, + COUNT(o_orderkey) AS order_count, + SUM(o_totalprice) AS total_revenue + FROM + orders + WHERE + o_orderdate >= DATE '1997-01-01' AND o_orderdate < DATE '1997-12-31' + GROUP BY + o_custkey, DATE_TRUNC('month', o_orderdate) +), +NationSummary AS ( + SELECT + n.n_nationkey, + n.n_name, + COUNT(DISTINCT s.s_suppkey) AS total_suppliers, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supply_value + FROM + nation n + LEFT JOIN + supplier s ON n.n_nationkey = s.s_nationkey + LEFT JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY + n.n_nationkey, n.n_name +), +HighValueCustomers AS ( + SELECT + c.c_custkey, + c.c_name, + SUM(mo.total_revenue) AS high_value_revenue + FROM + customer c + JOIN + MonthlyOrders mo ON c.c_custkey = mo.o_custkey + WHERE + mo.order_count > 5 + GROUP BY + c.c_custkey, c.c_name +) +SELECT + n.n_name, + COALESCE(hvc.c_name, 'No High Value Customers') AS high_value_customer_name, + n.total_suppliers, + n.total_supply_value, + ROW_NUMBER() OVER (PARTITION BY n.n_nationkey ORDER BY n.total_supply_value DESC) AS rank +FROM + NationSummary n +LEFT JOIN + HighValueCustomers hvc ON hvc.high_value_revenue > 5000 +ORDER BY + n.n_name, rank; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/3232.sql b/vortex-bench/sqlstorm/tpch/3232.sql new file mode 100644 index 00000000000..ba4f576ba37 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/3232.sql @@ -0,0 +1,53 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + RANK() OVER (PARTITION BY o.o_orderdate ORDER BY o.o_totalprice DESC) AS OrderRank + FROM + orders o + WHERE + o.o_orderstatus = 'O' AND + o.o_orderdate >= DATE '1996-01-01' +), 
+SupplierStats AS ( + SELECT + s.s_suppkey, + SUM(ps.ps_availqty) AS TotalAvailableQty, + AVG(ps.ps_supplycost) AS AvgSupplyCost + FROM + supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY + s.s_suppkey +), +TopSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + ss.TotalAvailableQty, + ss.AvgSupplyCost + FROM + supplier s + JOIN SupplierStats ss ON s.s_suppkey = ss.s_suppkey + WHERE + ss.TotalAvailableQty > 1000 +) +SELECT + ro.o_orderkey, + ro.o_orderdate, + ro.o_totalprice, + ts.s_name AS SupplierName, + ts.TotalAvailableQty, + ts.AvgSupplyCost +FROM + RankedOrders ro +LEFT JOIN + lineitem l ON ro.o_orderkey = l.l_orderkey +LEFT JOIN + TopSuppliers ts ON l.l_suppkey = ts.s_suppkey +WHERE + (ro.OrderRank <= 5 OR ts.AvgSupplyCost IS NULL) +ORDER BY + ro.o_orderdate DESC, + ro.o_totalprice DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/34562.sql b/vortex-bench/sqlstorm/tpch/34562.sql new file mode 100644 index 00000000000..d2cd395d332 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/34562.sql @@ -0,0 +1,53 @@ +WITH RECURSIVE SupplierHierarchy AS ( + SELECT s_suppkey, s_name, s_acctbal, 0 AS level + FROM supplier + WHERE s_acctbal > 1000 + UNION ALL + SELECT s.s_suppkey, s.s_name, s.s_acctbal, sh.level + 1 + FROM supplier s + JOIN SupplierHierarchy sh ON s.s_suppkey = sh.s_suppkey +), +TotalSales AS ( + SELECT c.c_custkey, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales + FROM customer c + JOIN orders o ON c.c_custkey = o.o_custkey + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE o.o_orderstatus = 'O' AND l.l_shipdate >= '1997-01-01' + GROUP BY c.c_custkey +), +HighValueCustomers AS ( + SELECT c.c_custkey, c.c_name, ts.total_sales + FROM customer c + JOIN TotalSales ts ON c.c_custkey = ts.c_custkey + WHERE ts.total_sales > 5000 +), +SupplierSales AS ( + SELECT s.s_suppkey, SUM(l.l_extendedprice * (1 - l.l_discount)) AS supplier_sales + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + 
JOIN lineitem l ON ps.ps_partkey = l.l_partkey + GROUP BY s.s_suppkey +), +FinalReport AS ( + SELECT + hvc.c_custkey, + hvc.c_name, + hvc.total_sales, + ss.supplier_sales, + CASE + WHEN ss.supplier_sales IS NULL THEN 'No sales' + ELSE 'Sales exist' + END AS sales_status + FROM HighValueCustomers hvc + LEFT JOIN SupplierSales ss ON hvc.c_custkey = ss.supplier_sales +) + +SELECT + fr.c_custkey, + fr.c_name, + fr.total_sales, + COALESCE(fr.supplier_sales, 0) AS supplier_sales, + fr.sales_status, + ROW_NUMBER() OVER (ORDER BY fr.total_sales DESC) AS customer_rank +FROM FinalReport fr +ORDER BY fr.total_sales DESC, fr.c_name ASC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/3797.sql b/vortex-bench/sqlstorm/tpch/3797.sql new file mode 100644 index 00000000000..d4ca3238aa4 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/3797.sql @@ -0,0 +1,56 @@ +WITH RegionalSales AS ( + SELECT + n.n_name AS nation, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales, + ROW_NUMBER() OVER (PARTITION BY n.n_nationkey ORDER BY SUM(l.l_extendedprice * (1 - l.l_discount)) DESC) AS sales_rank + FROM + lineitem l + JOIN + orders o ON l.l_orderkey = o.o_orderkey + JOIN + customer c ON o.o_custkey = c.c_custkey + JOIN + nation n ON c.c_nationkey = n.n_nationkey + WHERE + o.o_orderdate >= DATE '1996-01-01' AND o.o_orderdate < DATE '1997-01-01' + GROUP BY + n.n_nationkey, n.n_name +), +TopNations AS ( + SELECT + nation, + total_sales + FROM + RegionalSales + WHERE + sales_rank <= 5 +), +SupplierCosts AS ( + SELECT + s.s_name AS supplier_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_cost + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + part p ON ps.ps_partkey = p.p_partkey + WHERE + p.p_size > 20 + GROUP BY + s.s_name +) +SELECT + t.nation, + t.total_sales, + COALESCE(s.total_cost, 0) AS supplier_total_cost, + CASE + WHEN t.total_sales > 1.0 * COALESCE(s.total_cost, 0) THEN 'Profitable' + ELSE 'Not Profitable' + END AS 
profitability_status +FROM + TopNations t +LEFT JOIN + SupplierCosts s ON t.nation = s.supplier_name +ORDER BY + t.total_sales DESC, supplier_total_cost ASC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/3946.sql b/vortex-bench/sqlstorm/tpch/3946.sql new file mode 100644 index 00000000000..8f8cf76351c --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/3946.sql @@ -0,0 +1,65 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + c.c_name, + ROW_NUMBER() OVER (PARTITION BY c.c_nationkey ORDER BY o.o_totalprice DESC) AS order_rank + FROM + orders o + JOIN + customer c ON o.o_custkey = c.c_custkey + WHERE + o.o_orderdate >= DATE '1997-01-01' AND o.o_orderdate < DATE '1998-01-01' +), +TopCustomerOrders AS ( + SELECT + r.r_name AS region, + n.n_name AS nation, + ro.c_name AS customer_name, + ro.o_orderkey, + ro.o_orderdate, + ro.o_totalprice + FROM + RankedOrders ro + JOIN + customer c ON ro.c_name = c.c_name + JOIN + nation n ON c.c_nationkey = n.n_nationkey + JOIN + region r ON n.n_regionkey = r.r_regionkey + WHERE + ro.order_rank <= 3 +), +AvgOrderPrice AS ( + SELECT + nu.n_name, + AVG(tco.o_totalprice) AS average_price + FROM + TopCustomerOrders tco + JOIN + nation nu ON tco.nation = nu.n_name + GROUP BY + nu.n_name +) +SELECT + tc.region, + tc.nation, + tc.customer_name, + tc.o_orderkey, + tc.o_orderdate, + tc.o_totalprice, + aop.average_price, + CASE + WHEN tc.o_totalprice > aop.average_price THEN 'Above Average' + WHEN tc.o_totalprice < aop.average_price THEN 'Below Average' + ELSE 'Average' + END AS price_comparison +FROM + TopCustomerOrders tc +LEFT JOIN + AvgOrderPrice aop ON tc.nation = aop.n_name +ORDER BY + tc.region, + tc.nation, + tc.o_totalprice DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/4034.sql b/vortex-bench/sqlstorm/tpch/4034.sql new file mode 100644 index 00000000000..1d0af13d0c3 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/4034.sql @@ -0,0 +1,70 @@ + +WITH 
RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + o.o_custkey, + ROW_NUMBER() OVER (PARTITION BY o.o_orderstatus ORDER BY o.o_totalprice DESC) AS rn + FROM + orders o + WHERE + o.o_orderdate >= DATE '1997-01-01' + AND o.o_orderdate < DATE '1998-01-01' +), +CustomerOrders AS ( + SELECT + c.c_custkey, + c.c_name, + SUM(o.o_totalprice) AS total_spent + FROM + customer c + JOIN + orders o ON c.c_custkey = o.o_custkey + WHERE + c.c_acctbal > 1000 + GROUP BY + c.c_custkey, c.c_name +), +SupplierParts AS ( + SELECT + p.p_partkey, + p.p_name, + s.s_suppkey, + s.s_name, + ps.ps_availqty, + ROW_NUMBER() OVER (PARTITION BY p.p_partkey ORDER BY ps.ps_supplycost DESC) AS rn + FROM + part p + JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey + JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +) +SELECT + co.c_name, + rp.o_orderkey, + rp.o_orderdate, + COALESCE(SUM(l.l_extendedprice * (1 - l.l_discount)), 0) AS total_lineitem_price, + sp.p_name, + rp.rn AS rank_order_status, + CASE + WHEN SUM(l.l_discount) > 0 THEN 'Discount Applied' + ELSE 'No Discount' + END AS discount_status +FROM + RankedOrders rp +LEFT JOIN + lineitem l ON rp.o_orderkey = l.l_orderkey +JOIN + CustomerOrders co ON rp.o_custkey = co.c_custkey +LEFT JOIN + SupplierParts sp ON l.l_partkey = sp.p_partkey AND sp.rn = 1 +WHERE + rp.rn = 1 +GROUP BY + co.c_name, rp.o_orderkey, rp.o_orderdate, sp.p_name, rp.rn +HAVING + COALESCE(SUM(l.l_extendedprice * (1 - l.l_discount)), 0) > 1000 +ORDER BY + total_lineitem_price DESC; diff --git a/vortex-bench/sqlstorm/tpch/490.sql b/vortex-bench/sqlstorm/tpch/490.sql new file mode 100644 index 00000000000..10afb85c161 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/490.sql @@ -0,0 +1,44 @@ + +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + s.s_acctbal, + ROW_NUMBER() OVER (PARTITION BY s.s_nationkey ORDER BY s.s_acctbal DESC) AS rank + FROM supplier s +), +PopularProducts AS ( + SELECT + p.p_partkey, + p.p_name, + SUM(l.l_quantity) AS 
total_quantity + FROM part p + JOIN lineitem l ON p.p_partkey = l.l_partkey + GROUP BY p.p_partkey, p.p_name + HAVING SUM(l.l_quantity) > 100 +), +CustomerOrders AS ( + SELECT + c.c_custkey, + c.c_name, + COUNT(o.o_orderkey) AS order_count, + SUM(o.o_totalprice) AS total_spent + FROM customer c + LEFT JOIN orders o ON c.c_custkey = o.o_custkey + GROUP BY c.c_custkey, c.c_name + HAVING SUM(o.o_totalprice) > 1000 +) +SELECT + cu.c_name, + cu.order_count, + cu.total_spent, + rs.s_name AS top_supplier, + pp.p_name AS popular_product, + pp.total_quantity, + COALESCE(rs.s_acctbal, 0) AS supplier_balance +FROM CustomerOrders cu +LEFT JOIN RankedSuppliers rs ON cu.order_count = rs.rank +LEFT JOIN PopularProducts pp ON rs.s_suppkey = pp.p_partkey +INNER JOIN supplier s ON s.s_nationkey = cu.c_custkey +WHERE cu.order_count > 5 AND pp.total_quantity IS NOT NULL +ORDER BY cu.total_spent DESC, cu.c_name; diff --git a/vortex-bench/sqlstorm/tpch/4923.sql b/vortex-bench/sqlstorm/tpch/4923.sql new file mode 100644 index 00000000000..32c699f5e24 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/4923.sql @@ -0,0 +1,73 @@ + +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + o.o_orderstatus, + ROW_NUMBER() OVER (PARTITION BY o.o_orderstatus ORDER BY o.o_totalprice DESC) AS order_rank + FROM + orders o + WHERE + o.o_orderdate >= CURRENT_DATE - INTERVAL '1 year' +), +SupplierPartDetails AS ( + SELECT + s.s_suppkey, + s.s_name, + p.p_partkey, + p.p_brand, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supply_value + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + part p ON ps.ps_partkey = p.p_partkey + GROUP BY + s.s_suppkey, s.s_name, p.p_partkey, p.p_brand +), +TopSuppliers AS ( + SELECT + s.s_name, + s.total_supply_value, + RANK() OVER (ORDER BY s.total_supply_value DESC) AS supplier_rank + FROM + SupplierPartDetails s + WHERE + s.total_supply_value > (SELECT AVG(total_supply_value) FROM SupplierPartDetails) +) +SELECT + 
o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + o.o_orderstatus, + tp.s_name AS top_supplier, + COALESCE(lp.total_line_items, 0) AS total_line_items, + COALESCE(total_nations.supplier_nation_count, 0) AS nation_count +FROM + RankedOrders o +LEFT JOIN + (SELECT + l.l_orderkey, + COUNT(*) AS total_line_items + FROM + lineitem l + GROUP BY + l.l_orderkey) lp ON o.o_orderkey = lp.l_orderkey +LEFT JOIN + (SELECT + n.n_nationkey, + COUNT(s.s_suppkey) AS supplier_nation_count + FROM + supplier s + JOIN + nation n ON s.s_nationkey = n.n_nationkey + GROUP BY + n.n_nationkey) total_nations ON total_nations.n_nationkey = o.o_orderkey % 5 +JOIN + TopSuppliers tp ON (o.o_totalprice > 1000 OR o.o_orderstatus = 'F') AND tp.supplier_rank <= 5 +WHERE + o.o_orderstatus IN ('O', 'F') +ORDER BY + o.o_orderdate DESC, o.o_totalprice DESC; diff --git a/vortex-bench/sqlstorm/tpch/500.sql b/vortex-bench/sqlstorm/tpch/500.sql new file mode 100644 index 00000000000..fdd2bcc7dc9 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/500.sql @@ -0,0 +1,67 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_custkey, + o.o_totalprice, + RANK() OVER (PARTITION BY o.o_orderstatus ORDER BY o.o_orderdate DESC) AS order_rank + FROM + orders o + WHERE + o.o_orderdate >= DATE '1997-01-01' +), +SupplierParts AS ( + SELECT + ps.ps_partkey, + SUM(ps.ps_availqty) AS total_available_quantity, + AVG(ps.ps_supplycost) AS average_supply_cost + FROM + partsupp ps + GROUP BY + ps.ps_partkey +), +CustomerSegment AS ( + SELECT + c.c_custkey, + c.c_mktsegment, + SUM(o.o_totalprice) AS total_spent + FROM + customer c + JOIN + orders o ON c.c_custkey = o.o_custkey + GROUP BY + c.c_custkey, c.c_mktsegment +) +SELECT + p.p_name, + p.p_type, + rp.o_orderkey, + rp.o_totalprice, + cs.total_spent, + CASE + WHEN cs.total_spent IS NULL THEN 'New Customer' + ELSE 'Returning Customer' + END AS customer_status, + COALESCE(sp.total_available_quantity, 0) AS available_quantity, + sp.average_supply_cost, + RANK() OVER (ORDER BY 
COALESCE(sp.total_available_quantity, 0) DESC) AS supply_rank +FROM + part p +LEFT JOIN + SupplierParts sp ON p.p_partkey = sp.ps_partkey +LEFT JOIN + RankedOrders rp ON rp.o_custkey = ( + SELECT c.c_custkey + FROM customer c + WHERE c.c_nationkey = ( + SELECT n.n_nationkey + FROM nation n + WHERE n.n_name = 'USA' + ) + ) +LEFT JOIN + CustomerSegment cs ON cs.c_custkey = rp.o_custkey +WHERE + p.p_retailprice > 100.00 + AND p.p_size IN (SELECT DISTINCT p2.p_size FROM part p2 WHERE p2.p_type LIKE 'Medium%') +ORDER BY + supply_rank DESC, customer_status; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/5022.sql b/vortex-bench/sqlstorm/tpch/5022.sql new file mode 100644 index 00000000000..5f0f1cb347a --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/5022.sql @@ -0,0 +1,37 @@ +WITH RegionalSales AS ( + SELECT + r.r_name AS region, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales, + COUNT(DISTINCT o.o_orderkey) AS order_count + FROM + region r + JOIN nation n ON r.r_regionkey = n.n_regionkey + JOIN supplier s ON n.n_nationkey = s.s_nationkey + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN part p ON ps.ps_partkey = p.p_partkey + JOIN lineitem l ON p.p_partkey = l.l_partkey + JOIN orders o ON l.l_orderkey = o.o_orderkey + WHERE + o.o_orderdate >= DATE '1995-01-01' AND o.o_orderdate < DATE '1996-01-01' + GROUP BY + r.r_name +), +TopRegions AS ( + SELECT + region, + total_sales, + order_count, + RANK() OVER (ORDER BY total_sales DESC) AS sales_rank + FROM + RegionalSales +) +SELECT + region, + total_sales, + order_count +FROM + TopRegions +WHERE + sales_rank <= 5 +ORDER BY + total_sales DESC; diff --git a/vortex-bench/sqlstorm/tpch/5250.sql b/vortex-bench/sqlstorm/tpch/5250.sql new file mode 100644 index 00000000000..a2c8977e251 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/5250.sql @@ -0,0 +1,40 @@ +WITH RegionalSales AS ( + SELECT + r.r_name AS region_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales + FROM + region r 
+ JOIN + nation n ON r.r_regionkey = n.n_regionkey + JOIN + supplier s ON n.n_nationkey = s.s_nationkey + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + part p ON ps.ps_partkey = p.p_partkey + JOIN + lineitem l ON p.p_partkey = l.l_partkey + JOIN + orders o ON l.l_orderkey = o.o_orderkey + WHERE + o.o_orderdate >= DATE '1997-01-01' AND o.o_orderdate < DATE '1998-01-01' + GROUP BY + r.r_name +), +TopRegions AS ( + SELECT + region_name, + total_sales, + DENSE_RANK() OVER (ORDER BY total_sales DESC) AS sales_rank + FROM + RegionalSales +) +SELECT + region_name, + total_sales +FROM + TopRegions +WHERE + sales_rank <= 5 +ORDER BY + total_sales DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/5368.sql b/vortex-bench/sqlstorm/tpch/5368.sql new file mode 100644 index 00000000000..d70621dddbb --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/5368.sql @@ -0,0 +1,31 @@ +WITH SupplierOrderCounts AS ( + SELECT s.s_suppkey, COUNT(DISTINCT o.o_orderkey) AS order_count + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN lineitem l ON ps.ps_partkey = l.l_partkey + JOIN orders o ON l.l_orderkey = o.o_orderkey + WHERE o.o_orderstatus = 'O' + GROUP BY s.s_suppkey +), +TopSuppliers AS ( + SELECT s.s_suppkey, s.s_name, soc.order_count + FROM supplier s + JOIN SupplierOrderCounts soc ON s.s_suppkey = soc.s_suppkey + ORDER BY soc.order_count DESC + LIMIT 5 +), +PartDetails AS ( + SELECT p.p_partkey, p.p_name, p.p_mfgr, p.p_brand, p.p_retailprice + FROM part p + WHERE p.p_retailprice > ( + SELECT AVG(p2.p_retailprice) + FROM part p2 + ) +) +SELECT ts.s_name, ts.order_count, pd.p_name, pd.p_mfgr, pd.p_brand, pd.p_retailprice +FROM TopSuppliers ts +JOIN lineitem l ON ts.s_suppkey = l.l_suppkey +JOIN PartDetails pd ON l.l_partkey = pd.p_partkey +WHERE l.l_shipmode = 'REG AIR' +AND l.l_shipdate BETWEEN '1997-01-01' AND '1997-12-31' +ORDER BY ts.order_count DESC, pd.p_retailprice DESC; \ No newline at end of file diff --git 
a/vortex-bench/sqlstorm/tpch/5554.sql b/vortex-bench/sqlstorm/tpch/5554.sql new file mode 100644 index 00000000000..2a0c5f1a86f --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/5554.sql @@ -0,0 +1,29 @@ + +WITH RECURSIVE supplier_sales AS ( + SELECT s.s_suppkey, s.s_name, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_sales + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN lineitem l ON ps.ps_partkey = l.l_partkey + GROUP BY s.s_suppkey, s.s_name +), +top_suppliers AS ( + SELECT s_suppkey, s_name, total_sales, ROW_NUMBER() OVER (ORDER BY total_sales DESC) AS sales_rank + FROM supplier_sales +) +SELECT + s.s_name, + c.c_name AS customer_name, + o.o_orderkey, + o.o_orderdate, + l.l_quantity, + l.l_extendedprice +FROM top_suppliers t +JOIN supplier s ON t.s_suppkey = s.s_suppkey +JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey +JOIN lineitem l ON ps.ps_partkey = l.l_partkey +JOIN orders o ON l.l_orderkey = o.o_orderkey +JOIN customer c ON o.o_custkey = c.c_custkey +WHERE t.sales_rank <= 10 + AND o.o_orderdate BETWEEN DATE '1997-01-01' AND DATE '1997-12-31' + AND l.l_returnflag = 'N' +ORDER BY t.total_sales DESC, o.o_orderdate ASC; diff --git a/vortex-bench/sqlstorm/tpch/5556.sql b/vortex-bench/sqlstorm/tpch/5556.sql new file mode 100644 index 00000000000..1569d5d2613 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/5556.sql @@ -0,0 +1,32 @@ +SELECT + n.n_name AS nation_name, + r.r_name AS region_name, + COUNT(DISTINCT c.c_custkey) AS total_customers, + SUM(o.o_totalprice) AS total_sales, + COUNT(DISTINCT o.o_orderkey) AS total_orders, + AVG(o.o_totalprice) AS avg_order_value, + SUM(CASE WHEN l.l_returnflag = 'R' THEN l.l_quantity ELSE 0 END) AS total_returns, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue +FROM + nation n +JOIN + region r ON n.n_regionkey = r.r_regionkey +JOIN + supplier s ON n.n_nationkey = s.s_nationkey +JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey +JOIN + part p ON ps.ps_partkey = p.p_partkey +JOIN + 
lineitem l ON p.p_partkey = l.l_partkey +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +WHERE + o.o_orderdate >= DATE '1997-01-01' AND o.o_orderdate < DATE '1998-01-01' +GROUP BY + n.n_name, r.r_name +ORDER BY + total_sales DESC, total_customers DESC +LIMIT 100; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/5654.sql b/vortex-bench/sqlstorm/tpch/5654.sql new file mode 100644 index 00000000000..389a33c2c73 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/5654.sql @@ -0,0 +1,28 @@ + +WITH SupplierCost AS ( + SELECT s.s_suppkey, SUM(ps.ps_supplycost * ps.ps_availqty) AS total_cost + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY s.s_suppkey +), +NationSummary AS ( + SELECT n.n_nationkey, n.n_name, SUM(o.o_totalprice) AS total_sales + FROM nation n + JOIN supplier s ON n.n_nationkey = s.s_nationkey + JOIN customer c ON s.s_suppkey = c.c_custkey + JOIN orders o ON c.c_custkey = o.o_custkey + GROUP BY n.n_nationkey, n.n_name +), +PartDetail AS ( + SELECT p.p_partkey, p.p_name, COUNT(l.l_linenumber) AS total_lines, AVG(l.l_extendedprice) AS avg_price + FROM part p + JOIN lineitem l ON p.p_partkey = l.l_partkey + GROUP BY p.p_partkey, p.p_name +) +SELECT ns.n_name, pd.p_name, SUM(ns.total_sales) AS total_sales, SUM(sc.total_cost) AS total_cost, MAX(pd.avg_price) AS max_avg_price +FROM NationSummary ns +JOIN SupplierCost sc ON ns.n_nationkey = sc.s_suppkey +JOIN PartDetail pd ON pd.p_partkey = sc.s_suppkey +GROUP BY ns.n_name, pd.p_name +ORDER BY total_sales DESC, total_cost DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/6095.sql b/vortex-bench/sqlstorm/tpch/6095.sql new file mode 100644 index 00000000000..6e0c5e14c45 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/6095.sql @@ -0,0 +1,36 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + n.n_name AS nation_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supply_cost, + DENSE_RANK() OVER 
(PARTITION BY n.n_name ORDER BY SUM(ps.ps_supplycost * ps.ps_availqty) DESC) AS rank + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN nation n ON s.s_nationkey = n.n_nationkey + GROUP BY s.s_suppkey, s.s_name, n.n_name +), +TopSuppliers AS ( + SELECT + nation_name, + s.s_suppkey, + s.s_name, + total_supply_cost + FROM RankedSuppliers rs + JOIN supplier s ON s.s_suppkey = rs.s_suppkey + WHERE rs.rank <= 3 +) +SELECT + p.p_partkey, + p.p_name, + p.p_brand, + p.p_retailprice, + ts.nation_name, + ts.s_name AS top_supplier, + ts.total_supply_cost +FROM part p +JOIN partsupp ps ON p.p_partkey = ps.ps_partkey +JOIN TopSuppliers ts ON ps.ps_suppkey = ts.s_suppkey +WHERE p.p_retailprice > (SELECT AVG(p2.p_retailprice) FROM part p2) +ORDER BY ts.nation_name, ts.total_supply_cost DESC, p.p_retailprice DESC +LIMIT 50; diff --git a/vortex-bench/sqlstorm/tpch/6380.sql b/vortex-bench/sqlstorm/tpch/6380.sql new file mode 100644 index 00000000000..4fab32a006e --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/6380.sql @@ -0,0 +1,61 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + c.c_mktsegment, + ROW_NUMBER() OVER (PARTITION BY o.o_orderstatus ORDER BY o.o_orderdate DESC) AS rn + FROM + orders o + JOIN + customer c ON o.o_custkey = c.c_custkey + WHERE + o.o_orderdate >= DATE '1997-01-01' AND o.o_orderdate < DATE '1997-10-01' +), +SupplierStats AS ( + SELECT + s.s_suppkey, + s.s_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supply_cost + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY + s.s_suppkey, s.s_name +), +PartStats AS ( + SELECT + p.p_partkey, + p.p_name, + p.p_brand, + COUNT(DISTINCT ps.ps_suppkey) AS total_suppliers, + SUM(ps.ps_availqty) AS total_available + FROM + part p + JOIN + partsupp ps ON p.p_partkey = ps.ps_partkey + GROUP BY + p.p_partkey, p.p_name, p.p_brand +) +SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + o.c_mktsegment, + ps.p_name, + 
ps.total_suppliers, + ps.total_available, + ss.total_supply_cost +FROM + RankedOrders o +JOIN + lineitem l ON o.o_orderkey = l.l_orderkey +JOIN + PartStats ps ON l.l_partkey = ps.p_partkey +JOIN + SupplierStats ss ON l.l_suppkey = ss.s_suppkey +WHERE + o.rn <= 100 +ORDER BY + o.o_orderdate DESC, ps.total_available DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/674.sql b/vortex-bench/sqlstorm/tpch/674.sql new file mode 100644 index 00000000000..9dd255eb5f0 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/674.sql @@ -0,0 +1,57 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_cost, + ROW_NUMBER() OVER (PARTITION BY n.n_name ORDER BY SUM(ps.ps_supplycost * ps.ps_availqty) DESC) AS rank + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + nation n ON s.s_nationkey = n.n_nationkey + GROUP BY + s.s_suppkey, s.s_name, n.n_name +), +CustomerOrders AS ( + SELECT + c.c_custkey, + c.c_name, + COUNT(o.o_orderkey) AS total_orders, + SUM(o.o_totalprice) AS total_spent + FROM + customer c + LEFT JOIN + orders o ON c.c_custkey = o.o_custkey + GROUP BY + c.c_custkey, c.c_name +), +HighValueCustomers AS ( + SELECT + c.c_custkey, + c.c_name, + co.total_orders, + co.total_spent + FROM + CustomerOrders co + JOIN + customer c ON co.c_custkey = c.c_custkey + WHERE + co.total_spent > 10000 +) +SELECT + n.n_name, + COUNT(DISTINCT cu.c_custkey) AS high_value_customers_count, + AVG(s.total_cost) AS average_supplier_cost +FROM + HighValueCustomers cu +LEFT JOIN + RankedSuppliers s ON s.rank = 1 +JOIN + nation n ON cu.c_custkey = n.n_nationkey +GROUP BY + n.n_name +HAVING + COUNT(DISTINCT cu.c_custkey) > 0 +ORDER BY + n.n_name; diff --git a/vortex-bench/sqlstorm/tpch/6795.sql b/vortex-bench/sqlstorm/tpch/6795.sql new file mode 100644 index 00000000000..384d35913a6 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/6795.sql @@ -0,0 +1,29 @@ +SELECT + n.n_name AS nation_name, + 
r.r_name AS region_name, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + COUNT(DISTINCT o.o_orderkey) AS total_orders, + COUNT(DISTINCT c.c_custkey) AS total_customers +FROM + customer c +JOIN + orders o ON c.c_custkey = o.o_custkey +JOIN + lineitem l ON o.o_orderkey = l.l_orderkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + region r ON n.n_regionkey = r.r_regionkey +WHERE + l.l_shipdate >= DATE '1997-01-01' AND + l.l_shipdate < DATE '1997-12-31' AND + r.r_name = 'Europe' +GROUP BY + n.n_name, r.r_name +ORDER BY + total_revenue DESC, total_orders DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/7036.sql b/vortex-bench/sqlstorm/tpch/7036.sql new file mode 100644 index 00000000000..1c70ff1af9d --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/7036.sql @@ -0,0 +1,36 @@ +WITH RankedSuppliers AS ( + SELECT s.s_suppkey, s.s_name, s.s_acctbal, + ROW_NUMBER() OVER (PARTITION BY n.n_nationkey ORDER BY s.s_acctbal DESC) AS rank + FROM supplier s + JOIN nation n ON s.s_nationkey = n.n_nationkey + WHERE s.s_acctbal > ( + SELECT AVG(s2.s_acctbal) + FROM supplier s2 + WHERE s2.s_nationkey = s.s_nationkey + ) +), +PartOrders AS ( + SELECT l.l_orderkey, p.p_partkey, SUM(l.l_quantity) AS total_quantity + FROM lineitem l + JOIN partsupp ps ON l.l_partkey = ps.ps_partkey + JOIN part p ON ps.ps_partkey = p.p_partkey + GROUP BY l.l_orderkey, p.p_partkey +), +OrderTotal AS ( + SELECT o.o_orderkey, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_price + FROM orders o + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + GROUP BY o.o_orderkey +), +CustomerSpend AS ( + SELECT c.c_custkey, SUM(ot.total_price) AS spent + FROM customer c + JOIN OrderTotal ot ON c.c_custkey = ot.o_orderkey + GROUP BY c.c_custkey +) +SELECT rs.s_name, rs.s_acctbal, ps.total_quantity, cs.spent +FROM RankedSuppliers rs +JOIN PartOrders ps ON 
rs.s_suppkey = ps.p_partkey +JOIN CustomerSpend cs ON cs.c_custkey = ps.l_orderkey +WHERE rs.rank <= 5 +ORDER BY rs.s_acctbal DESC, cs.spent DESC; diff --git a/vortex-bench/sqlstorm/tpch/7359.sql b/vortex-bench/sqlstorm/tpch/7359.sql new file mode 100644 index 00000000000..1440ff90d42 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/7359.sql @@ -0,0 +1,50 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + c.c_name, + ROW_NUMBER() OVER (PARTITION BY c.c_nationkey ORDER BY o.o_orderdate DESC) AS order_rank + FROM + orders o + JOIN + customer c ON o.o_custkey = c.c_custkey +), +RecentHighValueOrders AS ( + SELECT + ro.o_orderkey, + ro.o_orderdate, + ro.o_totalprice, + ro.c_name + FROM + RankedOrders ro + WHERE + ro.order_rank <= 5 + AND ro.o_totalprice > (SELECT AVG(o_totalprice) FROM orders) +), +SupplierDetails AS ( + SELECT + ps.ps_partkey, + ps.ps_suppkey, + s.s_name, + s.s_acctbal + FROM + partsupp ps + JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +) +SELECT + rhv.o_orderkey, + rhv.o_orderdate, + rhv.o_totalprice, + rhv.c_name, + sd.s_name AS supplier_name, + sd.s_acctbal AS supplier_account_balance +FROM + RecentHighValueOrders rhv +JOIN + lineitem li ON rhv.o_orderkey = li.l_orderkey +JOIN + SupplierDetails sd ON li.l_partkey = sd.ps_partkey +ORDER BY + rhv.o_orderdate DESC, rhv.o_totalprice DESC; diff --git a/vortex-bench/sqlstorm/tpch/7631.sql b/vortex-bench/sqlstorm/tpch/7631.sql new file mode 100644 index 00000000000..1fd767195ff --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/7631.sql @@ -0,0 +1,63 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + n.n_name AS nation_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supply_value, + DENSE_RANK() OVER (PARTITION BY n.n_name ORDER BY SUM(ps.ps_supplycost * ps.ps_availqty) DESC) AS rank + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN + nation n ON s.s_nationkey = n.n_nationkey + GROUP BY + s.s_suppkey, s.s_name, 
n.n_name +), +TopSuppliers AS ( + SELECT + rs.s_suppkey, + rs.s_name, + rs.nation_name, + rs.total_supply_value + FROM + RankedSuppliers rs + WHERE + rs.rank <= 5 +), +OrderDetails AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + li.l_partkey, + li.l_quantity, + li.l_discount, + li.l_tax, + ps.ps_supplycost, + ts.s_name, + ts.nation_name + FROM + orders o + JOIN + lineitem li ON o.o_orderkey = li.l_orderkey + JOIN + TopSuppliers ts ON li.l_suppkey = ts.s_suppkey + JOIN + partsupp ps ON li.l_partkey = ps.ps_partkey AND li.l_suppkey = ps.ps_suppkey + WHERE + o.o_orderdate >= '1996-01-01' AND + o.o_orderdate < '1997-01-01' +) +SELECT + ts.nation_name, + COUNT(DISTINCT od.o_orderkey) AS total_orders, + SUM(od.l_quantity * od.ps_supplycost * (1 - od.l_discount)) AS total_revenue, + AVG(od.l_tax) AS average_tax +FROM + OrderDetails od +JOIN + TopSuppliers ts ON od.s_name = ts.s_name +GROUP BY + ts.nation_name +ORDER BY + total_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/8104.sql b/vortex-bench/sqlstorm/tpch/8104.sql new file mode 100644 index 00000000000..e2ac4f49ea9 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/8104.sql @@ -0,0 +1,42 @@ +WITH RECURSIVE SupplierHierarchy AS ( + SELECT s.s_suppkey, s.s_name, s.s_nationkey, 1 AS level + FROM supplier s + WHERE s.s_acctbal > 50000 + UNION ALL + SELECT s.s_suppkey, s.s_name, s.s_nationkey, sh.level + 1 + FROM supplier s + JOIN SupplierHierarchy sh ON s.s_suppkey = sh.s_nationkey +) +SELECT + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + n.n_name AS nation_name, + COUNT(DISTINCT o.o_orderkey) AS total_orders, + COUNT(DISTINCT c.c_custkey) AS total_customers, + MIN(o.o_orderdate) AS first_order_date, + MAX(o.o_orderdate) AS last_order_date, + CASE + WHEN COUNT(DISTINCT o.o_orderkey) > 1000 THEN 'High' + WHEN COUNT(DISTINCT o.o_orderkey) BETWEEN 500 AND 1000 THEN 'Medium' + ELSE 'Low' + END AS order_volume_category +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = 
o.o_orderkey +JOIN + customer c ON o.o_custkey = c.c_custkey +JOIN + nation n ON c.c_nationkey = n.n_nationkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +JOIN + SupplierHierarchy sh ON ps.ps_suppkey = sh.s_suppkey +WHERE + l.l_shipdate BETWEEN '1997-01-01' AND '1997-12-31' + AND n.n_regionkey IN (SELECT r.r_regionkey FROM region r WHERE r.r_name LIKE '%NA%') + AND l.l_returnflag = 'N' +GROUP BY + n.n_name +ORDER BY + total_revenue DESC +LIMIT 10; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/8256.sql b/vortex-bench/sqlstorm/tpch/8256.sql new file mode 100644 index 00000000000..880c7a1823e --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/8256.sql @@ -0,0 +1,23 @@ +WITH supplier_details AS ( + SELECT s.s_suppkey, s.s_name, s.s_nationkey, SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supply_cost + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY s.s_suppkey, s.s_name, s.s_nationkey +), nation_costs AS ( + SELECT n.n_nationkey, n.n_name, SUM(sd.total_supply_cost) AS nation_supply_cost + FROM supplier_details sd + JOIN nation n ON sd.s_nationkey = n.n_nationkey + GROUP BY n.n_nationkey, n.n_name +), region_costs AS ( + SELECT r.r_regionkey, r.r_name, SUM(nc.nation_supply_cost) AS region_supply_cost + FROM nation_costs nc + JOIN nation n ON nc.n_nationkey = n.n_nationkey + JOIN region r ON n.n_regionkey = r.r_regionkey + GROUP BY r.r_regionkey, r.r_name +) +SELECT r.r_name, r.region_supply_cost, COUNT(DISTINCT s.s_suppkey) AS number_of_suppliers +FROM region_costs r +JOIN supplier s ON s.s_nationkey IN (SELECT n_nationkey FROM nation n JOIN region r ON n.n_regionkey = r.r_regionkey WHERE r.r_regionkey = r.r_regionkey) +GROUP BY r.r_name, r.region_supply_cost +ORDER BY r.region_supply_cost DESC, number_of_suppliers DESC +LIMIT 10; diff --git a/vortex-bench/sqlstorm/tpch/8498.sql b/vortex-bench/sqlstorm/tpch/8498.sql new file mode 100644 index 00000000000..04a23ba4335 --- /dev/null +++ 
b/vortex-bench/sqlstorm/tpch/8498.sql @@ -0,0 +1,53 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + c.c_mktsegment, + ROW_NUMBER() OVER (PARTITION BY c.c_mktsegment ORDER BY o.o_totalprice DESC) AS rn + FROM + orders o + JOIN + customer c ON o.o_custkey = c.c_custkey +), +TopOrderSegments AS ( + SELECT + ro.c_mktsegment, + ro.o_orderkey, + ro.o_orderdate, + ro.o_totalprice + FROM + RankedOrders ro + WHERE + ro.rn <= 10 +) +SELECT + p.p_name, + SUM(l.l_quantity) AS total_quantity, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS revenue, + COUNT(DISTINCT o.o_orderkey) AS order_count, + n.n_name AS supplier_nation +FROM + lineitem l +JOIN + orders o ON l.l_orderkey = o.o_orderkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + nation n ON s.s_nationkey = n.n_nationkey +JOIN + TopOrderSegments tos ON o.o_orderkey = tos.o_orderkey +JOIN + part p ON l.l_partkey = p.p_partkey +WHERE + l.l_shipdate >= DATE '1996-01-01' + AND l.l_shipdate < DATE '1997-01-01' +GROUP BY + p.p_name, + n.n_name +HAVING + SUM(l.l_extendedprice * (1 - l.l_discount)) > 10000 +ORDER BY + revenue DESC, total_quantity DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/8647.sql b/vortex-bench/sqlstorm/tpch/8647.sql new file mode 100644 index 00000000000..8e9aad627b1 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/8647.sql @@ -0,0 +1,62 @@ + +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + RANK() OVER (PARTITION BY o.o_orderdate ORDER BY SUM(l.l_extendedprice * (1 - l.l_discount)) DESC) AS daily_rank + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + GROUP BY + o.o_orderkey, o.o_orderdate +), +SupplierRevenue AS ( + SELECT + s.s_suppkey, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS supplier_revenue + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + 
JOIN + lineitem l ON ps.ps_partkey = l.l_partkey + WHERE + l.l_shipdate >= DATE '1997-01-01' AND l.l_shipdate < DATE '1998-01-01' + GROUP BY + s.s_suppkey +), +TopSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + r.r_name, + sr.supplier_revenue, + RANK() OVER (ORDER BY sr.supplier_revenue DESC) AS revenue_rank + FROM + SupplierRevenue sr + JOIN + supplier s ON sr.s_suppkey = s.s_suppkey + JOIN + nation n ON s.s_nationkey = n.n_nationkey + JOIN + region r ON n.n_regionkey = r.r_regionkey +) +SELECT + o.o_orderkey, + o.o_orderdate, + r.daily_rank AS order_rank, + ts.s_suppkey, + ts.s_name, + ts.supplier_revenue +FROM + RankedOrders r +JOIN + orders o ON r.o_orderkey = o.o_orderkey +JOIN + TopSuppliers ts ON ts.revenue_rank <= 10 +WHERE + r.daily_rank <= 5 +ORDER BY + o.o_orderdate, ts.supplier_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/8975.sql b/vortex-bench/sqlstorm/tpch/8975.sql new file mode 100644 index 00000000000..b05950f23d5 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/8975.sql @@ -0,0 +1,47 @@ +WITH OrderSummary AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + COUNT(DISTINCT l.l_suppkey) AS unique_suppliers, + C.c_mktsegment + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + JOIN + customer C ON o.o_custkey = C.c_custkey + WHERE + o.o_orderdate >= DATE '1995-01-01' + AND o.o_orderdate < DATE '1996-01-01' + GROUP BY + o.o_orderkey, o.o_orderdate, C.c_mktsegment +), +SupplierPerformance AS ( + SELECT + ps.ps_suppkey, + SUM(ps.ps_supplycost * ps.ps_availqty) AS supplier_costs, + COUNT(DISTINCT ps.ps_partkey) AS supplied_parts + FROM + partsupp ps + JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey + GROUP BY + ps.ps_suppkey +) +SELECT + O.o_orderkey, + O.o_orderdate, + O.total_revenue, + O.unique_suppliers, + S.supplier_costs, + S.supplied_parts, + O.c_mktsegment +FROM + OrderSummary O +JOIN + SupplierPerformance S ON O.unique_suppliers = S.supplied_parts +WHERE + 
O.total_revenue > (SELECT AVG(total_revenue) FROM OrderSummary) +ORDER BY + O.total_revenue DESC; diff --git a/vortex-bench/sqlstorm/tpch/9056.sql b/vortex-bench/sqlstorm/tpch/9056.sql new file mode 100644 index 00000000000..511b7c777ab --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/9056.sql @@ -0,0 +1,57 @@ +WITH RankedSuppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + n.n_name AS nation_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_cost, + RANK() OVER (PARTITION BY n.n_regionkey ORDER BY SUM(ps.ps_supplycost * ps.ps_availqty) DESC) AS supplier_rank + FROM + supplier s + JOIN + nation n ON s.s_nationkey = n.n_nationkey + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY + s.s_suppkey, s.s_name, n.n_name, n.n_regionkey +), +TopSuppliers AS ( + SELECT + r.r_name AS region_name, + rs.s_name, + rs.total_cost + FROM + RankedSuppliers rs + JOIN + nation n ON rs.nation_name = n.n_name + JOIN + region r ON n.n_regionkey = r.r_regionkey + WHERE + rs.supplier_rank <= 5 +), +CustomerOrderSummary AS ( + SELECT + c.c_custkey, + c.c_name, + COUNT(o.o_orderkey) AS order_count, + SUM(o.o_totalprice) AS total_spent + FROM + customer c + LEFT JOIN + orders o ON c.c_custkey = o.o_custkey + WHERE + o.o_orderstatus = 'O' + GROUP BY + c.c_custkey, c.c_name +) +SELECT + tos.region_name, + tos.s_name, + cos.c_name, + cos.order_count, + cos.total_spent +FROM + TopSuppliers tos +JOIN + CustomerOrderSummary cos ON tos.total_cost < cos.total_spent +ORDER BY + tos.region_name, tos.total_cost DESC, cos.total_spent DESC; diff --git a/vortex-bench/sqlstorm/tpch/9127.sql b/vortex-bench/sqlstorm/tpch/9127.sql new file mode 100644 index 00000000000..fe3aef28bef --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/9127.sql @@ -0,0 +1,47 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + c.c_name, + c.c_nationkey, + ROW_NUMBER() OVER (PARTITION BY c.c_nationkey ORDER BY o.o_totalprice DESC) AS OrderRank + FROM + orders o + JOIN + customer c ON 
o.o_custkey = c.c_custkey + WHERE + o.o_orderdate >= DATE '1997-01-01' + AND o.o_orderdate < DATE '1997-10-01' +), +HighValueOrders AS ( + SELECT + ro.o_orderkey, + ro.o_orderdate, + ro.o_totalprice, + ro.c_name, + n.n_name AS nation_name + FROM + RankedOrders ro + JOIN + nation n ON ro.c_nationkey = n.n_nationkey + WHERE + ro.OrderRank <= 10 +) +SELECT + hvo.o_orderkey, + hvo.o_orderdate, + hvo.o_totalprice, + hvo.c_name, + hvo.nation_name, + SUM(li.l_quantity) AS total_quantity, + AVG(li.l_extendedprice) AS avg_extended_price, + SUM(li.l_discount) AS total_discount +FROM + HighValueOrders hvo +JOIN + lineitem li ON hvo.o_orderkey = li.l_orderkey +GROUP BY + hvo.o_orderkey, hvo.o_orderdate, hvo.o_totalprice, hvo.c_name, hvo.nation_name +ORDER BY + hvo.o_totalprice DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/9336.sql b/vortex-bench/sqlstorm/tpch/9336.sql new file mode 100644 index 00000000000..a38ae9bef8c --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/9336.sql @@ -0,0 +1,50 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + RANK() OVER (PARTITION BY EXTRACT(YEAR FROM o.o_orderdate) ORDER BY SUM(l.l_extendedprice * (1 - l.l_discount)) DESC) AS revenue_rank + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + WHERE + o.o_orderdate >= DATE '1996-01-01' AND o.o_orderdate < DATE '1997-01-01' + GROUP BY + o.o_orderkey, o.o_orderdate +), +TopCustomers AS ( + SELECT + c.c_custkey, + c.c_name, + SUM(lo.total_revenue) AS customer_revenue + FROM + customer c + JOIN + RankedOrders lo ON c.c_custkey = lo.o_orderkey + GROUP BY + c.c_custkey, c.c_name + HAVING + SUM(lo.total_revenue) > 100000 +) +SELECT + rc.r_name AS region, + nc.n_name AS nation, + COUNT(tc.c_custkey) AS top_customer_count, + SUM(tc.customer_revenue) AS total_customer_revenue +FROM + region rc +JOIN + nation nc ON rc.r_regionkey = nc.n_regionkey +JOIN + supplier s ON s.s_nationkey 
= nc.n_nationkey +JOIN + partsupp ps ON ps.ps_suppkey = s.s_suppkey +JOIN + RankedOrders lo ON lo.o_orderkey = ps.ps_partkey +JOIN + TopCustomers tc ON tc.c_custkey = lo.o_orderkey +GROUP BY + rc.r_name, nc.n_name +ORDER BY + total_customer_revenue DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/9469.sql b/vortex-bench/sqlstorm/tpch/9469.sql new file mode 100644 index 00000000000..0388f49bbb4 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/9469.sql @@ -0,0 +1,31 @@ +WITH SupplierParts AS ( + SELECT s.s_suppkey, s.s_name, p.p_partkey, p.p_name, ps.ps_supplycost, ps.ps_availqty + FROM supplier s + JOIN partsupp ps ON s.s_suppkey = ps.ps_suppkey + JOIN part p ON ps.ps_partkey = p.p_partkey + WHERE ps.ps_availqty > 0 +), +OrderDetails AS ( + SELECT o.o_orderkey, o.o_orderdate, c.c_custkey, SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue + FROM orders o + JOIN customer c ON o.o_custkey = c.c_custkey + JOIN lineitem l ON o.o_orderkey = l.l_orderkey + WHERE o.o_orderdate >= '1997-01-01' AND o.o_orderdate < '1998-01-01' + GROUP BY o.o_orderkey, o.o_orderdate, c.c_custkey +), +RankedSuppliers AS ( + SELECT s.s_suppkey, s.s_name, COUNT(DISTINCT op.o_orderkey) AS order_count + FROM SupplierParts s + JOIN OrderDetails op ON s.s_suppkey = op.o_orderkey + GROUP BY s.s_suppkey, s.s_name + HAVING COUNT(DISTINCT op.o_orderkey) > 5 +), +TopSuppliers AS ( + SELECT *, RANK() OVER (ORDER BY order_count DESC) AS supplier_rank + FROM RankedSuppliers +) +SELECT ts.s_suppkey, ts.s_name, ts.order_count, sp.p_partkey, sp.p_name, sp.ps_supplycost, sp.ps_availqty +FROM TopSuppliers ts +JOIN SupplierParts sp ON ts.s_suppkey = sp.s_suppkey +WHERE ts.supplier_rank <= 10 +ORDER BY ts.order_count DESC, sp.ps_supplycost ASC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/9633.sql b/vortex-bench/sqlstorm/tpch/9633.sql new file mode 100644 index 00000000000..496a0b99614 --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/9633.sql @@ -0,0 +1,61 @@ 
+WITH ranked_orders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + o.o_totalprice, + RANK() OVER (PARTITION BY o.o_orderdate ORDER BY o.o_totalprice DESC) AS total_price_rank + FROM + orders o + WHERE + o.o_orderdate BETWEEN '1997-01-01' AND '1997-12-31' +), +top_suppliers AS ( + SELECT + s.s_suppkey, + s.s_name, + SUM(ps.ps_supplycost * ps.ps_availqty) AS total_supplycost + FROM + supplier s + JOIN + partsupp ps ON s.s_suppkey = ps.ps_suppkey + GROUP BY + s.s_suppkey, s.s_name + ORDER BY + total_supplycost DESC + LIMIT 10 +), +customer_summary AS ( + SELECT + c.c_custkey, + c.c_name, + c.c_acctbal, + COUNT(o.o_orderkey) AS order_count, + SUM(o.o_totalprice) AS total_spent + FROM + customer c + LEFT JOIN + orders o ON c.c_custkey = o.o_custkey + GROUP BY + c.c_custkey, c.c_name, c.c_acctbal + HAVING + SUM(o.o_totalprice) > 10000 +) +SELECT + r.o_orderkey, + r.o_orderdate, + cs.c_name, + cs.total_spent, + ts.s_name AS supplier_name, + ts.total_supplycost +FROM + ranked_orders r +JOIN + customer_summary cs ON r.o_orderkey = cs.c_custkey +JOIN + lineitem l ON r.o_orderkey = l.l_orderkey +JOIN + top_suppliers ts ON l.l_suppkey = ts.s_suppkey +WHERE + r.total_price_rank <= 5 +ORDER BY + r.o_orderdate, cs.total_spent DESC; \ No newline at end of file diff --git a/vortex-bench/sqlstorm/tpch/9998.sql b/vortex-bench/sqlstorm/tpch/9998.sql new file mode 100644 index 00000000000..3567f3cca3c --- /dev/null +++ b/vortex-bench/sqlstorm/tpch/9998.sql @@ -0,0 +1,51 @@ +WITH RankedOrders AS ( + SELECT + o.o_orderkey, + o.o_orderdate, + SUM(l.l_extendedprice * (1 - l.l_discount)) AS total_revenue, + RANK() OVER (PARTITION BY o.o_orderdate ORDER BY SUM(l.l_extendedprice * (1 - l.l_discount)) DESC) AS order_rank + FROM + orders o + JOIN + lineitem l ON o.o_orderkey = l.l_orderkey + GROUP BY + o.o_orderkey, o.o_orderdate +), +TopOrders AS ( + SELECT + r.o_orderkey, + r.o_orderdate, + r.total_revenue + FROM + RankedOrders r + WHERE + r.order_rank <= 10 +) +SELECT + o.o_orderkey, + 
o.o_orderdate, + o.total_revenue, + c.c_name, + s.s_name, + p.p_name, + ps.ps_availqty +FROM + TopOrders o +JOIN + orders ord ON o.o_orderkey = ord.o_orderkey +JOIN + lineitem l ON ord.o_orderkey = l.l_orderkey +JOIN + partsupp ps ON l.l_partkey = ps.ps_partkey AND l.l_suppkey = ps.ps_suppkey +JOIN + supplier s ON ps.ps_suppkey = s.s_suppkey +JOIN + customer c ON ord.o_custkey = c.c_custkey +JOIN + part p ON l.l_partkey = p.p_partkey +WHERE + c.c_acctbal > 10000 AND + c.c_mktsegment = 'BUILDING' +ORDER BY + o.total_revenue DESC, + o.o_orderdate ASC; diff --git a/vortex-bench/src/datasets/mod.rs b/vortex-bench/src/datasets/mod.rs index 3e72ba69e7f..886312ac926 100644 --- a/vortex-bench/src/datasets/mod.rs +++ b/vortex-bench/src/datasets/mod.rs @@ -11,6 +11,8 @@ use vortex::array::ArrayRef; use vortex::array::ExecutionCtx; use crate::clickbench::Flavor; +use crate::sqlstorm::SqlstormOrigin; +use crate::sqlstorm::data::table_names as sqlstorm_table_names; pub mod data_downloads; pub mod feature_vectors; @@ -59,6 +61,8 @@ pub trait Dataset { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum BenchmarkDataset { + #[serde(rename = "sqlstorm")] + Sqlstorm { origin: String }, #[serde(rename = "appian")] Appian, #[serde(rename = "tpch")] @@ -82,6 +86,7 @@ pub enum BenchmarkDataset { impl BenchmarkDataset { pub fn name(&self) -> &str { match self { + BenchmarkDataset::Sqlstorm { .. } => "sqlstorm", BenchmarkDataset::Appian => "appian", BenchmarkDataset::TpcH { .. } => "tpch", BenchmarkDataset::TpcDS { .. 
} => "tpcds", @@ -98,6 +103,7 @@ impl BenchmarkDataset { impl Display for BenchmarkDataset { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + BenchmarkDataset::Sqlstorm { origin } => write!(f, "sqlstorm({origin})"), BenchmarkDataset::Appian => write!(f, "appian"), BenchmarkDataset::TpcH { scale_factor } => write!(f, "tpch(sf={scale_factor})"), BenchmarkDataset::TpcDS { scale_factor } => write!(f, "tpcds(sf={scale_factor})"), @@ -131,6 +137,12 @@ const APPIAN_TABLES: &[&str] = &[ impl BenchmarkDataset { pub fn tables(&self) -> &[&'static str] { match self { + // `origin` is a free `String` (round-tripped through serde), so an unknown + // value yields no tables rather than erroring. In practice it is always one + // of the four `SqlstormOrigin` names written by `SqlstormBenchmark::dataset()`. + BenchmarkDataset::Sqlstorm { origin } => SqlstormOrigin::from_name(origin) + .map(sqlstorm_table_names) + .unwrap_or(&[]), BenchmarkDataset::Appian => APPIAN_TABLES, BenchmarkDataset::TpcDS { .. } => &[ "call_center", diff --git a/vortex-bench/src/lib.rs b/vortex-bench/src/lib.rs index 30ff45c97a8..e77665e77d8 100644 --- a/vortex-bench/src/lib.rs +++ b/vortex-bench/src/lib.rs @@ -22,6 +22,8 @@ use public_bi::PublicBiBenchmark; use realnest::gharchive::GithubArchiveBenchmark; use serde::Deserialize; use serde::Serialize; +use sqlstorm::SqlstormBenchmark; +use sqlstorm::SqlstormOrigin; use statpopgen::StatPopGenBenchmark; use tpcds::TpcDsBenchmark; use tpch::benchmark::TpcHBenchmark; @@ -51,6 +53,7 @@ pub mod public_bi; pub mod random_access; pub mod realnest; pub mod runner; +pub mod sqlstorm; pub mod statpopgen; pub mod tpcds; pub mod tpch; @@ -247,6 +250,8 @@ impl CompactionStrategy { /// CLI argument for selecting which benchmark to run. 
#[derive(clap::ValueEnum, Clone, Copy)] pub enum BenchmarkArg { + #[clap(name = "sqlstorm")] + Sqlstorm, #[clap(name = "appian")] Appian, #[clap(name = "clickbench")] @@ -276,6 +281,18 @@ const REMOTE_DATA_KEY: &str = "remote-data-dir"; /// Factory function to create a benchmark instance from CLI arguments. pub fn create_benchmark(b: BenchmarkArg, opts: &Opts) -> anyhow::Result> { match b { + BenchmarkArg::Sqlstorm => { + // SQLStorm has no scale factor: each origin runs at a single fixed + // size (TPC-H/TPC-DS at SF10, StackOverflow `math`, JOB fixed), so + // `scale-factor` is intentionally not read here. See + // `vortex-bench/sqlstorm/README.md` ("Data size"). + let origin = opts + .get_as::("origin") + .unwrap_or(SqlstormOrigin::TpcH); + let remote_data_dir = opts.get_as::(REMOTE_DATA_KEY); + let benchmark = SqlstormBenchmark::new(origin, remote_data_dir)?; + Ok(Box::new(benchmark) as _) + } BenchmarkArg::Appian => { let remote_data_dir = opts.get_as::(REMOTE_DATA_KEY); let benchmark = AppianBenchmark::with_remote_data_dir(remote_data_dir)?; diff --git a/vortex-bench/src/sqlstorm/data.rs b/vortex-bench/src/sqlstorm/data.rs new file mode 100644 index 00000000000..9a3c9709f3d --- /dev/null +++ b/vortex-bench/src/sqlstorm/data.rs @@ -0,0 +1,496 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Data acquisition and table specs for SQLStorm StackOverflow / JOB origins. +//! +//! TPC-H and TPC-DS delegate to their own benchmark generators, so only the two +//! non-TPC origins have a download → extract → DuckDB-convert pipeline. Both +//! origins share the same driver ([`generate_origin`]); each is parameterized +//! by an [`OriginData`] recipe. +//! +//! ## Identifier case +//! +//! The upstream StackOverflow DDL uses CamelCase column names (`OwnerUserId`, +//! `CreationDate`, …) and capitalized table names (`Posts`, `Users`, …). The +//! 
SQLStorm queries reference those names unquoted, which would fail under +//! DataFusion's default `enable_ident_normalization=true` (the parser +//! lowercases identifiers while the Parquet schema preserves case → +//! field-not-found). +//! +//! [`STACKOVERFLOW`]'s DDL inlines the schema with **lowercase** column names, +//! so `COPY (SELECT * FROM "Posts") TO 'posts.parquet'` writes lowercase +//! columns into the Parquet shard. DuckDB's case-insensitive unquoted +//! identifier resolution and DataFusion's identifier lowercasing then both +//! resolve the queries' CamelCase column references against the lowercased +//! schema. Table names in the DDL stay CamelCase so that each +//! `COPY "Posts" FROM 'Posts.csv'` reads naturally; the lowercase output +//! shard name is the second element of each entry in `OriginData::tables`. + +use std::fs; +use std::path::Path; +use std::path::PathBuf; +use std::process::Command; + +use anyhow::Context; +use anyhow::bail; +use tracing::info; +use url::Url; + +use crate::Format; +use crate::TableSpec; +use crate::datasets::data_downloads::download_data; +use crate::sqlstorm::SqlstormOrigin; + +/// Archive codec; selects the extraction command in [`extract_archive`]. +enum Archive { + /// gzip-compressed tar (`tar -xzf`). + TarGz, + /// zstd-compressed tar (`zstd -dc | tar -xf -`). + TarZst, +} + +/// Per-origin data-gen recipe consumed by [`generate_origin`]. +pub struct OriginData { + /// Upstream archive URL. + url: &'static str, + /// Local filename to save the downloaded archive as (relative to base dir). + archive_name: &'static str, + /// Archive codec. + archive: Archive, + /// Origin name, used only in log messages. + log_name: &'static str, + /// SQL DDL: one `CREATE TABLE` per entry in [`OriginData::tables`], with + /// lowercase column names so `SELECT *` exports lowercase Parquet columns. + ddl: &'static str, + /// `(upstream_table, output_shard_stem)` for each table. 
The upstream + /// name is the CamelCase table created by [`OriginData::ddl`] (and equals + /// the CSV file stem); the output is the lowercase Parquet shard stem. + /// For origins already lowercase upstream (JOB), both elements are equal. + tables: &'static [(&'static str, &'static str)], + /// Extra options spliced into each `COPY <table> FROM '<csv>' (..., {extra})` + /// statement after the standard csv settings. Empty when only the + /// defaults are needed. + extra_copy_opts: &'static str, +} + +/// StackOverflow `math` data (~12 GB gzip). Schema transcribed from +/// `https://db.in.tum.de/~schmidt/data/stackoverflow_schema.sql` with +/// `ALTER TABLE … ADD FOREIGN KEY` lines (which DuckDB rejects) dropped, +/// inline `primary key` / `references` clauses elided (not enforced by +/// COPY, just noise), and column names lowercased. +/// +/// The `math` tier's large free-text columns (`Posts.body`, `PostHistory.text`, +/// …) contain rows whose embedded quotes don't strictly comply with RFC-4180, +/// which makes DuckDB's CSV dialect sniffer fail outright. `extra_copy_opts` +/// therefore disables auto-detection and pins the dialect explicitly (RFC-4180 +/// doubled-quote escaping), with `strict_mode false` + `ignore_errors true` to +/// tolerate the non-compliant minority of rows. (The smaller `dba` tier happened +/// to be sniffable, so the original empty options worked there.) 
+pub const STACKOVERFLOW: OriginData = OriginData { + url: "https://db.in.tum.de/~schmidt/data/stackoverflow_math.tar.gz", + archive_name: "stackoverflow_math.tar.gz", + archive: Archive::TarGz, + log_name: "stackoverflow", + ddl: r#" +CREATE TABLE "PostHistoryTypes" ("id" SMALLINT NOT NULL, "name" VARCHAR(50) NOT NULL); +CREATE TABLE "LinkTypes" ("id" SMALLINT NOT NULL, "name" VARCHAR(50) NOT NULL); +CREATE TABLE "PostTypes" ("id" SMALLINT NOT NULL, "name" VARCHAR(50) NOT NULL); +CREATE TABLE "CloseReasonTypes" ("id" SMALLINT NOT NULL, "name" VARCHAR(50) NOT NULL); +CREATE TABLE "VoteTypes" ("id" SMALLINT NOT NULL, "name" VARCHAR(50) NOT NULL); +CREATE TABLE "Users" ("id" INTEGER NOT NULL, "reputation" INTEGER NOT NULL, "creationdate" TIMESTAMP NOT NULL, "displayname" VARCHAR(40), "lastaccessdate" TIMESTAMP NOT NULL, "websiteurl" VARCHAR(200), "location" VARCHAR(300), "aboutme" TEXT, "views" INTEGER, "upvotes" INTEGER, "downvotes" INTEGER, "profileimageurl" VARCHAR(200), "accountid" INTEGER); +CREATE TABLE "Badges" ("id" INTEGER NOT NULL, "userid" INTEGER NOT NULL, "name" VARCHAR(50) NOT NULL, "date" TIMESTAMP NOT NULL, "class" SMALLINT NOT NULL, "tagbased" BOOLEAN NOT NULL); +CREATE TABLE "Posts" ("id" INTEGER NOT NULL, "posttypeid" SMALLINT, "acceptedanswerid" INTEGER, "parentid" INTEGER, "creationdate" TIMESTAMP, "score" INTEGER, "viewcount" INTEGER, "body" TEXT, "owneruserid" INTEGER, "ownerdisplayname" VARCHAR(40), "lasteditoruserid" INTEGER, "lasteditordisplayname" VARCHAR(40), "lasteditdate" TIMESTAMP, "lastactivitydate" TIMESTAMP, "title" VARCHAR(300), "tags" VARCHAR(4000), "answercount" INTEGER, "commentcount" INTEGER, "favoritecount" INTEGER, "closeddate" TIMESTAMP, "communityowneddate" TIMESTAMP, "contentlicense" VARCHAR(30)); +CREATE TABLE "Comments" ("id" INTEGER NOT NULL, "postid" INTEGER NOT NULL, "score" INTEGER, "text" VARCHAR(2000) NOT NULL, "creationdate" TIMESTAMP NOT NULL, "userdisplayname" VARCHAR(40), "userid" INTEGER, "contentlicense" 
VARCHAR(30)); +CREATE TABLE "PostHistory" ("id" INTEGER NOT NULL, "posthistorytypeid" SMALLINT, "postid" INTEGER, "revisionguid" VARCHAR(36), "creationdate" TIMESTAMP, "userid" INTEGER, "userdisplayname" VARCHAR(40), "comment" VARCHAR(800), "text" TEXT, "contentlicense" VARCHAR(30)); +CREATE TABLE "PostLinks" ("id" BIGINT NOT NULL, "creationdate" TIMESTAMP NOT NULL, "postid" INTEGER NOT NULL, "relatedpostid" INTEGER NOT NULL, "linktypeid" SMALLINT NOT NULL); +CREATE TABLE "Tags" ("id" INTEGER NOT NULL, "tagname" VARCHAR(35), "count" INTEGER NOT NULL, "excerptpostid" INTEGER, "wikipostid" INTEGER, "ismoderatoronly" BOOLEAN, "isrequired" BOOLEAN); +CREATE TABLE "Votes" ("id" INTEGER NOT NULL, "postid" INTEGER NOT NULL, "votetypeid" SMALLINT NOT NULL, "userid" INTEGER, "creationdate" TIMESTAMP, "bountyamount" INTEGER); +"#, + tables: &[ + ("PostHistoryTypes", "posthistorytypes"), + ("LinkTypes", "linktypes"), + ("PostTypes", "posttypes"), + ("CloseReasonTypes", "closereasontypes"), + ("VoteTypes", "votetypes"), + ("Users", "users"), + ("Badges", "badges"), + ("Posts", "posts"), + ("Comments", "comments"), + ("PostHistory", "posthistory"), + ("PostLinks", "postlinks"), + ("Tags", "tags"), + ("Votes", "votes"), + ], + extra_copy_opts: "AUTO_DETECT false, QUOTE '\"', ESCAPE '\"', strict_mode false, ignore_errors true", +}; + +/// IMDB/JOB data (zstd-compressed tar). Columns are already lowercase +/// upstream so no projection is needed at export time. `ESCAPE '\\'` + +/// `ignore_errors true` tolerate backslash-escaped quotes and dirty rows. 
+pub const JOB: OriginData = OriginData { + url: "https://db.in.tum.de/~schmidt/dbgen/job/imdb.tzst", + archive_name: "imdb.tzst", + archive: Archive::TarZst, + log_name: "job", + ddl: r#" +CREATE TABLE "char_name" ("id" INTEGER, "name" VARCHAR, "imdb_index" VARCHAR, "imdb_id" INTEGER, "name_pcode_nf" VARCHAR, "surname_pcode" VARCHAR, "md5sum" VARCHAR); +CREATE TABLE "company_name" ("id" INTEGER, "name" VARCHAR, "country_code" VARCHAR, "imdb_id" INTEGER, "name_pcode_nf" VARCHAR, "name_pcode_sf" VARCHAR, "md5sum" VARCHAR); +CREATE TABLE "keyword" ("id" INTEGER, "keyword" VARCHAR, "phonetic_code" VARCHAR); +CREATE TABLE "name" ("id" INTEGER, "name" VARCHAR, "imdb_index" VARCHAR, "imdb_id" INTEGER, "gender" VARCHAR, "name_pcode_cf" VARCHAR, "name_pcode_nf" VARCHAR, "surname_pcode" VARCHAR, "md5sum" VARCHAR); +CREATE TABLE "comp_cast_type" ("id" INTEGER, "kind" VARCHAR); +CREATE TABLE "company_type" ("id" INTEGER, "kind" VARCHAR); +CREATE TABLE "info_type" ("id" INTEGER, "info" VARCHAR); +CREATE TABLE "kind_type" ("id" INTEGER, "kind" VARCHAR); +CREATE TABLE "link_type" ("id" INTEGER, "link" VARCHAR); +CREATE TABLE "role_type" ("id" INTEGER, "role" VARCHAR); +CREATE TABLE "title" ("id" INTEGER, "title" VARCHAR, "imdb_index" VARCHAR, "kind_id" INTEGER, "production_year" INTEGER, "imdb_id" INTEGER, "phonetic_code" VARCHAR, "episode_of_id" INTEGER, "season_nr" INTEGER, "episode_nr" INTEGER, "series_years" VARCHAR, "md5sum" VARCHAR); +CREATE TABLE "aka_name" ("id" INTEGER, "person_id" INTEGER, "name" VARCHAR, "imdb_index" VARCHAR, "name_pcode_cf" VARCHAR, "name_pcode_nf" VARCHAR, "surname_pcode" VARCHAR, "md5sum" VARCHAR); +CREATE TABLE "aka_title" ("id" INTEGER, "movie_id" INTEGER, "title" VARCHAR, "imdb_index" VARCHAR, "kind_id" INTEGER, "production_year" INTEGER, "phonetic_code" VARCHAR, "episode_of_id" INTEGER, "season_nr" INTEGER, "episode_nr" INTEGER, "note" VARCHAR, "md5sum" VARCHAR); +CREATE TABLE "cast_info" ("id" INTEGER, "person_id" INTEGER, "movie_id" INTEGER, 
"person_role_id" INTEGER, "note" VARCHAR, "nr_order" INTEGER, "role_id" INTEGER); +CREATE TABLE "complete_cast" ("id" INTEGER, "movie_id" INTEGER, "subject_id" INTEGER, "status_id" INTEGER); +CREATE TABLE "movie_companies" ("id" INTEGER, "movie_id" INTEGER, "company_id" INTEGER, "company_type_id" INTEGER, "note" VARCHAR); +CREATE TABLE "movie_info" ("id" INTEGER, "movie_id" INTEGER, "info_type_id" INTEGER, "info" VARCHAR, "note" VARCHAR); +CREATE TABLE "movie_info_idx" ("id" INTEGER, "movie_id" INTEGER, "info_type_id" INTEGER, "info" VARCHAR, "note" VARCHAR); +CREATE TABLE "movie_keyword" ("id" INTEGER, "movie_id" INTEGER, "keyword_id" INTEGER); +CREATE TABLE "movie_link" ("id" INTEGER, "movie_id" INTEGER, "linked_movie_id" INTEGER, "link_type_id" INTEGER); +CREATE TABLE "person_info" ("id" INTEGER, "person_id" INTEGER, "info_type_id" INTEGER, "info" VARCHAR, "note" VARCHAR); +"#, + tables: &[ + ("char_name", "char_name"), + ("company_name", "company_name"), + ("keyword", "keyword"), + ("name", "name"), + ("comp_cast_type", "comp_cast_type"), + ("company_type", "company_type"), + ("info_type", "info_type"), + ("kind_type", "kind_type"), + ("link_type", "link_type"), + ("role_type", "role_type"), + ("title", "title"), + ("aka_name", "aka_name"), + ("aka_title", "aka_title"), + ("cast_info", "cast_info"), + ("complete_cast", "complete_cast"), + ("movie_companies", "movie_companies"), + ("movie_info", "movie_info"), + ("movie_info_idx", "movie_info_idx"), + ("movie_keyword", "movie_keyword"), + ("movie_link", "movie_link"), + ("person_info", "person_info"), + ], + extra_copy_opts: "ESCAPE '\\', QUOTE '\"', ignore_errors true", +}; + +/// Table names per origin, in the same order as the corresponding +/// `OriginData::tables` output column. Single source of truth shared by +/// [`table_specs`] (used by `SqlstormBenchmark`) and +/// `BenchmarkDataset::tables()` (the registration layer). 
+pub fn table_names(origin: SqlstormOrigin) -> &'static [&'static str] { + match origin { + SqlstormOrigin::TpcH => &[ + "customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier", + ], + SqlstormOrigin::TpcDs => &[ + "call_center", + "catalog_page", + "catalog_returns", + "catalog_sales", + "customer", + "customer_address", + "customer_demographics", + "date_dim", + "household_demographics", + "income_band", + "inventory", + "item", + "promotion", + "reason", + "ship_mode", + "store", + "store_returns", + "store_sales", + "time_dim", + "warehouse", + "web_page", + "web_returns", + "web_sales", + "web_site", + ], + SqlstormOrigin::StackOverflow => &[ + "posthistorytypes", + "linktypes", + "posttypes", + "closereasontypes", + "votetypes", + "users", + "badges", + "posts", + "comments", + "posthistory", + "postlinks", + "tags", + "votes", + ], + SqlstormOrigin::Job => &[ + "char_name", + "company_name", + "keyword", + "name", + "comp_cast_type", + "company_type", + "info_type", + "kind_type", + "link_type", + "role_type", + "title", + "aka_name", + "aka_title", + "cast_info", + "complete_cast", + "movie_companies", + "movie_info", + "movie_info_idx", + "movie_keyword", + "movie_link", + "person_info", + ], + } +} + +/// Table specs for an origin (schema inferred at registration time — `None`). +pub fn table_specs(origin: SqlstormOrigin) -> Vec { + table_names(origin) + .iter() + .map(|n| TableSpec::new(n, None)) + .collect() +} + +/// Download `cfg.url`, extract the archive, and convert each table to a +/// Parquet shard under `/parquet/`. Idempotent via a `.success` +/// marker written after the DuckDB script returns 0. +/// +/// Only runs for `file://` data URLs; remote dirs are assumed to already +/// contain the Parquet shards. 
+pub async fn generate_origin(data_url: &Url, cfg: &OriginData) -> anyhow::Result<()> { + if data_url.scheme() != "file" { + return Ok(()); + } + + let base_dir = data_url.to_file_path().map_err(|_| { + anyhow::anyhow!( + "Failed to convert data URL to filesystem path — ensure data_url uses 'file://' scheme" + ) + })?; + + let parquet_dir = base_dir.join(Format::Parquet.name()); + fs::create_dir_all(&parquet_dir)?; + + let success_marker = parquet_dir.join(".success"); + if success_marker.exists() { + info!( + "{}: base data already generated ({} present)", + cfg.log_name, + success_marker.display(), + ); + return Ok(()); + } + + let archive_path = download_data(base_dir.join(cfg.archive_name), cfg.url).await?; + let csv_dir = extract_archive(&archive_path, &base_dir, &cfg.archive)?; + let script = build_duckdb_script(&csv_dir, &parquet_dir, cfg); + + let output = Command::new("duckdb").arg("-c").arg(&script).output()?; + if !output.status.success() { + bail!( + "duckdb {} COPY failed:\nstdout={}\nstderr={}", + cfg.log_name, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + } + + fs::write(&success_marker, b"")?; + info!( + "{} base data generated in {} ({} Parquet shards)", + cfg.log_name, + parquet_dir.display(), + cfg.tables.len(), + ); + Ok(()) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Helpers +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Extract `archive_path` into `target_dir` and return the directory that +/// holds the resulting CSVs (either `target_dir` itself or a single +/// top-level subdirectory if the archive wraps its contents). 
fn extract_archive(
    archive_path: &Path,
    target_dir: &Path,
    archive: &Archive,
) -> anyhow::Result<PathBuf> {
    info!(
        "Extracting {} into {}",
        archive_path.display(),
        target_dir.display()
    );
    let output = match archive {
        Archive::TarGz => Command::new("tar")
            .arg("-xzf")
            .arg(archive_path)
            .arg("--directory")
            .arg(target_dir)
            .output()
            .context("failed to spawn tar; ensure it is on PATH")?,
        // `tar` alone cannot read .tzst, so we pipe via shell. The paths are
        // handed to bash as positional parameters instead of being spliced
        // into the script text, so quotes or spaces in them cannot break (or
        // inject into) the command. `pipefail` makes a failing `zstd` fail the
        // whole pipeline rather than being masked by `tar`'s exit status.
        Archive::TarZst => Command::new("bash")
            .arg("-c")
            .arg(r#"set -o pipefail; zstd -dc "$1" | tar -xf - -C "$2""#)
            .arg("bash") // becomes $0 inside the script
            .arg(archive_path) // $1
            .arg(target_dir) // $2
            .output()
            .context(
                "failed to spawn bash for zstd/tar extraction; ensure zstd and tar are on PATH",
            )?,
    };
    if !output.status.success() {
        bail!(
            "archive extraction failed:\nstdout={}\nstderr={}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        );
    }
    let csv_dir = locate_csv_dir(target_dir)?;
    info!("CSVs located at {}", csv_dir.display());
    Ok(csv_dir)
}

/// Locate the directory holding the extracted CSV files: `target_dir` itself
/// if it has any `.csv` files, else the first immediate subdirectory that
/// does. Directory iteration order is whatever `fs::read_dir` yields, so the
/// result is only well-defined when at most one subdirectory holds CSVs.
///
/// # Errors
///
/// Fails if a directory cannot be read or no candidate contains a `.csv` file.
fn locate_csv_dir(target_dir: &Path) -> anyhow::Result<PathBuf> {
    if has_csv(target_dir)? {
        return Ok(target_dir.to_owned());
    }
    for entry in
        fs::read_dir(target_dir).with_context(|| format!("reading {}", target_dir.display()))?
    {
        let path = entry?.path();
        if path.is_dir() && has_csv(&path)? {
            return Ok(path);
        }
    }
    bail!(
        "no CSV files found in {} after extraction; verify the archive contents",
        target_dir.display()
    )
}

/// Report whether `dir` directly contains at least one entry whose extension
/// is `csv` (compared ASCII-case-insensitively). Does not recurse.
fn has_csv(dir: &Path) -> anyhow::Result<bool> {
    for entry in
        fs::read_dir(dir).with_context(|| format!("reading directory {}", dir.display()))?
    {
        let is_csv = entry?
            .path()
            .extension()
            .and_then(|e| e.to_str())
            .is_some_and(|e| e.eq_ignore_ascii_case("csv"));
        if is_csv {
            return Ok(true);
        }
    }
    Ok(false)
}

/// Render `path` for use inside a single-quoted SQL string literal by
/// doubling every embedded `'`.
fn sql_path_literal(path: &Path) -> String {
    path.display().to_string().replace('\'', "''")
}

/// Build the DuckDB SQL script: inline DDL, then for each table COPY the CSV
/// in and COPY out to Parquet. The DDL is inlined (not `.read`-ed) because
/// `duckdb -c` does not accept dot-commands.
///
/// Paths are escaped with [`sql_path_literal`] so a data directory containing
/// an apostrophe does not terminate the SQL string early.
fn build_duckdb_script(csv_dir: &Path, parquet_dir: &Path, cfg: &OriginData) -> String {
    let mut script = cfg.ddl.to_string();
    let extra = if cfg.extra_copy_opts.is_empty() {
        String::new()
    } else {
        format!(", {}", cfg.extra_copy_opts)
    };
    for (upstream, output) in cfg.tables {
        let csv_path = csv_dir.join(format!("{upstream}.csv"));
        script.push_str(&format!(
            "COPY \"{upstream}\" FROM '{csv}' (FORMAT csv, DELIMITER ',', NULL '', HEADER false{extra});\n",
            csv = sql_path_literal(&csv_path),
        ));
        let out_path = parquet_dir.join(format!("{output}.parquet"));
        script.push_str(&format!(
            "COPY (SELECT * FROM \"{upstream}\") TO '{out}' (FORMAT PARQUET);\n",
            out = sql_path_literal(&out_path),
        ));
    }
    script
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The `(upstream, output)` pairs in each origin's `tables` must agree
    /// with the lowercase output names returned by `table_names(origin)`.
    /// Without this guard a future edit could rename one side without the
    /// other; both registration (`BenchmarkDataset::tables()`) and data-gen
    /// (`generate_origin`) would silently disagree.
    #[test]
    fn origin_data_tables_match_table_names() {
        for (origin, cfg) in [
            (SqlstormOrigin::StackOverflow, &STACKOVERFLOW),
            (SqlstormOrigin::Job, &JOB),
        ] {
            let names = table_names(origin);
            let outputs: Vec<&str> = cfg.tables.iter().map(|(_, out)| *out).collect();
            assert_eq!(
                outputs.as_slice(),
                names,
                "{} tables out of sync with table_names()",
                cfg.log_name,
            );
        }
    }

    /// StackOverflow is pinned to the `math` (12 GB) tier, not `dba`.
+ #[test] + fn stackoverflow_uses_math_tier() { + assert!( + STACKOVERFLOW.url.ends_with("stackoverflow_math.tar.gz"), + "url={}", + STACKOVERFLOW.url + ); + assert_eq!(STACKOVERFLOW.archive_name, "stackoverflow_math.tar.gz"); + } + + /// Every upstream name in `tables` must have a matching `CREATE TABLE ""` + /// in the origin's DDL, and vice versa. A drift here (a renamed table on one + /// side only) would otherwise surface only as a DuckDB COPY failure during + /// nightly data-gen, never in CI. + #[test] + fn origin_data_ddl_tables_match_copy_tables() { + for cfg in [&STACKOVERFLOW, &JOB] { + let mut ddl_tables: Vec<&str> = cfg + .ddl + .split("CREATE TABLE \"") + .skip(1) + .map(|rest| &rest[..rest.find('"').expect("unterminated CREATE TABLE name")]) + .collect(); + ddl_tables.sort_unstable(); + let mut copy_tables: Vec<&str> = + cfg.tables.iter().map(|(upstream, _)| *upstream).collect(); + copy_tables.sort_unstable(); + assert_eq!( + ddl_tables, copy_tables, + "{} DDL CREATE TABLE names disagree with tables[].0", + cfg.log_name, + ); + } + } +} diff --git a/vortex-bench/src/sqlstorm/mod.rs b/vortex-bench/src/sqlstorm/mod.rs new file mode 100644 index 00000000000..364ff75ad45 --- /dev/null +++ b/vortex-bench/src/sqlstorm/mod.rs @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! SQLStorm benchmark: a TPC-DS-shaped suite over a vendored set of 125 SQLStorm +//! queries per origin (500 total), curated as the intersection of queries that +//! succeed on DuckDB and DataFusion against the source data. See +//! `vortex-bench/sqlstorm/README.md` for layout, refresh procedure, and the +//! pinned upstream SHA. + +use std::fs; +use std::path::Path; +use std::str::FromStr; + +pub mod data; +pub mod sqlstorm_benchmark; + +pub use sqlstorm_benchmark::SqlstormBenchmark; + +/// The four SQLStorm base datasets ("origins"). 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SqlstormOrigin { + StackOverflow, + Job, + TpcH, + TpcDs, +} + +impl SqlstormOrigin { + /// Stable lowercase identifier; also the vendored-queries subdirectory name. + pub fn name(self) -> &'static str { + match self { + SqlstormOrigin::StackOverflow => "stackoverflow", + SqlstormOrigin::Job => "job", + SqlstormOrigin::TpcH => "tpch", + SqlstormOrigin::TpcDs => "tpcds", + } + } + + /// Parse an origin from its `name()` string. Returns `None` for unknown names. + pub fn from_name(name: &str) -> Option { + match name { + "stackoverflow" => Some(SqlstormOrigin::StackOverflow), + "job" => Some(SqlstormOrigin::Job), + "tpch" => Some(SqlstormOrigin::TpcH), + "tpcds" => Some(SqlstormOrigin::TpcDs), + _ => None, + } + } +} + +impl FromStr for SqlstormOrigin { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + Self::from_name(s).ok_or_else(|| { + anyhow::anyhow!( + "unknown sqlstorm origin: {s:?}; valid values are stackoverflow, job, tpch, tpcds" + ) + }) + } +} + +/// Load the vendored, confirmed-working queries for an origin from +/// `vortex-bench/sqlstorm//*.sql`, sorted by query id for stable ordering. +pub fn sqlstorm_queries(origin: SqlstormOrigin) -> anyhow::Result> { + let dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("sqlstorm") + .join(origin.name()); + let mut entries: Vec<(usize, String)> = Vec::new(); + for entry in + fs::read_dir(&dir).map_err(|e| anyhow::anyhow!("reading {}: {e}", dir.display()))? 
+ { + let path = entry?.path(); + if path.extension().and_then(|e| e.to_str()) != Some("sql") { + continue; + } + let id: usize = path + .file_stem() + .and_then(|s| s.to_str()) + .and_then(|s| s.parse().ok()) + .ok_or_else(|| anyhow::anyhow!("non-numeric query file name: {}", path.display()))?; + entries.push((id, fs::read_to_string(&path)?)); + } + entries.sort_by_key(|(id, _)| *id); + Ok(entries) +} diff --git a/vortex-bench/src/sqlstorm/sqlstorm_benchmark.rs b/vortex-bench/src/sqlstorm/sqlstorm_benchmark.rs new file mode 100644 index 00000000000..c5c4de094ef --- /dev/null +++ b/vortex-bench/src/sqlstorm/sqlstorm_benchmark.rs @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! SQLStorm `Benchmark` implementation, parameterized by origin. + +use anyhow::Result; +use anyhow::anyhow; +use glob::Pattern; +use url::Url; + +use crate::Benchmark; +use crate::BenchmarkDataset; +use crate::Format; +use crate::IdempotentPath; +use crate::TableSpec; +use crate::sqlstorm::SqlstormOrigin; +use crate::sqlstorm::data; +use crate::sqlstorm::sqlstorm_queries; +use crate::tpcds::TpcDsBenchmark; +use crate::tpch::benchmark::TpcHBenchmark; + +/// Fixed TPC scale factor for the SQLStorm TPC-H / TPC-DS origins. SQLStorm has +/// no user-facing scale factor (see `vortex-bench/sqlstorm/README.md`); this is +/// the single fixed point those two origins run at. +const SQLSTORM_TPC_SCALE_FACTOR: &str = "10.0"; + +/// SQLStorm benchmark over one origin's vendored query sample. +pub struct SqlstormBenchmark { + origin: SqlstormOrigin, + data_url: Url, +} + +impl SqlstormBenchmark { + /// Create a benchmark for `origin`, resolving its data directory (or a remote override). + pub fn new(origin: SqlstormOrigin, use_remote_data_dir: Option) -> Result { + Ok(Self { + origin, + data_url: Self::create_data_url(use_remote_data_dir.as_deref(), origin)?, + }) + } + + /// Resolve the base data URL for `origin`. 
+ /// + /// TPC-H and TPC-DS use dedicated datasets under + /// `/`, generated by SQLStorm at that fixed + /// scale, separate from the standalone benchmarks' SF1 data. StackOverflow + /// and JOB live under `sqlstorm//`, mirroring the vendored-queries + /// layout in `vortex-bench/sqlstorm//`. + fn create_data_url(remote_data_dir: Option<&str>, origin: SqlstormOrigin) -> Result { + if let Some(remote) = remote_data_dir { + let mut url = Url::parse(remote)?; + if !url.path().ends_with('/') { + url.set_path(&format!("{}/", url.path())); + } + return Ok(url); + } + let dir = match origin { + SqlstormOrigin::TpcH => "tpch".to_data_path().join(SQLSTORM_TPC_SCALE_FACTOR), + SqlstormOrigin::TpcDs => "tpcds".to_data_path().join(SQLSTORM_TPC_SCALE_FACTOR), + SqlstormOrigin::StackOverflow | SqlstormOrigin::Job => { + "sqlstorm".to_data_path().join(origin.name()) + } + }; + Url::from_directory_path(&dir) + .map_err(|_| anyhow!("Failed to create URL from directory path: {:?}", dir)) + } +} + +#[async_trait::async_trait] +impl Benchmark for SqlstormBenchmark { + fn queries(&self) -> Result> { + sqlstorm_queries(self.origin) + } + + async fn generate_base_data(&self) -> Result<()> { + match self.origin { + SqlstormOrigin::TpcH => { + TpcHBenchmark::new(SQLSTORM_TPC_SCALE_FACTOR.to_string(), None)? + .generate_base_data() + .await + } + SqlstormOrigin::TpcDs => { + TpcDsBenchmark::new(SQLSTORM_TPC_SCALE_FACTOR.to_string(), None)? 
+ .generate_base_data() + .await + } + SqlstormOrigin::StackOverflow => { + data::generate_origin(&self.data_url, &data::STACKOVERFLOW).await + } + SqlstormOrigin::Job => data::generate_origin(&self.data_url, &data::JOB).await, + } + } + + fn dataset(&self) -> BenchmarkDataset { + BenchmarkDataset::Sqlstorm { + origin: self.origin.name().to_string(), + } + } + + fn dataset_name(&self) -> &str { + "sqlstorm" + } + + fn dataset_display(&self) -> String { + format!("sqlstorm({})", self.origin.name()) + } + + fn data_url(&self) -> &Url { + &self.data_url + } + + fn table_specs(&self) -> Vec { + data::table_specs(self.origin) + } + + #[expect(clippy::expect_used)] + fn pattern(&self, table_name: &str, format: Format) -> Option { + // Match each origin's on-disk layout: the reused TPC-H dataset shards large + // tables as `
_.parquet` (mirroring `TpcHBenchmark`), while TPC-DS and + // our single-file StackOverflow/JOB exports use `
.`. + let glob = match self.origin { + SqlstormOrigin::TpcH => format!("{}_*.{}", table_name, format.ext()), + _ => format!("{}.{}", table_name, format.ext()), + }; + Some(glob.parse().expect("valid glob pattern")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::file::data_dir; + + /// Default-data-dir resolution must match the layout the CI harness + /// expects: TPC-H / TPC-DS use dedicated per-dataset SF directories + /// (at `SQLSTORM_TPC_SCALE_FACTOR`); StackOverflow and JOB land under a + /// shared `sqlstorm//` parent + /// that mirrors the in-tree vendored-queries layout + /// (`vortex-bench/sqlstorm//`). The nightly matrix in + /// `.github/workflows/nightly-bench.yml` is the consumer of this contract. + #[test] + fn data_url_layout_per_origin() -> Result<()> { + let cases = [ + ( + SqlstormOrigin::TpcH, + data_dir().join("tpch").join(SQLSTORM_TPC_SCALE_FACTOR), + ), + ( + SqlstormOrigin::TpcDs, + data_dir().join("tpcds").join(SQLSTORM_TPC_SCALE_FACTOR), + ), + ( + SqlstormOrigin::StackOverflow, + data_dir().join("sqlstorm").join("stackoverflow"), + ), + (SqlstormOrigin::Job, data_dir().join("sqlstorm").join("job")), + ]; + for (origin, expected) in cases { + let bench = SqlstormBenchmark::new(origin, None)?; + let got = bench + .data_url() + .to_file_path() + .map_err(|_| anyhow!("data_url not a file URL for {origin:?}"))?; + assert_eq!(got, expected, "data_url mismatch for {origin:?}"); + } + Ok(()) + } + + /// Remote `--opt remote-data-dir=…` overrides take precedence over the + /// local layout for every origin and end up trailing-slash-terminated + /// (the runner builds `//` paths off this URL). 
+ #[test] + fn remote_data_dir_overrides_all_origins() -> Result<()> { + for origin in [ + SqlstormOrigin::TpcH, + SqlstormOrigin::TpcDs, + SqlstormOrigin::StackOverflow, + SqlstormOrigin::Job, + ] { + let bench = SqlstormBenchmark::new( + origin, + Some("s3://vortex-bench-dev-eu/parquet".to_string()), + )?; + assert_eq!( + bench.data_url().as_str(), + "s3://vortex-bench-dev-eu/parquet/" + ); + } + Ok(()) + } +} diff --git a/vortex-bench/src/v3.rs b/vortex-bench/src/v3.rs index 7e7a3e2a553..d46c791bc2b 100644 --- a/vortex-bench/src/v3.rs +++ b/vortex-bench/src/v3.rs @@ -294,6 +294,7 @@ fn canonical_tpc_scale_factor(scale_factor: &str) -> String { /// | `GhArchive` | `gharchive` | `None` | `None` | | /// | `Appian` | `appian` | `None` | `None` | Static dataset; no scale factor. | /// | `PublicBi { name }` | `public-bi` | dataset name (e.g. `cms-provider`) | `None` | Sub-dataset name lives in `dataset_variant`. | +/// | `Sqlstorm { origin }` | `sqlstorm` | origin name (e.g. `tpch`) | `None` | Origin lives in `dataset_variant`. | pub fn benchmark_dataset_dims(d: &BenchmarkDataset) -> (String, Option, Option) { match d { BenchmarkDataset::TpcH { scale_factor } => ( @@ -322,6 +323,9 @@ pub fn benchmark_dataset_dims(d: &BenchmarkDataset) -> (String, Option, BenchmarkDataset::PolarSignals { .. } => ("polarsignals".to_string(), None, None), BenchmarkDataset::Fineweb => ("fineweb".to_string(), None, None), BenchmarkDataset::GhArchive => ("gharchive".to_string(), None, None), + BenchmarkDataset::Sqlstorm { origin } => { + ("sqlstorm".to_string(), Some(origin.clone()), None) + } BenchmarkDataset::Appian => ("appian".to_string(), None, None), } } @@ -724,6 +728,14 @@ mod tests { assert_eq!(variant, None, "dataset_variant for {case:?}"); assert_eq!(sf, None, "scale_factor for {case:?}"); } + + // Sqlstorm: origin goes in dataset_variant. 
+ let (ds, variant, sf) = benchmark_dataset_dims(&BenchmarkDataset::Sqlstorm { + origin: "tpch".to_string(), + }); + assert_eq!(ds, "sqlstorm"); + assert_eq!(variant, Some("tpch".to_string())); + assert_eq!(sf, None); } #[test]