diff --git a/duckdb-vortex-partitioned/benchmark.sh b/duckdb-vortex-partitioned/benchmark.sh index 8bd613314..f70cfaa77 100755 --- a/duckdb-vortex-partitioned/benchmark.sh +++ b/duckdb-vortex-partitioned/benchmark.sh @@ -1,47 +1,34 @@ #!/bin/bash +set -Eeuo pipefail + # Install -sudo apt-get update -y -sudo apt-get install -y ninja-build cmake build-essential make ccache pip clang pkg-config - -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --no-modify-path - -export CC=clang -export CXX=clang++ -git clone https://github.com/vortex-data/duckdb-vortex --recursive -cd duckdb-vortex -git fetch --tags -git checkout v0.44.0 -git submodule update --init --recursive -GEN=ninja NATIVE_ARCH=1 LTO=thin make -export PATH="`pwd`/build/release/:$PATH" -cd .. +export HOME=${HOME:=~} +curl https://install.duckdb.org | sh +export PATH=$HOME'/.duckdb/cli/latest':$PATH + +duckdb -c "INSTALL vortex;" # Load the data seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue --progress=dot:giga https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet' -# Convert parquet files to vortex partitioned +# Convert parquet files to Vortex echo -n "Load time: " seq 0 99 | command time -f '%e' xargs -P"$(nproc)" -I{} bash -c ' - if [ ! -f "hits_{}.vortex" ]; then - duckdb -c " - COPY ( - SELECT * - REPLACE ( - make_date(EventDate) AS EventDate, - epoch_ms(EventTime * 1000) as EventTime - ) - FROM read_parquet('"'"'hits_{}.parquet'"'"', binary_as_string=True) - ) - TO '"'"'hits_{}.vortex'"'"' (FORMAT VORTEX) - " - fi + duckdb -c " + LOAD vortex; + COPY ( + SELECT * + FROM read_parquet('"'"'hits_{}.parquet'"'"', binary_as_string=True) + ) + TO '"'"'hits_{}.vortex'"'"' (FORMAT vortex); + " ' +# Create view and macro echo -n "Load time: " -command time -f '%e' duckdb hits-partitioned.db -c "CREATE VIEW hits AS SELECT * FROM read_vortex('hits_*.vortex')"; +command time -f '%e' duckdb hits-partitioned.db -f create.sql -# Run the queries echo 'partitioned' ./run.sh 'hits-partitioned.db' 2>&1 | tee log-p.txt diff --git a/duckdb-vortex-partitioned/create.sql b/duckdb-vortex-partitioned/create.sql new file mode 100644 index 000000000..0c9223714 --- /dev/null +++ b/duckdb-vortex-partitioned/create.sql @@ -0,0 +1,8 @@ +LOAD vortex; + +CREATE VIEW hits AS +SELECT * + REPLACE (make_date(EventDate) AS EventDate) +FROM read_vortex('hits_*.vortex'); + +CREATE MACRO toDateTime(t) AS epoch_ms(t * 1000); diff --git a/duckdb-vortex-partitioned/queries.sql b/duckdb-vortex-partitioned/queries.sql index b4115ee3a..ceebb80d7 100644 --- a/duckdb-vortex-partitioned/queries.sql +++ b/duckdb-vortex-partitioned/queries.sql @@ -16,7 +16,7 @@ SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, extract(minute FROM toDateTime(EventTime)) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; SELECT UserID FROM hits WHERE UserID = 435090932899640449; SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; @@ -40,4 +40,4 @@ SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate > SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; +SELECT DATE_TRUNC('minute', toDateTime(EventTime)) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', toDateTime(EventTime)) ORDER BY DATE_TRUNC('minute', toDateTime(EventTime)) LIMIT 10 OFFSET 1000; diff --git a/duckdb-vortex-partitioned/results/c6a.4xlarge.json b/duckdb-vortex-partitioned/results/c6a.4xlarge.json index 9c1d37cda..5e9e57855 100644 --- a/duckdb-vortex-partitioned/results/c6a.4xlarge.json +++ b/duckdb-vortex-partitioned/results/c6a.4xlarge.json @@ -1,61 +1,58 @@ { "system": "DuckDB (Vortex, partitioned)", - "date": "2025-08-06", + "date": "2026-05-01", "machine": "c6a.4xlarge", "cluster_size": 1, "proprietary": "no", "hardware": "cpu", "tuned": "no", "comment": "", - "tags": ["Rust", "column-oriented", "embedded", "stateless"], - - "load_time": 742.26, - "data_size": 15961049404, - + "load_time": 93.89, + "data_size": 16071424448, "result": [ - [0.184,0.013,0.003], - [0.523,0.014,0.014], - [1.610,0.035,0.035], - [3.435,0.049,0.052], - [3.466,0.329,0.332], - [4.172,0.297,0.292], - [0.181,0.022,0.020], - [0.567,0.019,0.018], - [4.334,0.415,0.405], - [4.319,0.561,0.558], - [2.784,0.097,0.091], - [3.485,0.124,0.107], - [4.721,0.307,0.316], - [7.135,0.672,0.675], - [4.478,0.343,0.342], - [2.783,0.392,0.387], - [7.061,0.852,0.847], - [6.856,0.740,0.628], - [9.200,1.517,1.505], - [1.971,0.038,0.031], - [33.849,0.556,0.530], - [36.486,0.679,0.636], - [40.129,1.065,1.072], - [7.566,0.392,0.382], - [1.880,0.122,0.062], - [4.896,0.096,0.098], - [1.791,0.126,0.031], - [34.787,0.863,0.790], - [28.059,9.317,9.314], - [0.717,0.033,0.033], - [7.835,0.279,0.317], - [13.900,0.420,0.403], - [10.751,1.919,1.892], - [34.222,2.047,1.980], - [34.208,2.288,2.131], - [1.861,0.511,0.506], - [0.258,0.025,0.024], - [0.840,0.012,0.021], - [1.098,0.024,0.018], - [1.265,0.063,0.053], - [0.815,0.022,0.008], - [0.889,0.010,0.010], - [0.833,0.032,0.012] + [0.047,0.007,0.007], + [0.058,0.021,0.021], + [0.104,0.037,0.036], + [1.055,0.055,0.055], + [1.617,0.285,0.282], + [1.575,0.331,0.327], + [0.053,0.018,0.017], + [0.069,0.025,0.025], + [1.438,0.437,0.434], + [1.865,0.597,0.587], + [0.921,0.125,0.122], + [1.440,0.143,0.137], + [1.906,0.368,0.364], + [3.443,0.721,0.713], + [1.668,0.410,0.404], + [1.030,0.365,0.362], + [3.402,0.863,0.860], + [3.167,0.618,0.613], + [4.845,1.504,1.484], + [0.453,0.041,0.032], + [17.413,0.741,0.699], + [18.787,0.756,0.748], + [25.755,1.186,1.047], + [29.199,3.292,2.460], + [0.135,0.047,0.049], + [2.083,0.132,0.119], + [0.493,0.036,0.088], + [17.434,0.804,0.783], + [14.171,4.728,4.721], + [0.102,0.039,0.037], + [3.196,0.337,0.336], + [6.389,0.504,0.505], + [5.108,1.712,1.715], + [17.288,1.932,1.906], + [17.283,2.033,2.055], + [0.690,0.579,0.577], + [0.106,0.034,0.037], + [0.072,0.024,0.022], + [0.089,0.030,0.024], + [0.168,0.068,0.057], + [0.083,0.018,0.018], + [0.070,0.017,0.017], + [0.082,0.023,0.022] ] } diff --git a/duckdb-vortex-partitioned/run.sh b/duckdb-vortex-partitioned/run.sh index 71bd5c4a5..30484964b 100755 --- a/duckdb-vortex-partitioned/run.sh +++ b/duckdb-vortex-partitioned/run.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -Eeuo pipefail + TRIES=3 cat queries.sql | while read -r query; do @@ -9,6 +11,8 @@ cat queries.sql | while read -r query; do echo "$query"; cli_params=() cli_params+=("-c") + cli_params+=("LOAD vortex;") + cli_params+=("-c") cli_params+=(".timer on") for i in $(seq 1 $TRIES); do cli_params+=("-c") diff --git a/duckdb-vortex/results/c6a.4xlarge.json b/duckdb-vortex/results/c6a.4xlarge.json index 9510cdafa..512212c04 100644 --- a/duckdb-vortex/results/c6a.4xlarge.json +++ b/duckdb-vortex/results/c6a.4xlarge.json @@ -1,6 +1,6 @@ { "system": "DuckDB (Vortex, single)", - "date": "2026-01-26", + "date": "2026-05-01", "machine": "c6a.4xlarge", "cluster_size": 1, "proprietary": "no", @@ -8,51 +8,51 @@ "tuned": "no", "comment": "", "tags": ["Rust", "column-oriented", "embedded", "stateless"], - "load_time": 85.49, - "data_size": 15973777724, + "load_time": 138.61, + "data_size": 15731820628, "result": [ - [0.045,0.010,0.010], - [0.059,0.017,0.017], - [0.089,0.034,0.031], - [1.028,0.045,0.046], - [1.581,0.228,0.231], - [1.533,0.325,0.322], - [0.046,0.010,0.009], - [0.068,0.023,0.022], - [1.358,0.397,0.397], - [1.672,0.531,0.535], - [0.844,0.107,0.108], - [1.397,0.128,0.127], - [1.854,0.343,0.344], - [3.187,0.809,0.823], - [1.416,0.514,0.512], - [0.948,0.332,0.336], - [3.154,1.536,1.516], - [2.874,1.293,1.313], - [4.454,2.336,2.352], - [0.307,0.033,0.031], - [14.204,0.705,0.692], - [15.012,0.735,0.720], - [15.512,0.899,0.893], - [19.282,5.246,7.634], - [0.148,0.066,0.059], - [1.608,0.131,0.128], - [0.271,0.061,0.061], - [14.155,0.737,0.740], - [10.870,4.797,4.816], - [0.088,0.033,0.031], - [2.961,0.347,0.348], - [6.042,0.475,0.472], - [5.044,1.858,1.860], - [14.017,2.177,2.078], - [14.009,2.901,2.884], - [0.640,0.535,0.542], - [0.191,0.143,0.145], - [0.118,0.090,0.106], - [0.158,0.156,0.135], - [0.389,0.381,0.375], - [0.084,0.036,0.039], - [0.072,0.024,0.023], - [0.071,0.025,0.021] + [0.058,0.012,0.012], + [0.074,0.015,0.015], + [0.108,0.035,0.033], + [0.882,0.062,0.058], + [1.551,0.296,0.277], + [1.455,0.297,0.335], + [0.056,0.009,0.009], + [0.085,0.018,0.018], + [1.409,0.447,0.437], + [1.760,0.602,0.594], + [0.836,0.119,0.117], + [1.354,0.142,0.136], + [1.854,0.355,0.352], + [3.272,0.729,0.730], + [1.553,0.406,0.406], + [0.955,0.356,0.348], + [3.205,0.861,0.866], + [2.955,0.607,0.611], + [4.613,1.502,1.496], + [0.447,0.042,0.036], + [17.118,0.764,0.741], + [18.552,0.875,0.838], + [22.598,1.233,1.199], + [42.321,8.631,8.121], + [0.284,0.072,0.064], + [1.701,0.170,0.145], + [0.364,0.077,0.065], + [17.098,0.823,0.781], + [13.919,4.710,4.729], + [0.117,0.034,0.034], + [2.929,0.351,0.349], + [6.030,0.517,0.530], + [4.896,1.728,1.722], + [16.989,1.982,1.985], + [16.996,2.102,2.099], + [0.694,0.571,0.572], + [0.340,0.147,0.149], + [0.277,0.111,0.093], + [0.234,0.159,0.166], + [0.549,0.373,0.226], + [0.118,0.076,0.054], + [0.091,0.022,0.022], + [0.096,0.021,0.025] ] }