diff --git a/byconity/benchmark.sh b/byconity/benchmark.sh
index af59bdff2..a7c00995a 100755
--- a/byconity/benchmark.sh
+++ b/byconity/benchmark.sh
@@ -36,6 +36,10 @@ byconity --database bench --query "INSERT INTO hits FORMAT TSV" < hits.tsv
 END=$(date +%s)
 echo "Load time: $(echo "$END - $START" | bc)"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv hits.tsv.gz
+
 # NOTE: sometimes may hung due to docker-compose, using docker directly may help
 ./run.sh
diff --git a/cedardb/benchmark.sh b/cedardb/benchmark.sh
index 530be23dc..1cc728ed6 100755
--- a/cedardb/benchmark.sh
+++ b/cedardb/benchmark.sh
@@ -37,6 +37,10 @@ PGPASSWORD=test command time -f '%e' psql -h localhost -U postgres -q -t -c "COP
 echo -n "Data size: "
 PGPASSWORD=test psql -h localhost -U postgres -q -t -c "SELECT pg_total_relation_size('hits');"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f data/hits.tsv
+
 # run benchmark
 echo "running benchmark..."
 ./run.sh 2>&1 | tee log.txt
diff --git a/chdb/benchmark.sh b/chdb/benchmark.sh
index 2f5daed52..532d29ef0 100755
--- a/chdb/benchmark.sh
+++ b/chdb/benchmark.sh
@@ -16,6 +16,10 @@ pigz -d -f hits.csv.gz
 echo -n "Load time: "
 command time -f '%e' ./load.py
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.csv
+
 # Run the queries
 ./run.sh 2>&1 | tee log.txt
diff --git a/citus/benchmark.sh b/citus/benchmark.sh
index d23b3bf64..4d88ecda6 100755
--- a/citus/benchmark.sh
+++ b/citus/benchmark.sh
@@ -26,6 +26,10 @@ command time -f '%e' psql -U postgres -h localhost -d postgres test -q -t -c "\\
 # COPY 99997497
 # Time: 1579203.482 ms (26:19.203)
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/clickhouse/benchmark.sh b/clickhouse/benchmark.sh
index 34c19b4f0..edb355852 100755
--- a/clickhouse/benchmark.sh
+++ b/clickhouse/benchmark.sh
@@ -44,6 +44,10 @@ sudo chown clickhouse:clickhouse /var/lib/clickhouse/user_files/hits_*.parquet
 echo -n "Load time: "
 clickhouse-client --time --query "INSERT INTO hits SELECT * FROM file('hits_*.parquet')" --max-insert-threads $(( $(nproc) / 4 ))
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+sudo rm -f /var/lib/clickhouse/user_files/hits_*.parquet
+
 # Run the queries
 ./run.sh "$1"
diff --git a/cloudberry/benchmark.sh b/cloudberry/benchmark.sh
index 4271c6030..cda1d9fa1 100755
--- a/cloudberry/benchmark.sh
+++ b/cloudberry/benchmark.sh
@@ -120,6 +120,9 @@ elif [[ $1 == 'test' ]]; then
     if [[ $2 != 'no_dl' ]]; then
     echo -n "Load time: "
    command time -f '%e' sudo -iu gpadmin psql -d postgres -t -c "ANALYZE hits;"; fi
     du -sh /data0*
+    # Drop the downloaded source files so the sync at the top of run.sh
+    # doesn't flush their pages and inflate cold-run prep time.
+    sudo -u gpadmin rm -f ~gpadmin/hits.tsv ~gpadmin/hits.tsv.gz
     sudo -iu gpadmin /home/gpadmin/run.sh 2>&1 | tee log.txt
     cat log.txt | grep -oP 'Time: \d+\.\d+ ms|psql: error' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/; s/^.*psql: error.*$/null/' |awk '{ if (i % 3 == 0) { printf "[" }; if ($1 == "null") { printf $1 } else { printf $1 / 1000 }; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
diff --git a/cockroachdb/benchmark.sh b/cockroachdb/benchmark.sh
index 4951f6cfe..ca714c1c9 100755
--- a/cockroachdb/benchmark.sh
+++ b/cockroachdb/benchmark.sh
@@ -33,6 +33,10 @@ cockroach sql --insecure --host=localhost --database=test --execute="IMPORT INTO
 END=$(date +%s)
 echo "Load time: $(echo "$END - $START" | bc)"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+sudo rm -f /tmp/hits.csv.gz $CRDBDATADIR/extern/hits.csv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/cratedb/benchmark.sh b/cratedb/benchmark.sh
index 7d5307b29..011f981e1 100755
--- a/cratedb/benchmark.sh
+++ b/cratedb/benchmark.sh
@@ -68,6 +68,10 @@ if [[ $MODE == "tuned" ]]; then
     psql -U crate -h localhost --no-password -t -c "REFRESH TABLE hits; OPTIMIZE TABLE hits;"
 fi;
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+sudo rm -f /tmp/hits.tsv
+
 # Some queries don't fit into the available heap space and raise an CircuitBreakingException
 ./run.sh "$MODE" 2>&1 | tee log.txt
diff --git a/databend/benchmark.sh b/databend/benchmark.sh
index 6fedd2978..29d5dc783 100755
--- a/databend/benchmark.sh
+++ b/databend/benchmark.sh
@@ -73,4 +73,8 @@ du -bcs _data | grep total
 # curl 'http://default@localhost:8124/' --data-binary "select humanize_size(bytes_compressed) from fuse_snapshot('default', 'hits') order by timestamp desc limit 1"
 # 18.48 GiB
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
diff --git a/doris/benchmark.sh b/doris/benchmark.sh
index b57738670..5ccc0a07f 100755
--- a/doris/benchmark.sh
+++ b/doris/benchmark.sh
@@ -228,6 +228,10 @@ echo "$LOADTIME" > loadtime
 du -bs "$DORIS_HOME"/be/storage/ | cut -f1 | tee storage_size
 echo "Data size: $(cat storage_size)"
 
+# Drop the downloaded source files so the per-query sync below
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f "$BE_DATA_DIR/user_files_secure"/hits_*.parquet
+
 mysql -h 127.0.0.1 -P9030 -uroot hits -e "set global enable_sql_cache = false"
 # Dataset contains 99997497 rows, storage size is about 13319588503 bytes
 mysql -h 127.0.0.1 -P9030 -uroot hits -e "SELECT count(*) FROM hits"
diff --git a/druid/benchmark.sh b/druid/benchmark.sh
index 6a3139784..72c2fea66 100755
--- a/druid/benchmark.sh
+++ b/druid/benchmark.sh
@@ -36,6 +36,10 @@ command time -f '%e' ./apache-druid-${VERSION}/bin/post-index-task --file ingest
 # The command above will fail due to timeout but still continue to run in background.
 # The loading time should be checked from the logs.
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 # Run the queries
 ./run.sh
diff --git a/duckdb-vortex/benchmark.sh b/duckdb-vortex/benchmark.sh
index cd25a6409..f8100ae6c 100755
--- a/duckdb-vortex/benchmark.sh
+++ b/duckdb-vortex/benchmark.sh
@@ -22,6 +22,10 @@ command time -f '%e' duckdb hits-single.db -f create.sql
 echo 'single'
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.parquet
+
 ./run.sh 'hits-single.db' 2>&1 | tee log-s.txt
 
 cat log-s.txt |
     grep -P '^\d|Killed|Segmentation|^Run Time \(s\): real' |
diff --git a/duckdb/benchmark.sh b/duckdb/benchmark.sh
index 7042b968f..5cf30bb75 100755
--- a/duckdb/benchmark.sh
+++ b/duckdb/benchmark.sh
@@ -11,6 +11,10 @@ wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compat
 echo -n "Load time: "
 command time -f '%e' duckdb hits.db -storage_version latest -f create.sql -f load.sql
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.parquet
+
 # Run the queries
 ./run.sh 2>&1 | tee log.txt
diff --git a/elasticsearch/benchmark.sh b/elasticsearch/benchmark.sh
index e50248e12..f7445e644 100755
--- a/elasticsearch/benchmark.sh
+++ b/elasticsearch/benchmark.sh
@@ -76,5 +76,9 @@ echo "Data size: $(jq -r '._all.total.store.total_data_set_size_in_bytes' stats.
 END=$(date +%s)
 echo "Load time: $(echo "$END - $START" | bc)"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.json.gz
+
 ###### Run the queries
 ./run.sh
diff --git a/greenplum/benchmark.sh b/greenplum/benchmark.sh
index ec5a52604..503b966dd 100755
--- a/greenplum/benchmark.sh
+++ b/greenplum/benchmark.sh
@@ -78,5 +78,8 @@ command time -f '%e' psql -d postgres -t -c "insert into hits select * from hits
 echo -n "Load time: "
 command time -f '%e' psql -d postgres -t -c "ANALYZE hits;"
 du -sh /gpdata*
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
 ./run.sh 2>&1 | tee log.txt
 cat log.txt | grep -oP 'Time: \d+\.\d+ ms|psql: error' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/; s/^.*psql: error.*$/null/' |awk '{ if (i % 3 == 0) { printf "[" }; if ($1 == "null") { printf $1 } else { printf $1 / 1000 }; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
diff --git a/heavyai/benchmark.sh b/heavyai/benchmark.sh
index fe6c9758b..37a48ce2e 100755
--- a/heavyai/benchmark.sh
+++ b/heavyai/benchmark.sh
@@ -48,6 +48,10 @@ command time -f '%e' /opt/heavyai/bin/heavysql -q -t -p HyperInteractive <<< "CO
 
 # Loaded: 99997497 recs, Rejected: 0 recs in 572.633 secs
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.csv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/hologres/benchmark.sh b/hologres/benchmark.sh
index 74a821111..741d15eea 100755
--- a/hologres/benchmark.sh
+++ b/hologres/benchmark.sh
@@ -56,6 +56,10 @@ for file in hits_part_*; do
     PGUSER=$PG_USER PGPASSWORD=$PG_PASSWORD command time -f '%e' psql -h $HOST_NAME -p $PORT -d $DATABASE -t -c "\\copy hits FROM '$file'"
 done
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv hits_part_*
+
 # run clickbench test with queries
 echo "Starting to run queries..."
diff --git a/hyper/benchmark.sh b/hyper/benchmark.sh
index 20583879b..9f2bb55a9 100755
--- a/hyper/benchmark.sh
+++ b/hyper/benchmark.sh
@@ -13,6 +13,10 @@ pigz -d -f hits.csv.gz
 echo -n "Load time: "
 command time -f '%e' ./load.py
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.csv
+
 ./run.sh | tee log.txt
 
 cat log.txt |
diff --git a/infobright/benchmark.sh b/infobright/benchmark.sh
index 9c0b72d21..77bd60020 100755
--- a/infobright/benchmark.sh
+++ b/infobright/benchmark.sh
@@ -32,6 +32,10 @@ sudo docker exec mysql_ib du -bcs /mnt/mysql_data/ /usr/local/infobright-4.0.7-x
 
 # 13 760 341 294
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv hits90m.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 cat log.txt |
diff --git a/kinetica/benchmark.sh b/kinetica/benchmark.sh
index 046fa0b30..8612d01c6 100755
--- a/kinetica/benchmark.sh
+++ b/kinetica/benchmark.sh
@@ -31,5 +31,9 @@ LOADTIME=$(echo "$END - $START" | bc)
 echo "Load time: $LOADTIME"
 echo "Data size: $(du -bcs ./kinetica-persist/gpudb | grep total)"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+sudo rm -f ./kinetica-persist/hits.tsv.gz
+
 # run the queries
 ./run.sh
diff --git a/mariadb-columnstore/benchmark.sh b/mariadb-columnstore/benchmark.sh
index d050c705a..e0e6fe0b9 100755
--- a/mariadb-columnstore/benchmark.sh
+++ b/mariadb-columnstore/benchmark.sh
@@ -32,6 +32,10 @@ command time -f '%e' mysql --password="${PASSWORD}" --host 127.0.0.1 clickbench
 
 # 41m47.856s
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/mariadb/benchmark.sh b/mariadb/benchmark.sh
index 3363dd20a..483763266 100755
--- a/mariadb/benchmark.sh
+++ b/mariadb/benchmark.sh
@@ -24,6 +24,10 @@ sudo mariadb test < create.sql
 echo -n "Load time: "
 command time -f '%e' split -l 10000 --filter="sudo mariadb test -e \"SET sql_log_bin = 0; LOAD DATA LOCAL INFILE '/dev/stdin' INTO TABLE hits;\"" hits.tsv
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/monetdb/benchmark.sh b/monetdb/benchmark.sh
index 529851954..ec9e7bd5c 100755
--- a/monetdb/benchmark.sh
+++ b/monetdb/benchmark.sh
@@ -33,6 +33,10 @@ command time -f '%e' ./query.expect "COPY INTO hits FROM '$(pwd)/hits.tsv' USING
 # 99997497 affected rows
 # clk: 15:39 min
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/mysql-myisam/benchmark.sh b/mysql-myisam/benchmark.sh
index 3c41141a4..a99bc4c5d 100755
--- a/mysql-myisam/benchmark.sh
+++ b/mysql-myisam/benchmark.sh
@@ -20,6 +20,10 @@ command time -f '%e' sudo mysql test -e "SET sql_log_bin = 0; LOAD DATA LOCAL IN
 
 # 41m8.979s
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/mysql/benchmark.sh b/mysql/benchmark.sh
index 1a04d06bb..1d33a1827 100755
--- a/mysql/benchmark.sh
+++ b/mysql/benchmark.sh
@@ -20,6 +20,10 @@ command time -f '%e' sudo mysql test -e "SET sql_log_bin = 0; LOAD DATA LOCAL IN
 
 # 2:37:52 elapsed
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 echo -n "Data size: "
diff --git a/oxla/benchmark.sh b/oxla/benchmark.sh
index e9ad00536..a3add3c4c 100755
--- a/oxla/benchmark.sh
+++ b/oxla/benchmark.sh
@@ -38,6 +38,10 @@ PGPASSWORD=oxla command time -f '%e' psql -h localhost -U oxla -q -t -c "COPY hi
 echo -n "Data size: "
 PGPASSWORD=oxla psql -h localhost -U oxla -q -t -c "SELECT pg_total_relation_size('hits');"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+sudo rm -f data/hits.csv
+
 # run benchmark
 echo "running benchmark..."
 ./run.sh
diff --git a/pg_duckdb-indexed/benchmark.sh b/pg_duckdb-indexed/benchmark.sh
index 608d29794..ed894caaa 100755
--- a/pg_duckdb-indexed/benchmark.sh
+++ b/pg_duckdb-indexed/benchmark.sh
@@ -74,6 +74,10 @@ fi
 echo -n "Load time: "
 command time -f '%e' ./load.sh
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 psql $CONNECTION -c "ALTER DATABASE postgres SET duckdb.force_execution = true;"
 
 ./run.sh 2>&1 | tee log.txt
diff --git a/pg_duckdb/benchmark.sh b/pg_duckdb/benchmark.sh
index 14704efbc..51acdf64c 100755
--- a/pg_duckdb/benchmark.sh
+++ b/pg_duckdb/benchmark.sh
@@ -74,6 +74,10 @@ fi
 echo -n "Load time: "
 command time -f '%e' ./load.sh
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 psql $CONNECTION -c "ALTER DATABASE postgres SET duckdb.force_execution = true;"
 
 ./run.sh 2>&1 | tee log.txt
diff --git a/pgpro_tam/benchmark.sh b/pgpro_tam/benchmark.sh
index 07fa462f7..cc28d02c4 100755
--- a/pgpro_tam/benchmark.sh
+++ b/pgpro_tam/benchmark.sh
@@ -60,6 +60,10 @@ else
     command time -f '%e' psql -h 127.0.0.1 -U postgres -t -c "COPY hits FROM '/tmp/hits.tsv'"
 fi
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+sudo docker exec pgpro_tam rm -f /tmp/hits.tsv
+
 #run benchmark
 ./run.sh 2>&1 | tee log.txt
diff --git a/pinot/benchmark.sh b/pinot/benchmark.sh
index afe663f63..41fbd1c7f 100755
--- a/pinot/benchmark.sh
+++ b/pinot/benchmark.sh
@@ -38,6 +38,10 @@ command time -f '%e' ./apache-pinot-$PINOT_VERSION-bin/bin/pinot-admin.sh Launch
 
 # After upload it shows 94465149 rows instead of 99997497 in the dataset
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv parts*.tsv
+
 # Run the queries
 ./run.sh
diff --git a/selectdb/benchmark.sh b/selectdb/benchmark.sh
index ca9a30d42..820a4a9fe 100755
--- a/selectdb/benchmark.sh
+++ b/selectdb/benchmark.sh
@@ -119,6 +119,10 @@ du -bs "$DORIS_HOME"/be/storage/ | cut -f1 | tee storage_size
 
 echo "Data size: $(cat storage_size)"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 # Run queries
 ./run.sh 2>&1 | tee -a log.txt
diff --git a/starrocks/benchmark.sh b/starrocks/benchmark.sh
index f6d73ddf7..2da562919 100755
--- a/starrocks/benchmark.sh
+++ b/starrocks/benchmark.sh
@@ -69,6 +69,10 @@ du -bcs StarRocks-${VERSION}/storage/ | grep total
 # Dataset contains 99997497 rows
 mysql -h 127.0.0.1 -P9030 -uroot hits -e "SELECT count(*) FROM hits"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f hits.tsv
+
 ./run.sh 2>&1 | tee -a log.txt
 
 cat log.txt |
diff --git a/umbra/benchmark.sh b/umbra/benchmark.sh
index 73b434ec9..92550c1cf 100755
--- a/umbra/benchmark.sh
+++ b/umbra/benchmark.sh
@@ -34,6 +34,10 @@ then
 fi
 echo "Load time: $(( (end - start) / 1000 ))"
 
+# Drop the downloaded source files so the sync at the top of run.sh
+# doesn't flush their pages and inflate cold-run prep time.
+rm -f data/hits.tsv
+
 ./run.sh 2>&1 | tee log.txt
 
 # Calculate persistence size
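
The "sync at the top of run.sh" these comments refer to is the cold-run preparation the
run.sh scripts perform before the first (cold) run of each query. A minimal sketch of
that pattern, assuming the usual sync plus drop_caches pair (the exact run.sh differs
per engine):

    # Write back all dirty pages, then evict the page cache so the first
    # (cold) run of each query starts with nothing cached in memory.
    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches

With a freshly downloaded and decompressed hits.tsv or hits.parquet still on disk, sync
can have many gigabytes of dirty file pages to write back before the first query starts,
which is what inflated the measured prep time; unlinking the source files right after
loading lets the kernel discard those pages instead.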