
Commit

100 file variant
AdamGS committed Nov 27, 2024
1 parent 157a6a0 commit cb3265a
Showing 4 changed files with 4 additions and 22 deletions.
16 changes: 0 additions & 16 deletions .github/workflows/bench-pr.yml
@@ -41,13 +41,6 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/cleanup
-      - name: Configure AWS Credentials for benchmark data
-        uses: aws-actions/configure-aws-credentials@v4
-        if: matrix.benchmark.id == 'clickbench'
-        with:
-          audience: sts.amazonaws.com
-          aws-region: us-west-2
-          role-to-assume: arn:aws:iam::375504701696:role/vortex-benchmark
       - uses: ./.github/actions/setup-rust
       - uses: spiraldb/actions/.github/actions/[email protected]

@@ -63,11 +56,6 @@ jobs:
         run: |
           echo "TMPDIR=/work" >> $GITHUB_ENV
-      - name: Download Clickbench data
-        if: matrix.benchmark.id == 'clickbench'
-        run:
-          aws s3 cp s3://vortex-bench-dev/clickbench/processed.parquet bench-vortex/data/clickbench/
-
       - name: Run benchmark
         shell: bash
         env:
@@ -94,10 +82,6 @@ jobs:
             | jq --slurp --compact-output '.' >${{ matrix.benchmark.id }}.json
           cat ${{ matrix.benchmark.id }}.json
-      - name: Cleanup Raw Data
-        if: always()
-        run:
-          rm -rf bench-vortex/data/
       - name: Store benchmark result
         if: '!cancelled()'
         uses: benchmark-action/github-action-benchmark@v1
2 changes: 1 addition & 1 deletion bench-vortex/benches/clickbench.rs
@@ -25,7 +25,7 @@ fn benchmark(c: &mut Criterion) {
             epoch_ms(ClientEventTime * 1000) AS ClientEventTime, \
             epoch_ms(LocalEventTime * 1000) AS LocalEventTime, \
             DATE '1970-01-01' + INTERVAL (EventDate) DAYS AS EventDate) \
-            FROM read_parquet('https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{idx}.parquet')) TO '{}' (FORMAT 'parquet');",
+            FROM read_parquet('https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{idx}.parquet', binary_as_string=True)) TO '{}' (FORMAT 'parquet');",
         output_path.to_str().unwrap()
     );
     Command::new("duckdb")
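For context, binary_as_string is a real DuckDB read_parquet option: it loads columns stored as BLOB (binary without the UTF8 flag) as VARCHAR, which the upstream partitioned ClickBench files appear to require. A minimal sketch of the pattern, not the benchmark's actual code (the helper name and the simplified SELECT are illustrative), assuming the duckdb CLI is on PATH:

// Minimal sketch: shell out to the duckdb CLI to re-encode one ClickBench
// partition, passing binary_as_string=True so BLOB-typed string columns are
// decoded as VARCHAR. The helper name and simplified query are illustrative.
use std::process::Command;

fn convert_partition(idx: usize, output_path: &str) -> std::io::Result<()> {
    let query = format!(
        "COPY (SELECT * FROM read_parquet(\
         'https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{idx}.parquet', \
         binary_as_string=True)) TO '{output_path}' (FORMAT 'parquet');"
    );
    // `duckdb -c <sql>` runs the statement against an in-memory database.
    let status = Command::new("duckdb").arg("-c").arg(&query).status()?;
    assert!(status.success(), "duckdb exited with {status}");
    Ok(())
}

Called in a loop over idx in 0..100 (with the richer SELECT list shown above), this mirrors what the benchmark setup does.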
2 changes: 1 addition & 1 deletion bench-vortex/src/clickbench.rs
@@ -147,7 +147,7 @@ pub async fn register_vortex_file(
     let vortex_dir = input_path.parent().unwrap().join("vortex_compressed");
     create_dir_all(&vortex_dir).await?;
 
-    for idx in 0..1 {
+    for idx in 0..100 {
         let parquet_file_path = input_path.join(format!("hits_{idx}.parquet"));
         let output_path = vortex_dir.join(format!("hits_{idx}.{VORTEX_FILE_EXTENSION}"));
         idempotent_async(&output_path, |vtx_file| async move {
6 changes: 2 additions & 4 deletions bench-vortex/src/tpch/mod.rs
@@ -26,7 +26,6 @@ use vortex_datafusion::memory::VortexMemTableOptions;
 use vortex_datafusion::persistent::format::VortexFormat;
 use vortex_datafusion::SessionContextExt;
 
-use crate::clickbench::HITS_SCHEMA;
 use crate::{idempotent_async, CTX, TARGET_BLOCK_BYTESIZE, TARGET_BLOCK_SIZE};
 
 pub mod dbgen;
@@ -337,9 +336,8 @@ async fn register_vortex_file(
     let table_url = ListingTableUrl::parse(vtx_file.to_str().unwrap())?;
     let config = ListingTableConfig::new(table_url)
         .with_listing_options(ListingOptions::new(format as _))
-        .with_schema(HITS_SCHEMA.clone().into());
-        // .infer_schema(&session.state())
-        // .await?;
+        .infer_schema(&session.state())
+        .await?;
 
     let listing_table = Arc::new(ListingTable::try_new(config)?);
 
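The change above swaps the hard-coded HITS_SCHEMA for schema inference at registration time. A minimal sketch of that pattern in plain DataFusion, not the repository's code (ParquetFormat stands in for VortexFormat so the example is self-contained; the table name and path are illustrative):

// Minimal sketch: register a listing table whose schema is inferred from the
// files on disk instead of being supplied up front.
use std::sync::Arc;

use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::{
    ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
};
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

async fn register_inferred(session: &SessionContext, path: &str) -> Result<()> {
    let table_url = ListingTableUrl::parse(path)?;
    let format = Arc::new(ParquetFormat::default());

    // infer_schema consults the session state to read file metadata, so the
    // registered schema always matches what the files actually contain.
    let config = ListingTableConfig::new(table_url)
        .with_listing_options(ListingOptions::new(format as _))
        .infer_schema(&session.state())
        .await?;

    let listing_table = Arc::new(ListingTable::try_new(config)?);
    session.register_table("hits", listing_table)?;
    Ok(())
}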
