# .github/workflows/bench-pr.yml

# Runs the benchmark jobs below for a pull request and reports the results
# back as PR comments.
name: PR Benchmarks

on:
  pull_request:
    types:
      - labeled
      - synchronize
    branches:
      - develop
  # Allows the workflow to be started manually.
  workflow_dispatch: {}

# Token scopes granted to the jobs of this workflow.
permissions:
  actions: write
  contents: read
  # Used to remove the trigger label and to post the result comments.
  pull-requests: write
  # Used by aws-actions/configure-aws-credentials to assume the AWS role
  # (presumably through GitHub's OIDC token — confirm against the role's
  # trust policy).
  id-token: write

jobs:
  # Gate job: the benchmark jobs in this file list it under `needs`, so they
  # are skipped whenever this job's condition does not hold.
  label_trigger:
    runs-on: ubuntu-latest
    # Proceed when the head commit message contains '[benchmark]', or when a
    # pull_request event carries the 'benchmark' label.
    # NOTE(review): `github.event.head_commit` is normally only populated for
    # push events, which this workflow does not subscribe to — confirm that the
    # commit-message trigger can actually fire.
    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || (github.event_name == 'pull_request' && github.event.label.name == 'benchmark') }}
    steps:
      # Drop the benchmark label up front so that applying it again
      # re-triggers the workflow.
      - uses: actions-ecosystem/action-remove-labels@v1
        with:
          labels: benchmark
  # Runs each Criterion benchmark of the matrix on a self-hosted runner,
  # compares its results with the ones stored for the PR's base commit, and
  # posts the comparison as a PR comment (one tagged comment per benchmark).
  bench:
    needs: label_trigger
    strategy:
      matrix:
        benchmark:
          - id: datafusion
            name: DataFusion
          - id: random_access
            name: Random Access
          - id: compress
            name: Vortex Compression
    runs-on: self-hosted
    # Same condition as label_trigger; `&&` binds tighter than `||`, the
    # parentheses only make that explicit.
    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || (github.event.label.name == 'benchmark' && github.event_name == 'pull_request') }}
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/cleanup
      - uses: ./.github/actions/setup-rust
      - uses: spiraldb/actions/.github/actions/setup-uv@0.2.0

      # The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet.
      # Only installed when the runner is not self-hosted.
      - name: Install DuckDB
        uses: opt-nc/setup-duckdb-action@v1.0.9
        if: runner.environment != 'self-hosted'
        with:
          version: v1.0.0

      # Export TMPDIR=/work to all following steps on self-hosted runners.
      - name: Set tempdir
        if: runner.environment == 'self-hosted'
        run: |
          echo "TMPDIR=/work" >> "$GITHUB_ENV"

      - name: Run benchmark
        shell: bash
        env:
          BENCH_VORTEX_RATIOS: '.*'
          RUSTFLAGS: '-C target-cpu=native'
        run: |
          cargo install cargo-criterion
          # NOTE(review): jq is presumably needed by coerce-criterion-json.sh — confirm.
          sudo apt-get update && sudo apt-get install -y jq

          # Raw Criterion JSON messages for this matrix entry.
          cargo criterion \
                --bench "${{ matrix.benchmark.id }}" \
                --message-format=json \
            > "${{ matrix.benchmark.id }}-raw.json"

          # Convert the raw messages into the JSON consumed by the compare step.
          bash scripts/coerce-criterion-json.sh \
            < "${{ matrix.benchmark.id }}-raw.json" \
            > "${{ matrix.benchmark.id }}.json"

      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Compare results
        shell: bash
        run: |
          set -Eeu -o pipefail -x

          base_commit_sha=${{ github.event.pull_request.base.sha }}

          # Without a base SHA there is nothing to compare against; fail with
          # a clear message instead of filtering on an empty pattern.
          if [[ -z "$base_commit_sha" ]]; then
            echo "::error::No pull request base commit SHA is available for this run."
            exit 1
          fi

          # Keep only the stored result lines that mention the base commit.
          # The SHA is matched as a fixed string; a failed download or a base
          # commit without stored results fails the step with an explicit error.
          if ! aws s3 cp s3://vortex-benchmark-results-database/data.json - \
              | grep -F -- "$base_commit_sha" \
              > base.json; then
            echo "::error::Could not load benchmark results for base commit ${base_commit_sha} (download failed, or no results are stored for that commit)."
            exit 1
          fi

          # Build the PR comment: a heading plus a collapsible results table.
          echo '# Benchmarks: ${{ matrix.benchmark.id }}' > comment.md
          echo '<details>' >> comment.md
          echo '<summary>Table of Results</summary>' >> comment.md
          echo '' >> comment.md
          uv run python3 scripts/compare-benchmark-jsons.py base.json "${{ matrix.benchmark.id }}.json" \
            >> comment.md
          echo '</details>' >> comment.md
      # The per-benchmark comment tag keeps the matrix entries' comments apart.
      - name: Comment PR
        uses: thollander/actions-comment-pull-request@v3
        with:
          file-path: comment.md
          comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}
  # Runs the TPC-H benchmark binary on a self-hosted runner, compares its
  # output with the results stored for the PR's base commit, and posts the
  # comparison as a PR comment.
  tpch:
    # Skipped automatically whenever label_trigger is skipped or fails.
    needs: label_trigger
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/cleanup
      - uses: ./.github/actions/setup-rust
      - uses: spiraldb/actions/.github/actions/setup-uv@0.2.0

      # Installs DuckDB only when the runner is not self-hosted.
      # NOTE(review): this job declares `runs-on: self-hosted`, so this step
      # looks like it is always skipped here — confirm whether it is needed.
      - name: Install DuckDB
        uses: opt-nc/setup-duckdb-action@v1.0.9
        if: runner.environment != 'self-hosted'
        with:
          version: v1.0.0

      # Export TMPDIR=/work to all following steps on self-hosted runners.
      - name: Set tempdir
        if: runner.environment == 'self-hosted'
        run: |
          echo "TMPDIR=/work" >> "$GITHUB_ENV"

      - name: Run TPC-H benchmark
        shell: bash
        env:
          BENCH_VORTEX_RATIOS: '.*'
          RUSTFLAGS: '-C target-cpu=native'
        run: |
          # tee keeps the output in the job log while saving it to tpch.json.
          cargo run --bin tpch_benchmark --release -- -d gh-json -t 1 | tee tpch.json
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Compare results
        shell: bash
        run: |
          set -Eeu -o pipefail -x

          base_commit_sha=${{ github.event.pull_request.base.sha }}

          # Without a base SHA there is nothing to compare against; fail with
          # a clear message instead of filtering on an empty pattern.
          if [[ -z "$base_commit_sha" ]]; then
            echo "::error::No pull request base commit SHA is available for this run."
            exit 1
          fi

          # Keep only the stored result lines that mention the base commit.
          # The SHA is matched as a fixed string; a failed download or a base
          # commit without stored results fails the step with an explicit error.
          if ! aws s3 cp s3://vortex-benchmark-results-database/data.json - \
              | grep -F -- "$base_commit_sha" \
              > base.json; then
            echo "::error::Could not load benchmark results for base commit ${base_commit_sha} (download failed, or no results are stored for that commit)."
            exit 1
          fi

          # Build the PR comment: a heading plus a collapsible results table.
          echo '# Benchmarks: TPC-H' > comment.md
          echo '<details>' >> comment.md
          echo '<summary>Table of Results</summary>' >> comment.md
          echo '' >> comment.md
          uv run python3 scripts/compare-benchmark-jsons.py base.json tpch.json \
            >> comment.md
          echo '</details>' >> comment.md
      - name: Comment PR
        uses: thollander/actions-comment-pull-request@v3
        with:
          file-path: comment.md
          comment-tag: bench-pr-comment-tpch
  # Runs the Clickbench benchmark binary on a self-hosted runner, compares its
  # output with the results stored for the PR's base commit, and posts the
  # comparison as a PR comment.
  clickbench:
    # Skipped automatically whenever label_trigger is skipped or fails.
    needs: label_trigger
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/cleanup
      - uses: ./.github/actions/setup-rust
      - uses: spiraldb/actions/.github/actions/setup-uv@0.2.0

      # Installs DuckDB only when the runner is not self-hosted.
      # NOTE(review): this job declares `runs-on: self-hosted`, so this step
      # looks like it is always skipped here — confirm whether it is needed.
      - name: Install DuckDB
        uses: opt-nc/setup-duckdb-action@v1.0.9
        if: runner.environment != 'self-hosted'
        with:
          version: v1.0.0

      # Export TMPDIR=/work to all following steps on self-hosted runners.
      - name: Set tempdir
        if: runner.environment == 'self-hosted'
        run: |
          echo "TMPDIR=/work" >> "$GITHUB_ENV"

      - name: Run Clickbench benchmark
        shell: bash
        env:
          BENCH_VORTEX_RATIOS: '.*'
          RUSTFLAGS: '-C target-cpu=native'
          # NOTE(review): HOME is pinned to the runner account's home
          # directory — presumably required by the benchmark; confirm.
          HOME: /home/ci-runner
          RUST_BACKTRACE: full
        run: |
          # tee keeps the output in the job log while saving it to clickbench.json.
          cargo run --bin clickbench --release -- -d gh-json | tee clickbench.json
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Compare results
        shell: bash
        run: |
          set -Eeu -o pipefail -x

          base_commit_sha=${{ github.event.pull_request.base.sha }}

          # Without a base SHA there is nothing to compare against; fail with
          # a clear message instead of filtering on an empty pattern.
          if [[ -z "$base_commit_sha" ]]; then
            echo "::error::No pull request base commit SHA is available for this run."
            exit 1
          fi

          # Keep only the stored result lines that mention the base commit.
          # The SHA is matched as a fixed string; a failed download or a base
          # commit without stored results fails the step with an explicit error.
          if ! aws s3 cp s3://vortex-benchmark-results-database/data.json - \
              | grep -F -- "$base_commit_sha" \
              > base.json; then
            echo "::error::Could not load benchmark results for base commit ${base_commit_sha} (download failed, or no results are stored for that commit)."
            exit 1
          fi

          # Build the PR comment: a heading plus a collapsible results table.
          echo '# Benchmarks: Clickbench' > comment.md
          echo '<details>' >> comment.md
          echo '<summary>Table of Results</summary>' >> comment.md
          echo '' >> comment.md
          uv run python3 scripts/compare-benchmark-jsons.py base.json clickbench.json \
            >> comment.md
          echo '</details>' >> comment.md
      - name: Comment PR
        uses: thollander/actions-comment-pull-request@v3
        with:
          file-path: comment.md
          comment-tag: bench-pr-comment-clickbench