-
Notifications
You must be signed in to change notification settings - Fork 31
128 lines (107 loc) · 3.95 KB
/
test-docker-gpu.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
---
# Manually-triggered GPU smoke test: builds the aikit image, builds one test
# model image per backend, runs it with GPU access, and probes the OpenAI-style
# API endpoints. Runs on a self-hosted runner (GPU required).
name: docker-test-gpu
on:
  workflow_dispatch:
permissions: read-all
jobs:
  test:
    runs-on: self-hosted
    timeout-minutes: 240
    strategy:
      fail-fast: false
      # serialize matrix jobs — presumably a single GPU runner; confirm
      max-parallel: 1
      matrix:
        backend:
          - llama-cuda
          - exllama2-gptq
          - exllama2-exl2
          - diffusers
          # - mamba
    steps:
      # self-hosted runners keep workspace state between runs; wipe it,
      # including dotfiles (./.??* matches hidden entries but not . or ..)
      - name: cleanup workspace
        run: |
          rm -rf ./* || true
          rm -rf ./.??* || true
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      # use default docker driver builder with containerd image store for local aikit image
      # these must be setup before running this test
      - run: docker buildx use default
      - name: build aikit
        run: |
          docker buildx build . -t aikit:test \
            --load --provenance=false --progress plain
      - name: build test model
        run: |
          docker buildx build . -t testmodel:test \
            -f test/aikitfile-${{ matrix.backend }}.yaml \
            --load --provenance=false --progress plain
      - name: list images
        run: docker images
      - name: run test model
        run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test
      - name: run test (gguf)
        if: matrix.backend == 'llama-cuda'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "llama-3.2-1b-instruct",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"
          # jq prints the literal string "null" for a missing key, which is
          # non-empty — check for it explicitly so a malformed response fails the step
          choices=$(echo "$result" | jq '.choices')
          if [ -z "$choices" ] || [ "$choices" = "null" ]; then
            exit 1
          fi
      - name: run test (exl2/gptq)
        if: matrix.backend == 'exllama2-gptq' || matrix.backend == 'exllama2-exl2'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "llama-2-7b-chat",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"
          # "null" output from jq means the key was absent — treat as failure
          choices=$(echo "$result" | jq '.choices')
          if [ -z "$choices" ] || [ "$choices" = "null" ]; then
            exit 1
          fi
      # NOTE(review): this step is currently unreachable — the mamba matrix
      # entry above is commented out; kept for easy re-enablement
      - name: run test (mamba)
        if: matrix.backend == 'mamba'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "mamba-chat",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"
          # "null" output from jq means the key was absent — treat as failure
          choices=$(echo "$result" | jq '.choices')
          if [ -z "$choices" ] || [ "$choices" = "null" ]; then
            exit 1
          fi
      - name: run test (diffusers)
        if: matrix.backend == 'diffusers'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
            "model": "dreamshaper",
            "prompt": "A cute baby llama",
            "size": "256x256"
          }')
          echo "$result"
          # "null" output from jq means the key was absent — treat as failure
          url=$(echo "$result" | jq '.data[0].url')
          if [ -z "$url" ] || [ "$url" = "null" ]; then
            exit 1
          fi
      # copy images out of the container before it is stopped (container uses --rm)
      - name: save generated image
        if: matrix.backend == 'diffusers'
        run: docker cp testmodel:/tmp/generated/images /tmp
      # collect logs and clean up even when earlier steps failed
      - name: save logs
        if: always()
        run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log
      - run: docker stop testmodel
        if: always()
      - run: docker system prune -a -f --volumes || true
        if: always()
      - name: publish test artifacts
        if: always()
        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
        with:
          name: test-${{ matrix.backend }}
          path: |
            /tmp/*.log
            /tmp/images/*.png