docker-test-gpu #157

Workflow file for this run

.github/workflows/test-docker-gpu.yaml at 27cfeb2

	name: docker-test-gpu

	# Manually triggered GPU smoke test. For each backend in the matrix it builds
	# the aikit image and a test model image, starts the model container with GPU
	# access, sends one inference request to its HTTP API, and uploads the
	# container log (plus generated images for diffusers) as artifacts.
	on:
	  workflow_dispatch:

	permissions: read-all

	jobs:
	  test:
	    runs-on: self-hosted
	    timeout-minutes: 240
	    strategy:
	      # Let the remaining backends run even if one fails.
	      fail-fast: false
	      # One backend at a time: every matrix job uses the same container name
	      # (testmodel) and the same host port (8080), so parallel jobs on one
	      # host would collide.
	      max-parallel: 1
	      matrix:
	        backend:
	          - llama-cuda
	          - exllama2-gptq
	          - exllama2-exl2
	          - diffusers
	          # - mamba
	    steps:
	      # Remove leftovers in the workspace before checkout. The second glob
	      # matches dot-entries with at least two characters after the dot, so it
	      # never expands to "." or "..".
	      - name: cleanup workspace
	        run: |
	          rm -rf ./* || true
	          rm -rf ./.??* || true
	      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

	      # use default docker driver builder with containerd image store for local aikit image
	      # these must be setup before running this test
	      - run: docker buildx use default

	      - name: build aikit
	        run: |
	          docker buildx build . -t aikit:test \
	            --load --provenance=false --progress plain

	      # The aikitfile is selected by the matrix backend name.
	      - name: build test model
	        run: |
	          docker buildx build . -t testmodel:test \
	            -f test/aikitfile-${{ matrix.backend }}.yaml \
	            --load --provenance=false --progress plain

	      - name: list images
	        run: docker images

	      # Detached and --rm: the container is removed as soon as it stops.
	      - name: run test model
	        run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test

	      # The curl retries cover the time the server needs to come up.
	      - name: run test (gguf)
	        if: matrix.backend == 'llama-cuda'
	        run: |
	          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
	            "model": "llama-3.2-1b-instruct",
	            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
	          }')
	          echo "$result"

	          # jq prints the literal string "null" for a missing key, so testing
	          # its output for emptiness can never fail. With -e, jq exits
	          # non-zero when the expression yields false or null.
	          if ! echo "$result" | jq -e '.choices | length > 0' > /dev/null; then
	            echo "response contains no choices" >&2
	            exit 1
	          fi

	      - name: run test (exl2/gptq)
	        if: matrix.backend == 'exllama2-gptq' || matrix.backend == 'exllama2-exl2'
	        run: |
	          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
	            "model": "llama-2-7b-chat",
	            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
	          }')
	          echo "$result"

	          # See the gguf step: -e turns a missing/empty .choices into a failure.
	          if ! echo "$result" | jq -e '.choices | length > 0' > /dev/null; then
	            echo "response contains no choices" >&2
	            exit 1
	          fi

	      # Only runs when mamba is enabled in the matrix above.
	      - name: run test (mamba)
	        if: matrix.backend == 'mamba'
	        run: |
	          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
	            "model": "mamba-chat",
	            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
	          }')
	          echo "$result"

	          # See the gguf step: -e turns a missing/empty .choices into a failure.
	          if ! echo "$result" | jq -e '.choices | length > 0' > /dev/null; then
	            echo "response contains no choices" >&2
	            exit 1
	          fi

	      - name: run test (diffusers)
	        if: matrix.backend == 'diffusers'
	        run: |
	          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
	            "model": "dreamshaper",
	            "prompt": "A cute baby llama",
	            "size": "256x256"
	          }')
	          echo "$result"

	          # A missing .data[0].url evaluates to null (length 0), so this fails
	          # when no image URL was returned or the URL is empty.
	          if ! echo "$result" | jq -e '.data[0].url | length > 0' > /dev/null; then
	            echo "response contains no image url" >&2
	            exit 1
	          fi

	      # Copies the container's /tmp/generated/images directory to /tmp/images
	      # on the host, which is the path the artifact upload below collects.
	      - name: save generated image
	        if: matrix.backend == 'diffusers'
	        run: docker cp testmodel:/tmp/generated/images /tmp

	      # The steps below use always() so logs and cleanup happen after a failure.
	      - name: save logs
	        if: always()
	        run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log

	      - run: docker stop testmodel
	        if: always()

	      # Best-effort reclaim of images, containers and volumes on the runner.
	      - run: docker system prune -a -f --volumes || true
	        if: always()

	      - name: publish test artifacts
	        if: always()
	        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
	        with:
	          name: test-${{ matrix.backend }}
	          path: |
	            /tmp/*.log
	            /tmp/images/*.png

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

docker-test-gpu #157

Workflow file

docker-test-gpu #157

Jobs

Run details

Workflow file for this run