-
Notifications
You must be signed in to change notification settings - Fork 31
128 lines (107 loc) · 3.95 KB
/
test-docker-gpu.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
---
# Manually-triggered GPU smoke test: builds the aikit image, builds one test
# model image per backend, runs it with GPU access, and probes the OpenAI-style
# API endpoints. Runs on a self-hosted runner (GPU required).
name: docker-test-gpu
on:
  workflow_dispatch:
permissions: read-all
jobs:
  test:
    runs-on: self-hosted
    timeout-minutes: 240
    strategy:
      fail-fast: false
      # serialize matrix jobs — presumably a single GPU runner; confirm
      max-parallel: 1
      matrix:
        backend:
          - llama-cuda
          - exllama2-gptq
          - exllama2-exl2
          - diffusers
          # - mamba
    steps:
      # self-hosted runners keep workspace state between runs; wipe it,
      # including dotfiles (./.??* matches hidden entries but not . or ..)
      - name: cleanup workspace
        run: |
          rm -rf ./* || true
          rm -rf ./.??* || true
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      # use default docker driver builder with containerd image store for local aikit image
      # these must be setup before running this test
      - run: docker buildx use default
      - name: build aikit
        run: |
          docker buildx build . -t aikit:test \
            --load --provenance=false --progress plain
      - name: build test model
        run: |
          docker buildx build . -t testmodel:test \
            -f test/aikitfile-${{ matrix.backend }}.yaml \
            --load --provenance=false --progress plain
      - name: list images
        run: docker images
      - name: run test model
        run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test
      - name: run test (gguf)
        if: matrix.backend == 'llama-cuda'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "llama-3.2-1b-instruct",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"
          # jq prints the literal string "null" for a missing key, which is
          # non-empty — check for it explicitly so a malformed response fails the step
          choices=$(echo "$result" | jq '.choices')
          if [ -z "$choices" ] || [ "$choices" = "null" ]; then
            exit 1
          fi
      - name: run test (exl2/gptq)
        if: matrix.backend == 'exllama2-gptq' || matrix.backend == 'exllama2-exl2'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "llama-2-7b-chat",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"
          # "null" output from jq means the key was absent — treat as failure
          choices=$(echo "$result" | jq '.choices')
          if [ -z "$choices" ] || [ "$choices" = "null" ]; then
            exit 1
          fi
      # NOTE(review): this step is currently unreachable — the mamba matrix
      # entry above is commented out; kept for easy re-enablement
      - name: run test (mamba)
        if: matrix.backend == 'mamba'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "mamba-chat",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"
          # "null" output from jq means the key was absent — treat as failure
          choices=$(echo "$result" | jq '.choices')
          if [ -z "$choices" ] || [ "$choices" = "null" ]; then
            exit 1
          fi
      - name: run test (diffusers)
        if: matrix.backend == 'diffusers'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
            "model": "dreamshaper",
            "prompt": "A cute baby llama",
            "size": "256x256"
          }')
          echo "$result"
          # "null" output from jq means the key was absent — treat as failure
          url=$(echo "$result" | jq '.data[0].url')
          if [ -z "$url" ] || [ "$url" = "null" ]; then
            exit 1
          fi
      # copy images out of the container before it is stopped (container uses --rm)
      - name: save generated image
        if: matrix.backend == 'diffusers'
        run: docker cp testmodel:/tmp/generated/images /tmp
      # collect logs and clean up even when earlier steps failed
      - name: save logs
        if: always()
        run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log
      - run: docker stop testmodel
        if: always()
      - run: docker system prune -a -f --volumes || true
        if: always()
      - name: publish test artifacts
        if: always()
        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
        with:
          name: test-${{ matrix.backend }}
          path: |
            /tmp/*.log
            /tmp/images/*.png