-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
50 lines (38 loc) · 1.61 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Base stage: Ubuntu 22.04 with NVIDIA's CUDA apt repository registered.
# The builder and runtime stages both start from this image.
FROM ubuntu:22.04 AS base
# Install the tools needed to download the repository keyring, register the
# CUDA repository, then refresh the package index so it lists the CUDA packages.
# - apt-get is used instead of apt, whose command-line interface is not meant
#   for use in scripts.
# - The keyring .deb is deleted in the same layer so it never ships in the image.
# - The package lists are intentionally kept: later stages may install
#   packages without running their own `apt-get update`.
RUN apt-get update -q \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && wget -qO /cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
    && dpkg -i /cuda-keyring.deb \
    && rm -f /cuda-keyring.deb \
    && apt-get update -q
# Builder stage: compiles the llama.cpp server with CUDA and curl support.
# Nothing from this stage reaches the final image except what a later stage
# explicitly copies out of it.
FROM base AS builder
# Branch or tag of llama.cpp to build. It is passed to `git clone --branch`,
# so a raw commit SHA will not work. Override with --build-arg to pin a release.
# NOTE(review): upstream appears to be moving its build from make to CMake;
# pin this to a ref that still supports the make invocation below — confirm.
ARG LLAMA_CPP_REF=master
# Value forwarded to the llama.cpp Makefile as CUDA_DOCKER_ARCH.
ARG CUDA_DOCKER_ARCH=compute_86
# Install git, the CUDA compiler, and the cuBLAS/curl development packages.
# The index is refreshed in the same layer as the install so a cached, stale
# package list is never paired with this install command, and the lists are
# removed afterwards so this layer does not carry a second copy of them.
RUN apt-get update -q \
    && apt-get install -y --no-install-recommends \
        cuda-nvcc-12-2 \
        git \
        libcublas-dev-12-2 \
        libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*
# Put the CUDA binaries on PATH for the build
ENV PATH=$PATH:/usr/local/cuda/bin
# Shallow-clone the requested ref of the sources
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" https://github.com/ggerganov/llama.cpp.git /llama.cpp
# Build from the source tree (WORKDIR instead of `cd` inside RUN)
WORKDIR /llama.cpp
# Build only the llama-server target, using one make job per available CPU
RUN make -j"$(nproc)" LLAMA_CUDA=1 LLAMA_CURL=1 CUDA_DOCKER_ARCH="${CUDA_DOCKER_ARCH}" llama-server
# Runtime stage: the image that actually runs the llama server.
FROM base AS runtime
WORKDIR /app
# Install the CUDA runtime libraries (no development headers) plus the curl
# and OpenMP shared libraries. The index is refreshed in the same layer as the
# install so a stale cached package list is never used, and the lists are
# removed afterwards so this layer does not add another copy of them.
RUN apt-get update -q \
    && apt-get install -y --no-install-recommends \
        cuda-cudart-12-2 \
        libcublas-12-2 \
        libcurl4 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# Copy the compiled server binary out of the builder stage
COPY --from=builder /llama.cpp/llama-server /app/llama-server
# Add the start-up script from the build context and make it executable
COPY ./llama-server.sh /app/llama-server.sh
RUN chmod +x /app/llama-server.sh
# Put the CUDA 12.2 library directory on the dynamic loader's search path.
# Any existing value is appended after a ':' only when it is non-empty, which
# avoids a trailing ':' (an empty entry, i.e. the current directory).
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# NOTE(review): no USER is set, so the container runs as root; consider a
# dedicated non-root user once the needs of llama-server.sh are confirmed.
# Command to run on container start (exec form, no intermediate shell)
CMD ["/app/llama-server.sh"]