-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathDockerfile_cuda
89 lines (74 loc) · 2.35 KB
/
Dockerfile_cuda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# syntax=docker/dockerfile:1

# Base-image selectors. They are declared ahead of the first FROM so that
# every FROM line in this file can interpolate them.
ARG CUDA_VERSION="12.6.3"
ARG UBUNTU_VERSION="24.04"

# Builder stage: the CUDA "devel" image variant, used for compiling.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS builder

# Install the build toolchain.
# DEBIAN_FRONTEND is set inline (not via ENV) so that no package can stall the
# build on a debconf prompt, while the setting stays out of the image's
# environment. The apt lists are removed in the same layer that created them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libcurl4-openssl-dev \
        python3.12 \
        python3.12-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# CUDA architecture(s) to compile for. The special value "default" skips
# setting CMAKE_CUDA_ARCHITECTURES altogether.
ARG CUDA_DOCKER_ARCH="86"

WORKDIR /build
# NOTE(review): the clone is not pinned to a tag or commit, so every uncached
# build picks up whatever the repository's default branch holds at that time.
RUN git clone https://github.com/ggerganov/llama.cpp
WORKDIR /build/llama.cpp

# Extra flags passed through to the CMake configure step
# (e.g. --build-arg CMAKE_ARGS="-DFOO=ON").
ARG CMAKE_ARGS=""

# Configure and compile with CUDA support.
# The architecture flag is PREPENDED to any caller-supplied CMAKE_ARGS instead
# of replacing them, so user flags are no longer discarded; because they come
# later on the command line they can also override the architecture setting.
# ${CMAKE_ARGS} is intentionally left unquoted so it splits into separate flags.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH} ${CMAKE_ARGS}"; \
    fi && \
    cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DLLAMA_CUDA=ON \
        -DLLAMA_CURL=ON \
        -DGGML_CUDA=ON \
        -DBUILD_SHARED_LIBS=ON \
        ${CMAKE_ARGS} \
        -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build -j"$(nproc)"
# Gather every shared library produced under build/ into build/all_libs so a
# later stage can pick them all up with a single glob.
# The destination directory is pruned from the search so find never tries to
# copy a file onto itself, only files and symlinks are matched, and errors are
# no longer masked with "|| true".
RUN mkdir -p build/all_libs && \
    find build -path build/all_libs -prune -o \
        -name "*.so*" \( -type f -o -type l \) -exec cp {} build/all_libs/ \;
# Runtime stage: the CUDA "runtime" image variant, which becomes the final image.
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# OS-level packages, sorted alphabetically for easier diffs.
# DEBIAN_FRONTEND is set inline so that no package can stall the build on a
# debconf prompt, without leaving the variable in the container environment.
# NOTE(review): build-essential and python3.12-dev are presumably here so that
# Python packages with native extensions can be compiled during install — confirm.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libcurl4-openssl-dev \
        libgomp1 \
        ocrmypdf \
        pngquant \
        python3-pluggy \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
        tesseract-ocr-deu \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Bring in the llama-server binary and the shared libraries collected by the
# builder stage, then refresh the dynamic linker cache for /usr/local/lib.
COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /build/llama.cpp/build/all_libs/* /usr/local/lib/
RUN ldconfig /usr/local/lib

WORKDIR /app

# Expose the virtualenv's site-packages to Python, and put /root/.local/bin on
# PATH so the `uv` command installed below can be found.
ENV PYTHONPATH=/app/.venv/lib/python3.12/site-packages
ENV PATH="/root/.local/bin:$PATH"

# Install uv and create the virtualenv at /app/.venv.
# The installer is downloaded to a file first instead of being piped into sh:
# the default shell has no pipefail, so a failed download in a pipe would go
# unnoticed. --no-cache keeps uv's package cache out of the image layer.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh && \
    uv venv && \
    . .venv/bin/activate && \
    uv pip install --no-cache wheel setuptools

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
RUN . .venv/bin/activate && \
    uv pip install --no-cache -r requirements.txt

# Application source goes last; it changes most often.
COPY . .
# Runtime environment: host setting for llama-server (presumably read by the
# server at startup — confirm) and /usr/local/lib prepended to the library
# search path so the copied shared libraries are found.
ENV LLAMA_ARG_HOST=0.0.0.0 \
    LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH

# Port documented for the container.
EXPOSE 5000

# Liveness probe against /health; the timing options are Docker's defaults,
# written out explicitly.
# NOTE(review): the probe targets port 8080 while EXPOSE declares 5000 —
# presumably 8080 is the llama-server started by app.py; confirm.
HEALTHCHECK --interval=30s --timeout=30s --start-period=0s --retries=3 \
    CMD [ "curl", "-f", "http://localhost:8080/health" ]

# Start the app with the virtualenv's interpreter (path is relative to WORKDIR).
CMD [".venv/bin/python", "app.py", "--server_path", "/usr/local/bin/llama-server", "--model_path", "/models"]