Skip to content

Commit a9ccb35

Browse files
committed
Re-order Containerfile to improve cache hits
1 parent 0c28193 commit a9ccb35

File tree

2 files changed

+30
-19
lines changed

2 files changed

+30
-19
lines changed

Containerfile

+18-18
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,49 @@
11
FROM nvidia/cuda:11.7.0-devel-ubuntu22.04 as builder
22

3-
ENV NVIDIA_VISIBLE_DEVICES=all
4-
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
5-
6-
# If you are running something modern, reducing this to 8.6 will speed up build times slightly.
7-
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
8-
93
RUN apt-get update && \
104
apt-get install --no-install-recommends -y git ninja-build build-essential python3-dev python3-pip && \
115
rm -rf /var/lib/apt/lists/*
126

7+
RUN --mount=type=cache,target=/root/.cache/pip,Z pip3 install torch
138
RUN git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa /build
149

1510
WORKDIR /build
1611

17-
# Fix until new parameter "groupsize" is figured out
18-
RUN git reset --hard 468c47c01b4fe370616747b6d69a2d3f48bab5e4
12+
ARG GPTQ_SHA=HEAD
13+
RUN git reset --hard ${GPTQ_SHA}
1914

20-
RUN --mount=type=cache,target=/root/.cache/pip,Z pip3 install torch
2115
RUN --mount=type=cache,target=/root/.cache/pip,Z pip3 install -r requirements.txt
16+
17+
ARG TORCH_CUDA_ARCH_LIST="8.6+PTX"
2218
RUN python3 setup_cuda.py bdist_wheel -d .
2319

2420
FROM ubuntu:22.04
2521

26-
ENV CLI_ARGS=""
27-
ENV NVIDIA_VISIBLE_DEVICES=all
28-
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
29-
30-
VOLUME /data
31-
VOLUME /output
32-
3322
RUN apt-get update && \
3423
apt-get install --no-install-recommends -y git python3 python3-pip && \
3524
rm -rf /var/lib/apt/lists/*
3625

26+
RUN --mount=type=cache,target=/root/.cache/pip,Z pip install torch torchvision torchaudio
27+
3728
RUN git clone https://github.com/oobabooga/text-generation-webui /app
3829

3930
WORKDIR /app
4031

41-
COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
32+
ARG WEBUI_SHA=HEAD
33+
RUN git reset --hard ${WEBUI_SHA}
4234

43-
RUN --mount=type=cache,target=/root/.cache/pip,Z pip install torch torchvision torchaudio
4435
RUN --mount=type=cache,target=/root/.cache/pip,Z pip install -r requirements.txt
36+
37+
COPY --from=builder /build /app/repositories/GPTQ-for-LLaMa
4538
RUN --mount=type=cache,target=/root/.cache/pip,Z pip install /app/repositories/GPTQ-for-LLaMa/*.whl
4639

4740
COPY entrypoint.sh .
41+
42+
VOLUME /data
43+
VOLUME /output
44+
45+
ENV CLI_ARGS=""
46+
ENV NVIDIA_VISIBLE_DEVICES=all
47+
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
4848
ENTRYPOINT ["/app/entrypoint.sh"]
4949
CMD python3 server.py ${CLI_ARGS}

podman-compose.yaml

+12-1
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,19 @@ version: "3.3"
22

33
services:
44
text-generation-webui:
5-
build: .
5+
build:
6+
context: .
7+
args:
8+
# Use HEAD instead of a SHA hash to build from the latest commit
9+
# The GPTQ_SHA below is the last known-good commit for older models
10+
- GPTQ_SHA=468c47c01b4fe370616747b6d69a2d3f48bab5e4
11+
- WEBUI_SHA=HEAD
12+
# If you know which specific architecture your GPU is using,
13+
# specifying the exact TORCH_CUDA_ARCH_LIST version below can
14+
# speed up build times slightly.
15+
- "TORCH_CUDA_ARCH_LIST=7.0 7.5 8.0 8.6+PTX"
616
environment:
17+
# Feel free to customize CLI_ARGS. Below is an example for running 4bit llama.
718
- CLI_ARGS=--gptq-bits 4 --auto-devices --gpu-memory 8 --listen --no-stream --listen-port 7861 --extensions llama_prompts api sd_api_pictures --cai-chat --model llama-13b
819
# May be needed in some instances with Docker on machines with selinux
920
# privileged: true

0 commit comments

Comments
 (0)