Skip to content

Commit fc0773c

Browse files
authored
Merge pull request #1 from runpod-workers/feat/AE-998-lb-uc
feat: AE-998 add a load balancer worker code
2 parents 527a3da + 447f018 commit fc0773c

File tree

10 files changed

+962
-0
lines changed

10 files changed

+962
-0
lines changed

.gitignore

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/#use-with-ide
110+
.pdm.toml
111+
112+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113+
__pypackages__/
114+
115+
# Celery stuff
116+
celerybeat-schedule
117+
celerybeat.pid
118+
119+
# SageMath parsed files
120+
*.sage.py
121+
122+
# Environments
123+
.env
124+
.venv
125+
env/
126+
venv/
127+
ENV/
128+
env.bak/
129+
venv.bak/
130+
131+
# Spyder project settings
132+
.spyderproject
133+
.spyproject
134+
135+
# Rope project settings
136+
.ropeproject
137+
138+
# mkdocs documentation
139+
/site
140+
141+
# mypy
142+
.mypy_cache/
143+
.dmypy.json
144+
dmypy.json
145+
146+
# Pyre type checker
147+
.pyre/
148+
149+
# pytype static type analyzer
150+
.pytype/
151+
152+
# Cython debug symbols
153+
cython_debug/
154+
155+
# PyCharm
156+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158+
# and can be added to the global gitignore or merged into this file. For a more nuclear
159+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160+
#.idea/

Dockerfile

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# syntax=docker/dockerfile:1
# Image for the vLLM load balancer worker: CUDA 12.1 base image, pip, the
# Python requirements from builder/requirements.txt, a pinned vLLM build plus
# FlashInfer, and the handler sources copied from src/.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04

# Install pip. The apt package lists are removed in the same layer so they do
# not persist in the image.
# NOTE(review): recommended packages are deliberately still installed; they
# presumably provide the compiler toolchain and Python headers that some
# dependencies need - confirm before adding --no-install-recommends.
RUN apt-get update -y \
    && apt-get install -y python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Run ldconfig over the CUDA 12.1 compat directory so the libraries in it are
# registered in the dynamic linker cache.
RUN ldconfig /usr/local/cuda-12.1/compat/

# Install Python dependencies. The cache mount keeps pip's download cache on
# the build host instead of in the image layer.
COPY builder/requirements.txt /requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade pip && \
    python3 -m pip install --upgrade -r /requirements.txt

# Pin the vLLM version for stability; override with --build-arg VLLM_VERSION=...
# FlashInfer provides optimized attention for better performance.
# CUDA_VERSION and TORCH_VERSION only select the FlashInfer wheel index URL.
# NOTE(review): the nvidia/cuda base image is believed to export its own
# CUDA_VERSION environment variable - confirm that the ARG value below (and not
# an inherited ENV value) is what the RUN step expands into the index URL.
# NOTE(review): confirm that TORCH_VERSION matches the torch release that
# vLLM ${VLLM_VERSION} actually installs.
ARG VLLM_VERSION=0.9.1
ARG CUDA_VERSION=cu121
ARG TORCH_VERSION=torch2.3

# Same pip cache mount as above, so the large vLLM/FlashInfer wheel downloads
# are not stored in the image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install "vllm==${VLLM_VERSION}" && \
    python3 -m pip install flashinfer -i "https://flashinfer.ai/whl/${CUDA_VERSION}/${TORCH_VERSION}"

# Make modules under / and /vllm-workspace importable.
ENV PYTHONPATH="/:/vllm-workspace"

COPY src /src

WORKDIR /src

# Exec form: python3 is started directly, without a wrapping shell.
CMD ["python3", "handler.py"]

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 runpod-workers
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# vLLM Load Balancer
2+
3+
A FastAPI-based load balancer for serving vLLM models with RunPod integration. Provides OpenAI-compatible APIs with streaming and non-streaming text generation.
4+
5+
## Prerequisites
6+
7+
Before you begin, make sure you have:
8+
9+
- A RunPod account (sign up at [runpod.io](https://runpod.io))
10+
- RunPod API key (available in your RunPod dashboard)
11+
- Basic understanding of REST APIs and HTTP requests
12+
- `curl` or a similar tool for testing API endpoints
13+
14+
## Docker Image
15+
16+
Use the pre-built Docker image: `runpod/vllm-loadbalancer:dev`
17+
18+
## Environment Variables
19+
20+
Configure these environment variables in your RunPod endpoint:
21+
22+
| Variable | Required | Description | Default | Example |
23+
|----------|----------|-------------|---------|---------|
24+
| `MODEL_NAME` | **Yes** | HuggingFace model identifier | None | `microsoft/DialoGPT-medium` |
25+
| `TENSOR_PARALLEL_SIZE` | No | Number of GPUs for model parallelism | `1` | `2` |
26+
| `DTYPE` | No | Model precision type | `auto` | `float16` |
27+
| `TRUST_REMOTE_CODE` | No | Allow remote code execution | `true` | `false` |
28+
| `MAX_MODEL_LEN` | No | Maximum sequence length | None (auto) | `2048` |
29+
| `GPU_MEMORY_UTILIZATION` | No | GPU memory usage ratio | `0.9` | `0.8` |
30+
| `ENFORCE_EAGER` | No | Disable CUDA graphs | `false` | `true` |
31+
32+
## Deployment on RunPod
33+
34+
1. Create a new serverless endpoint
35+
2. Use Docker image: `runpod/vllm-loadbalancer:dev`
36+
3. Set required environment variable: `MODEL_NAME` (e.g., "microsoft/DialoGPT-medium")
37+
4. Optional: Configure additional environment variables as needed
38+
39+
## API Usage with curl
40+
41+
### Text Completion (Non-streaming)
42+
43+
```bash
44+
curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/completions" \
45+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
46+
-H "Content-Type: application/json" \
47+
-d '{
48+
"prompt": "Write a story about a brave knight",
49+
"max_tokens": 100,
50+
"temperature": 0.7,
51+
"stream": false
52+
}'
53+
```
54+
55+
### Text Completion (Streaming)
56+
57+
```bash
58+
curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/completions" \
59+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
60+
-H "Content-Type: application/json" \
61+
-d '{
62+
"prompt": "Tell me about artificial intelligence",
63+
"max_tokens": 200,
64+
"temperature": 0.8,
65+
"stream": true
66+
}'
67+
```
68+
69+
### Chat Completions
70+
71+
```bash
72+
curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/chat/completions" \
73+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
74+
-H "Content-Type: application/json" \
75+
-d '{
76+
"messages": [
77+
{"role": "user", "content": "What is the capital of France?"}
78+
],
79+
"max_tokens": 50,
80+
"temperature": 0.7
81+
}'
82+
```
83+
84+
### Health Check
85+
86+
```bash
87+
curl -X GET "https://your-endpoint-id.api.runpod.ai/ping" \
88+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY"
89+
```
90+
91+
## Local Testing
92+
93+
Run the test script:
94+
```bash
95+
export ENDPOINT_ID="your-endpoint-id"
96+
export RUNPOD_API_KEY="your-api-key"
97+
python example.py
98+
```

builder/requirements.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Required Python packages get listed here, one per line.
2+
# Recommended to lock the version number to avoid unexpected changes.
3+
4+
# You can also install packages from a git repository, e.g.:
5+
# git+https://github.com/runpod/runpod-python.git
6+
# To learn more, see https://pip.pypa.io/en/stable/reference/requirements-file-format/
7+
8+
ray
9+
pandas
10+
pyarrow
11+
runpod~=1.7.0
12+
huggingface-hub
13+
packaging
14+
typing-extensions==4.7.1
15+
pydantic
16+
pydantic-settings
17+
hf-transfer
18+
transformers

builder/setup.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/bash

# Optional system-setup script: upgrades the system, installs a few system
# packages, installs Python 3.10 from the deadsnakes PPA together with pip,
# and finally cleans the apt caches.
#
# NOTE: This script is not run by default for the template docker image.
# If you use a custom base image you can add your required system dependencies here.
#
# USAGE: This script can be used to install additional system packages or configurations:
# - Jupyter kernels (R, Julia, Scala, etc.)
# - Additional CUDA libraries or drivers
# - System-level debugging tools (htop, nvtop, etc.)
# - Custom compilers or build tools
# - SSH keys or security configurations
# - Custom Python versions or environments
#
# To use this script, uncomment the COPY and RUN commands in the Dockerfile:
# COPY builder/setup.sh /setup.sh
# RUN chmod +x /setup.sh && /setup.sh

set -e # Stop script on error

# Keep package configuration from waiting on interactive input during an
# unattended build. Exported only for the lifetime of this script.
export DEBIAN_FRONTEND=noninteractive

# Update System
# Each command is on its own line on purpose: with `set -e`, a failure of a
# non-final command inside an `a && b` list does NOT abort the script.
apt-get update
apt-get upgrade -y

# Install System Dependencies
# - openssh-server: for ssh access and web terminal
apt-get install -y --no-install-recommends software-properties-common curl git openssh-server

# Install Python 3.10
add-apt-repository ppa:deadsnakes/ppa -y
apt-get update
apt-get install -y --no-install-recommends python3.10 python3.10-dev python3.10-distutils
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Install pip for Python 3.10
# -f makes curl fail on an HTTP error instead of saving the error page, so a
# bad download is never handed to python3; the installer is removed afterwards.
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 get-pip.py
rm -f get-pip.py

# Clean up, remove unnecessary packages and help reduce image size
apt-get autoremove -y
apt-get clean -y
rm -rf /var/lib/apt/lists/*

0 commit comments

Comments
 (0)