Skip to content

Commit ca0689d

Browse files
committed
feat: AE-998 add a load balancer worker code
Signed-off-by: pandyamarut <[email protected]>
1 parent 527a3da commit ca0689d

File tree

8 files changed

+807
-0
lines changed

8 files changed

+807
-0
lines changed

.gitignore

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/#use-with-ide
110+
.pdm.toml
111+
112+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113+
__pypackages__/
114+
115+
# Celery stuff
116+
celerybeat-schedule
117+
celerybeat.pid
118+
119+
# SageMath parsed files
120+
*.sage.py
121+
122+
# Environments
123+
.env
124+
.venv
125+
env/
126+
venv/
127+
ENV/
128+
env.bak/
129+
venv.bak/
130+
131+
# Spyder project settings
132+
.spyderproject
133+
.spyproject
134+
135+
# Rope project settings
136+
.ropeproject
137+
138+
# mkdocs documentation
139+
/site
140+
141+
# mypy
142+
.mypy_cache/
143+
.dmypy.json
144+
dmypy.json
145+
146+
# Pyre type checker
147+
.pyre/
148+
149+
# pytype static type analyzer
150+
.pytype/
151+
152+
# Cython debug symbols
153+
cython_debug/
154+
155+
# PyCharm
156+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158+
# and can be added to the global gitignore or merged into this file. For a more nuclear
159+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160+
#.idea/

Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# syntax=docker/dockerfile:1
# Worker image: CUDA 12.1 base + Python dependencies + vLLM/FlashInfer, with the
# handler under /src started as the container command.
# The syntax directive above pins the BuildKit frontend, which the
# `RUN --mount=type=cache` instructions below require.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04

# Install pip. The apt package lists are deleted in the same layer that
# downloads them so they do not persist in the image.
# NOTE(review): recommended packages are intentionally still installed;
# python3-pip's recommends presumably provide the compiler/headers that some
# Python packages need -- confirm before adding --no-install-recommends.
RUN apt-get update -y \
    && apt-get install -y python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Add the CUDA 12.1 compat directory to the dynamic linker cache.
RUN ldconfig /usr/local/cuda-12.1/compat/

# Install Python dependencies
COPY builder/requirements.txt /requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade pip && \
    python3 -m pip install --upgrade -r /requirements.txt

# Install vLLM and FlashInfer from pip (the issues that previously required
# building a fork are fixed). The pip cache lives in a BuildKit cache mount, as
# in the step above, so downloaded wheels are reused across builds without
# being stored in the image layer.
# NOTE(review): the FlashInfer index targets cu121/torch2.3 -- confirm this
# matches the torch version that vllm==0.9.1 installs.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install vllm==0.9.1 && \
    python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3

# Setup for Option 2: Building the Image with the Model included
# (no model-baking steps are currently defined in this file)

ENV PYTHONPATH="/:/vllm-workspace"

# Application source is copied last so code changes do not invalidate the
# dependency layers above.
COPY src /src

# Start the handler
CMD ["python3", "/src/handler.py"]

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 runpod-workers
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# vLLM Load Balancer
2+
3+
A FastAPI-based load balancer for serving vLLM models with RunPod integration. Provides OpenAI-compatible APIs with streaming and non-streaming text generation.
4+
5+
## Docker Image
6+
7+
Use the pre-built Docker image: `mwiki/lbvll:v1`
8+
9+
## Deployment on RunPod
10+
11+
1. Create a new serverless endpoint
12+
2. Use Docker image: `mwiki/lbvll:v4`
13+
3. Set environment variable: `MODEL_NAME` (e.g., "microsoft/DialoGPT-medium")
14+
15+
## API Usage with curl
16+
17+
### Text Completion (Non-streaming)
18+
19+
```bash
20+
curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/completions" \
21+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
22+
-H "Content-Type: application/json" \
23+
-d '{
24+
"prompt": "Write a story about a brave knight",
25+
"max_tokens": 100,
26+
"temperature": 0.7,
27+
"stream": false
28+
}'
29+
```
30+
31+
### Text Completion (Streaming)
32+
33+
```bash
34+
curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/completions" \
35+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
36+
-H "Content-Type: application/json" \
37+
-d '{
38+
"prompt": "Tell me about artificial intelligence",
39+
"max_tokens": 200,
40+
"temperature": 0.8,
41+
"stream": true
42+
}'
43+
```
44+
45+
### Chat Completions
46+
47+
```bash
48+
curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/chat/completions" \
49+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
50+
-H "Content-Type: application/json" \
51+
-d '{
52+
"messages": [
53+
{"role": "user", "content": "What is the capital of France?"}
54+
],
55+
"max_tokens": 50,
56+
"temperature": 0.7
57+
}'
58+
```
59+
60+
### Health Check
61+
62+
```bash
63+
curl -X GET "https://your-endpoint-id.api.runpod.ai/ping" \
64+
-H "Authorization: Bearer YOUR_RUNPOD_API_KEY"
65+
```
66+
67+
## Local Testing
68+
69+
Run the test script:
70+
```bash
71+
export ENDPOINT_ID="your-endpoint-id"
72+
export RUNPOD_API_KEY="your-api-key"
73+
python example.py
74+
```

builder/requirements.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Required Python packages get listed here, one per line.
2+
# Recommended: lock the version number to avoid unexpected changes.
3+
4+
# You can also install packages from a git repository, e.g.:
5+
# git+https://github.com/runpod/runpod-python.git
6+
# To learn more, see https://pip.pypa.io/en/stable/reference/requirements-file-format/
7+
8+
ray
9+
pandas
10+
pyarrow
11+
runpod~=1.7.0
12+
huggingface-hub
13+
packaging
14+
typing-extensions==4.7.1
15+
pydantic
16+
pydantic-settings
17+
hf-transfer
18+
transformers

builder/setup.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash

# NOTE: This script is not run by default for the template docker image.
# If you use a custom base image you can add your required system dependencies here.
#
# What it does, in order: upgrades system packages, installs base tooling,
# installs Python 3.10 from the deadsnakes PPA and makes it the `python3`
# alternative, bootstraps pip, then removes package caches.

# Stop on any error, on use of an unset variable, and on a failed pipeline stage.
set -euo pipefail

# Keep apt/debconf from waiting on interactive prompts during an unattended run.
# Exported for this script only; nothing is persisted in the image environment.
export DEBIAN_FRONTEND=noninteractive

apt-get update && apt-get upgrade -y # Update System

# Install System Dependencies
# - openssh-server: for ssh access and web terminal
apt-get install -y --no-install-recommends software-properties-common curl git openssh-server

# Install Python 3.10
add-apt-repository ppa:deadsnakes/ppa -y
apt-get update && apt-get install -y --no-install-recommends python3.10 python3.10-dev python3.10-distutils
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Install pip for Python 3.10
# -f makes curl fail on an HTTP error instead of saving the error page, which
# would otherwise be handed to python3; -sS is quiet but still prints errors;
# -L follows redirects.
curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 get-pip.py
rm -f get-pip.py # The installer is not needed once pip is in place

# Clean up, remove unnecessary packages and help reduce image size
apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*

0 commit comments

Comments
 (0)