runpod-workers
diff --git a/‎.gitignore‎
Lines changed: 160 additions & 0 deletions b/‎.gitignore‎
Lines changed: 160 additions & 0 deletions
diff --git a/‎Dockerfile‎
Lines changed: 29 additions & 0 deletions b/‎Dockerfile‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 98 additions & 0 deletions b/‎README.md‎
Lines changed: 98 additions & 0 deletions
diff --git a/‎builder/requirements.txt‎
Lines changed: 18 additions & 0 deletions b/‎builder/requirements.txt‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎builder/setup.sh‎
Lines changed: 35 additions & 0 deletions b/‎builder/setup.sh‎
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
@@ -0,0 +1,29 @@
+# syntax=docker/dockerfile:1
+# The RUN --mount=type=cache instructions below require the BuildKit frontend.
+FROM nvidia/cuda:12.1.0-base-ubuntu22.04
+
+# Install pip from the distribution and drop the apt package index in the same
+# layer so it is not stored in the image.
+# NOTE(review): --no-install-recommends is intentionally not used here; the
+# packages recommended by python3-pip may be needed by dependencies at build
+# or run time. Confirm before trimming them.
+RUN apt-get update -y \
+    && apt-get install -y python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Add the CUDA 12.1 compat library directory to the dynamic linker cache.
+RUN ldconfig /usr/local/cuda-12.1/compat/
+
+# Install Python dependencies
+# The manifest is copied on its own so this layer is only rebuilt when
+# requirements.txt changes, not when application source changes.
+COPY builder/requirements.txt /requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install --upgrade pip && \
+    python3 -m pip install --upgrade -r /requirements.txt
+
+# Pin vLLM to an exact version for reproducible builds. Override at build time
+# with --build-arg VLLM_VERSION=<version>.
+# FlashInfer provides optimized attention kernels; its wheel index URL is built
+# from the CUDA and torch version tags below.
+# NOTE(review): confirm that CUDA_VERSION/TORCH_VERSION match the torch build
+# that the pinned vLLM release installs.
+ARG VLLM_VERSION=0.9.1
+ARG CUDA_VERSION=cu121
+ARG TORCH_VERSION=torch2.3
+
+# The pip cache mount keeps downloaded wheels out of the image layer,
+# matching the requirements install above.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install vllm==${VLLM_VERSION} && \
+    python3 -m pip install flashinfer -i https://flashinfer.ai/whl/${CUDA_VERSION}/${TORCH_VERSION}
+
+# Make modules under / and /vllm-workspace importable.
+ENV PYTHONPATH="/:/vllm-workspace"
+
+# Application source is copied last so code changes do not invalidate the
+# dependency layers above.
+COPY src /src
+
+WORKDIR /src
+
+# Exec form: python3 runs as PID 1 and receives stop signals directly.
+CMD ["python3", "handler.py"]
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 runpod-workers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,98 @@
+# vLLM Load Balancer
+
+A FastAPI-based load balancer for serving vLLM models with RunPod integration. Provides OpenAI-compatible APIs with streaming and non-streaming text generation.
+
+## Prerequisites
+
+Before you begin, make sure you have:
+
+- A RunPod account (sign up at [runpod.io](https://runpod.io))
+- A RunPod API key (available in your RunPod dashboard)
+- Basic understanding of REST APIs and HTTP requests
+- `curl` or a similar tool for testing API endpoints
+
+## Docker Image
+
+Use the pre-built Docker image: `runpod/vllm-loadbalancer:dev`
+
+## Environment Variables
+
+Configure these environment variables in your RunPod endpoint:
+
+| Variable | Required | Description | Default | Example |
+|----------|----------|-------------|---------|---------|
+| `MODEL_NAME` | **Yes** | HuggingFace model identifier | None | `microsoft/DialoGPT-medium` |
+| `TENSOR_PARALLEL_SIZE` | No | Number of GPUs for model parallelism | `1` | `2` |
+| `DTYPE` | No | Model precision type | `auto` | `float16` |
+| `TRUST_REMOTE_CODE` | No | Allow execution of custom model code shipped in the model repository | `true` | `false` |
+| `MAX_MODEL_LEN` | No | Maximum sequence length | None (auto) | `2048` |
+| `GPU_MEMORY_UTILIZATION` | No | GPU memory usage ratio | `0.9` | `0.8` |
+| `ENFORCE_EAGER` | No | Disable CUDA graphs | `false` | `true` |
+
+## Deployment on RunPod
+
+1. Create a new serverless endpoint
+2. Use Docker image: `runpod/vllm-loadbalancer:dev`
+3. Set required environment variable: `MODEL_NAME` (e.g., "microsoft/DialoGPT-medium")
+4. Optional: Configure additional environment variables as needed
+
+## API Usage with curl
+
+### Text Completion (Non-streaming)
+
+```bash
+curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/completions" \
+  -H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "prompt": "Write a story about a brave knight",
+    "max_tokens": 100,
+    "temperature": 0.7,
+    "stream": false
+  }'
+```
+
+### Text Completion (Streaming)
+
+```bash
+curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/completions" \
+  -H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "prompt": "Tell me about artificial intelligence",
+    "max_tokens": 200,
+    "temperature": 0.8,
+    "stream": true
+  }'
+```
+
+### Chat Completions
+
+```bash
+curl -X POST "https://your-endpoint-id.api.runpod.ai/v1/chat/completions" \
+  -H "Authorization: Bearer YOUR_RUNPOD_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "messages": [
+      {"role": "user", "content": "What is the capital of France?"}
+    ],
+    "max_tokens": 50,
+    "temperature": 0.7
+  }'
+```
+
+### Health Check
+
+```bash
+curl -X GET "https://your-endpoint-id.api.runpod.ai/ping" \
+  -H "Authorization: Bearer YOUR_RUNPOD_API_KEY"
+```
+
+## Local Testing
+
+Run the test script:
+```bash
+export ENDPOINT_ID="your-endpoint-id"
+export RUNPOD_API_KEY="your-api-key"
+python example.py
+```
@@ -0,0 +1,18 @@
+# Required Python packages get listed here, one per line.
+# It is recommended to lock the version numbers to avoid unexpected changes.
+
+# You can also install packages from a git repository, e.g.:
+# git+https://github.com/runpod/runpod-python.git
+# To learn more, see https://pip.pypa.io/en/stable/reference/requirements-file-format/
+
+# Python dependencies for the worker image.
+# Only runpod and typing-extensions carry a version constraint; every other
+# package resolves to whatever release is current when the image is built.
+# NOTE(review): the Dockerfile installs vLLM in a later step, which may upgrade
+# or replace packages listed here - confirm the pins below stay compatible.
+ray
+pandas
+pyarrow
+# Compatible-release constraint: any 1.7.x release.
+runpod~=1.7.0
+huggingface-hub
+packaging
+# Exact pin.
+typing-extensions==4.7.1
+pydantic
+pydantic-settings
+hf-transfer
+transformers
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# NOTE: This script is not run by default for the template docker image.
+#       If you use a custom base image you can add your required system dependencies here.
+#
+# USAGE: This script can be used to install additional system packages or configurations:
+#   - Jupyter kernels (R, Julia, Scala, etc.)
+#   - Additional CUDA libraries or drivers
+#   - System-level debugging tools (htop, nvtop, etc.)
+#   - Custom compilers or build tools
+#   - SSH keys or security configurations
+#   - Custom Python versions or environments
+#
+# To use this script, add the following COPY and RUN instructions to the Dockerfile:
+#   COPY builder/setup.sh /setup.sh
+#   RUN chmod +x /setup.sh && /setup.sh
+
+set -e # Stop script on error
+
+# Keep apt/dpkg from prompting for input during an unattended image build.
+# Exported here so it applies only to this script, not to the runtime environment.
+export DEBIAN_FRONTEND=noninteractive
+
+apt-get update && apt-get upgrade -y # Update System
+
+# Install System Dependencies
+# - ca-certificates: needed for the HTTPS download of get-pip.py below
+# - openssh-server: for ssh access and web terminal
+# - software-properties-common: provides add-apt-repository
+apt-get install -y --no-install-recommends ca-certificates curl git openssh-server software-properties-common
+
+# Install Python 3.10
+add-apt-repository ppa:deadsnakes/ppa -y
+apt-get update && apt-get install -y --no-install-recommends python3.10 python3.10-dev python3.10-distutils
+update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
+
+# Install pip for Python 3.10
+# -f makes curl exit non-zero on an HTTP error instead of saving the error page,
+# so a failed download stops the script rather than being passed to python3.
+curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py
+python3 /tmp/get-pip.py
+rm -f /tmp/get-pip.py # Installer is not needed once pip is installed
+
+# Clean up, remove unnecessary packages and help reduce image size
+apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*