Skip to content

Commit 77882bc

Browse files
committed
fix: apply upstream fixes for testing phase issues
Implement changes from runpod-workers/worker-vllm PRs runpod-workers#234, runpod-workers#236, and runpod-workers#138: - Remove space from gpuIds in hub.json (PR runpod-workers#234) - Remove unsupported CUDA versions 12.1-12.4 from hub.json and tests.json (PR runpod-workers#236) - Add error handling for engine initialization and handler exceptions (PR runpod-workers#138 style) - Ensure all errors return proper ErrorResponse format These fixes address GPU allocation, CUDA compatibility, and error handling issues that cause the testing phase to hang indefinitely.
1 parent 462c6d9 commit 77882bc

File tree

3 files changed

+22
-67
lines changed

3 files changed

+22
-67
lines changed

.runpod/hub.json

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,14 @@
77
"config": {
88
"runsOn": "GPU",
99
"containerDiskInGb": 150,
10-
"gpuIds": "ADA_80_PRO, AMPERE_80",
10+
"gpuIds": "ADA_80_PRO,AMPERE_80",
1111
"gpuCount": 1,
1212
"allowedCudaVersions": [
1313
"12.9",
1414
"12.8",
1515
"12.7",
1616
"12.6",
17-
"12.5",
18-
"12.4",
19-
"12.3",
20-
"12.2",
21-
"12.1"
17+
"12.5"
2218
],
2319
"presets": [
2420
{

src/handler.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,33 @@
11
import os
22
import runpod
3-
from utils import JobInput
3+
import logging
4+
from utils import JobInput, create_error_response
45
from engine import vLLMEngine, OpenAIvLLMEngine
56

6-
vllm_engine = vLLMEngine()
7-
OpenAIvLLMEngine = OpenAIvLLMEngine(vllm_engine)
7+
# Initialize engines at module level with error handling
8+
try:
9+
vllm_engine = vLLMEngine()
10+
OpenAIvLLMEngine = OpenAIvLLMEngine(vllm_engine)
11+
except Exception as e:
12+
logging.error(f"Failed to initialize vLLM engines: {e}")
13+
raise
814

915
async def handler(job):
10-
job_input = JobInput(job["input"])
11-
engine = OpenAIvLLMEngine if job_input.openai_route else vllm_engine
12-
results_generator = engine.generate(job_input)
13-
async for batch in results_generator:
14-
yield batch
16+
try:
17+
job_input = JobInput(job["input"])
18+
engine = OpenAIvLLMEngine if job_input.openai_route else vllm_engine
19+
results_generator = engine.generate(job_input)
20+
async for batch in results_generator:
21+
yield batch
22+
except Exception as e:
23+
# Return error in the same format as engine errors
24+
logging.error(f"Error in handler: {e}")
25+
yield {"error": create_error_response(str(e)).model_dump()}
1526

1627
runpod.serverless.start(
1728
{
1829
"handler": handler,
1930
"concurrency_modifier": lambda x: vllm_engine.max_concurrency,
2031
"return_aggregate_stream": True,
2132
}
22-
)
33+
)

tests.json

Lines changed: 0 additions & 52 deletions
This file was deleted.

0 commit comments

Comments
 (0)