File tree: 1 file changed, +8 -6 lines changed
Original file line number | Diff line number | Diff line change
3636SYSTEM_PROMPT = "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step." # noqa: E501
3737MAX_TOKENS = 16384
3838# We explicitly set the target number of blocks to help tune performance.
39- # For materialized datasets, the number of blocks determined by ray data can be small,
40- # especially for a multi-stage pipeline like the one here.
41- TARGET_NUM_ROWS_PER_BLOCK = 100
39+ # For materialized datasets, the number of blocks determined by ray data can be small
40+ # for a multi-stage pipeline like the one here.
41+ TARGET_NUM_ROWS_PER_BLOCK = 256
4242
4343# Enable more detailed logging of tasks per actor
44- ray .init (runtime_env = {"env_vars" : {"RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING" : 1 }})
44+ ray .init (runtime_env = {"env_vars" : {"RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING" : "1" }})
4545
4646# 1. Load datasets
4747apps_ds = datasets .load_dataset (
170170 enable_prefix_caching = True ,
171171 enable_chunked_prefill = True ,
172172 max_num_batched_tokens = 4096 ,
173- tensor_parallel_size = 4 ,
173+ tensor_parallel_size = 2 ,
174174 ),
175- concurrency = 2 ,
175+ concurrency = 4 ,
176176 batch_size = 128 ,
177177 )
178178
205205 # Each handles a batch of requests
206206 concurrency = 1 ,
207207 batch_size = 16 ,
208+ # Throttle QPS to avoid rate limit errors
209+ qps = 5 ,
208210 )
209211 # define the reformatter
210212 reformatter = build_llm_processor (
You can’t perform that action at this time.
0 commit comments