
Commit 53f38ed

Enhance model configuration files by adding a 'max_connections' parameter and updating the 'timeout' and 'max_retries' values for improved API request handling. Update config_loader.py to unify timeout handling across clients while maintaining backward compatibility with the legacy 'client_timeout' parameter.
1 parent: f2d2c62

4 files changed: +16 −11 lines

configs/models/_template_api.yaml

Lines changed: 3 additions & 2 deletions
@@ -46,8 +46,9 @@ model:
   temperature: 0.6
   top_p: 0.95
   reasoning_effort: high  # For reasoning models (high/low/xhigh)
-  timeout: 3600  # Request timeout (seconds)
-  max_retries: 10  # Retry count on errors
+  timeout: 7200  # Request timeout (seconds)
+  max_retries: 2  # Retry count on errors
+  max_connections: 30  # Number of concurrent API requests (default: 10)

 # =============================================================================
 # Benchmark-specific Overrides (optional)
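For reference, a config derived from this template might set the new parameters as in the minimal sketch below. This is hypothetical: the benchmark key is invented, and the model/params/benchmarks layout is assumed from how config_loader.py reads client, params, and per-benchmark overrides.

model:
  client: openai
  params:
    temperature: 0.6
    top_p: 0.95
    timeout: 7200        # Request timeout (seconds)
    max_retries: 2
    max_connections: 30  # Number of concurrent API requests

benchmarks:
  example_benchmark:     # hypothetical benchmark key
    timeout: 10800       # per-benchmark override takes precedence over params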

configs/models/_template_vllm.yaml

Lines changed: 3 additions & 0 deletions
@@ -55,6 +55,9 @@ model:
   max_tokens: 16384
   temperature: 0.6
   top_p: 0.95
+  timeout: 7200  # Request timeout (seconds)
+  max_retries: 2
+  max_connections: 30  # Number of concurrent API requests (default: 10)

 # For thinking/reasoning models, add:
 extra_body:

run_eval.py

Lines changed: 1 addition & 0 deletions
@@ -355,6 +355,7 @@ def get_model_generate_config(config_name: str, benchmark: str) -> dict:
         "reasoning_tokens": "reasoning_tokens",
         "timeout": "timeout",
         "max_retries": "max_retries",
+        "max_connections": "max_connections",  # Number of concurrent API requests
     }

     for key, eval_key in key_mapping.items():
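The loop body falls outside this hunk; a plausible sketch of what the mapping does, with the function and dict names invented for illustration:

# Hypothetical sketch: copy known model params into the eval-facing
# config under their mapped names (names assumed, not from the diff).
def apply_key_mapping(params: dict, key_mapping: dict) -> dict:
    generate_config = {}
    for key, eval_key in key_mapping.items():
        if key in params and params[key] is not None:
            generate_config[eval_key] = params[key]
    return generate_config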

src/core/config_loader.py

Lines changed: 9 additions & 9 deletions
@@ -436,21 +436,21 @@ def get_inspect_model_args(self, config_name: str, benchmark: Optional[str] = None
         if api_key:
             args["api_key"] = api_key

-        # Client timeout and request timeout
+        # Client timeout - unified "timeout" parameter for all clients
         client = model_section.get("client", "openai")
         params = model_section.get("params", {})
         benchmark_overrides = model_config.get("benchmarks", {}).get(benchmark, {}) if benchmark else {}

-        # client_timeout for OpenAI provider
-        client_timeout = benchmark_overrides.get("client_timeout", params.get("client_timeout"))
-        if client_timeout is not None:
-            if client == "openai":
-                args["client_timeout"] = float(client_timeout)
-
-        # timeout for litellm provider (passed to acompletion)
+        # timeout: works for both openai and litellm clients
+        # Also support legacy client_timeout for backward compatibility
         timeout = benchmark_overrides.get("timeout", params.get("timeout"))
+        if timeout is None:
+            timeout = benchmark_overrides.get("client_timeout", params.get("client_timeout"))
+
         if timeout is not None:
-            if client == "litellm":
+            if client == "openai":
+                args["client_timeout"] = float(timeout)
+            elif client == "litellm":
                 args["timeout"] = float(timeout)

         return args
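Read on its own, the new resolution order amounts to the helper below; the function name and argument shapes are invented for illustration, but the logic mirrors the hunk above:

from typing import Optional

def resolve_timeout(benchmark_overrides: dict, params: dict) -> Optional[float]:
    # Preferred: unified "timeout", benchmark override first, then params
    timeout = benchmark_overrides.get("timeout", params.get("timeout"))
    if timeout is None:
        # Legacy "client_timeout" kept for backward compatibility
        timeout = benchmark_overrides.get("client_timeout", params.get("client_timeout"))
    return float(timeout) if timeout is not None else None

The resolved value then feeds client_timeout for the openai client and timeout for litellm, which is the unification the commit message describes.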
