|
| 1 | +--- |
| 2 | +tags: |
| 3 | + - Inside |
| 4 | +--- |
| 5 | + |
| 6 | +# LiteLLM Inside |
| 7 | + |
| 8 | +- 价格信息 |
| 9 | + - https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json |
| 10 | + |
| 11 | +| Header | For | |
| 12 | +| ----------------------------------------------- | ------------------------------------------ | |
| 13 | +| **Request** | | |
| 14 | +| `x-litellm-timeout: <seconds>` | 请求超时设置 | |
| 15 | +| `x-litellm-stream-timeout: <seconds>` | 第一个 Chunk 超时 | |
| 16 | +| `x-litellm-enable-message-redaction: <boolean>` | 启用消息内容屏蔽 | |
| 17 | +| `x-litellm-tags: <tag1,tag2,...>` | 请求标签 | |
| 18 | +| `x-litellm-num-retries: <number>` | 请求重试次数 | |
| 19 | +| `x-litellm-spend-logs-metadata: <json>` | 请求开销日志元数据 | |
| 20 | +| **Request/Anthropic** | | |
| 21 | +| `anthropic-version: <str>` | API version | |
| 22 | +| `anthropic-beta: <str>` | beta version | |
| 23 | +| **Request/OpenAI** | | |
| 24 | +| `openai-organization: <str>` | organization id | |
| 25 | +| **Request/Bypass** | | |
| 26 | +| `x-*` | 需要配置 forward_client_headers_to_llm_api | |
| 27 | +| **Response/Rate Limit** | | |
| 28 | +| `x-ratelimit-remaining-requests: <int>` | 剩余可用请求数 | |
| 29 | +| `x-ratelimit-remaining-tokens: <int>` | 剩余可用token数 | |
| 30 | +| `x-ratelimit-limit-requests: <int>` | 最大请求数限制 | |
| 31 | +| `x-ratelimit-limit-tokens: <int>` | 最大token数限制 | |
| 32 | +| `x-ratelimit-reset-requests: <int>` | 请求限制重置时间 | |
| 33 | +| `x-ratelimit-reset-tokens: <int>` | token限制重置时间 | |
| 34 | +| **Response/Latency** | | |
| 35 | +| `x-litellm-response-duration-ms: <float>` | 从请求到响应的总耗时(毫秒) | |
| 36 | +| `x-litellm-overhead-duration-ms: <float>` | LiteLLM处理开销时间(毫秒) | |
| 37 | +| **Response/Retry&Fallback** | | |
| 38 | +| `x-litellm-attempted-retries: <int>` | 实际重试次数 | |
| 39 | +| `x-litellm-attempted-fallbacks: <int>` | 实际回退次数 | |
| 40 | +| `x-litellm-max-fallbacks: <int>` | 最大回退次数限制 | |
| 41 | +| **Response/Cost** | | |
| 42 | +| `x-litellm-response-cost: <float>` | API调用费用 | |
| 43 | +| `x-litellm-key-spend: <float>` | API密钥总消费 | |
| 44 | +| **Response/Bypass** | | |
| 45 | +| `llm_provider-*` | 透传LLM提供商的响应头 | |
| 46 | + |
| 47 | +```json title="spend-logs-metadata" |
| 48 | +{ "user_id": "12345", "project_id": "proj_abc", "request_type": "chat_completion" } |
| 49 | +``` |
| 50 | + |
| 51 | +## config.yaml |
| 52 | + |
| 53 | +```yaml |
| 54 | +include: |
| 55 | + - model_config.yaml |
| 56 | + |
| 57 | +model_list: [] |
| 58 | +litellm_settings: |
| 59 | + num_retries: 3 # retry call 3 times on each model_name (e.g. zephyr-beta) |
| 60 | + request_timeout: 10 # raise Timeout error if call takes longer than 10s. Sets litellm.request_timeout |
| 61 | + fallbacks: [{"zephyr-beta": ["gpt-4o"]}] # fallback to gpt-4o if call fails num_retries |
| 62 | + context_window_fallbacks: [{"zephyr-beta": ["gpt-3.5-turbo-16k"]}, {"gpt-4o": ["gpt-3.5-turbo-16k"]}] # fallback to gpt-3.5-turbo-16k if context window error |
| 63 | + allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute. |
| 64 | +router_settings: # router_settings are optional |
| 65 | + routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle" |
| 66 | + model_group_alias: {"gpt-4": "gpt-4o"} # all requests with `gpt-4` will be routed to models with `gpt-4o` |
| 67 | + num_retries: 2 |
| 68 | + timeout: 30 # 30 seconds |
| 69 | + redis_host: <your redis host> # set this when using multiple litellm proxy deployments, load balancing state stored in redis |
| 70 | + redis_password: <your redis password> |
| 71 | + redis_port: 1992 |
| 72 | +general_settings: {} |
| 73 | +environment_variables: {} |
| 74 | +``` |
| 75 | + |
| 76 | +```yaml |
| 77 | +model_list: |
| 78 | + - model_name: glm-4.5 |
| 79 | + litellm_params: |
| 80 | + model: openai/glm-4.5 |
| 81 | + litellm_credential_name: zhipu_credential |
| 82 | + |
| 83 | + - model_name: glm-4.5-air |
| 84 | + litellm_params: |
| 85 | + model: openai/glm-4.5-air |
| 86 | + litellm_credential_name: zhipu_credential |
| 87 | + |
| 88 | + - model_name: '*' |
| 89 | + litellm_params: |
| 90 | + model: openai/glm-4.5-air |
| 91 | + litellm_credential_name: zhipu_credential |
| 92 | + |
| 93 | +credential_list: |
| 94 | + - credential_name: zhipu_credential |
| 95 | + credential_values: |
| 96 | + api_base: os.environ/ZHIPU_API_BASE |
| 97 | + api_key: os.environ/ZHIPU_API_KEY |
| 98 | + credential_info: |
| 99 | + description: '智谱' |
| 100 | +``` |
| 101 | + |
| 102 | +**支持通配符** |
| 103 | + |
| 104 | +```yaml |
| 105 | +model_list: |
| 106 | + - model_name: xai/* |
| 107 | + litellm_params: |
| 108 | + model: xai/* |
| 109 | + api_key: os.environ/XAI_API_KEY |
| 110 | + |
| 111 | +litellm_settings: |
| 112 | + check_provider_endpoint: true |
| 113 | +``` |
| 114 | + |
| 115 | +```yaml |
| 116 | +# params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body |
| 117 | +litellm_params: |
| 118 | + model: openai/facebook/opt-125m |
| 119 | + api_base: http://0.0.0.0:4000/v1 |
| 120 | + api_key: none |
| 121 | + api_version: "2023-05-15" |
| 122 | + rpm: 60 # Optional[int]: When rpm/tpm set - litellm uses weighted pick for load balancing. rpm = Rate limit for this deployment: in requests per minute (rpm). |
| 123 | + tpm: 1000 # Optional[int]: tpm = Tokens Per Minute |
| 124 | + azure_ad_token: "" |
| 125 | + seed: 1234 |
| 126 | + max_tokens: 1024 |
| 127 | + temperature: 0.2 |
| 128 | + organization: "org-12345" |
| 129 | + aws_region_name: "us-west-2" |
| 130 | + extra_headers: {"AI-Resource Group": "ishaan-resource"} |
| 131 | +model_info: |
| 132 | + version: 2 |
| 133 | + access_groups: ['restricted-models'] |
| 134 | + supported_environments: ["development", "production", "staging"] |
| 135 | + custom_tokenizer: |
| 136 | + identifier: deepseek-ai/DeepSeek-V3-Base |
| 137 | + revision: main |
| 138 | + auth_token: os.environ/HUGGINGFACE_API_KEY |
| 139 | +``` |
| 140 | + |
| 141 | +- https://docs.litellm.ai/docs/proxy/configs |
| 142 | +- https://docs.litellm.ai/docs/proxy/config_settings |
| 143 | + |
| 144 | +## 参考 |
| 145 | + |
| 146 | +- https://docs.litellm.ai/docs/proxy/request_headers |
| 147 | +- Anthropic |
| 148 | + - Beta header |
| 149 | + - https://docs.claude.com/en/api/beta-headers |
| 150 | + - Features |
| 151 | + - https://docs.claude.com/en/docs/build-with-claude/overview |
0 commit comments