Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fixed summary models for deepseek and openrouter, added TODOs for improving openrouter support #414

Merged
merged 2 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions gptme/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,20 +172,21 @@ def _summarize_str(content: str) -> str:
Message("user", content=f"Summarize this:\n{content}"),
]

# get default provider
provider: Provider = get_model().provider # type: ignore
model = f"{provider}/{get_summary_model(provider)}"
base_model = _get_base_model(model)
context_limit = MODELS[provider][base_model]["context"]

if len_tokens(messages, base_model) > context_limit:
# get summary model for provider
model = get_model(f"{provider}/{get_summary_model(provider)}")

if len_tokens(messages, model.model) > model.context:
raise ValueError(
f"Cannot summarize more than {context_limit} tokens, got {len_tokens(messages, base_model)}"
f"Cannot summarize more than {model.context} tokens, got {len_tokens(messages, model.model)}"
)

summary = _chat_complete(messages, model, None)
summary = _chat_complete(messages, model.full, None)
assert summary
logger.debug(
f"Summarized long output ({len_tokens(content, base_model)} -> {len_tokens(summary, base_model)} tokens): "
f"Summarized long output ({len_tokens(content, model.model)} -> {len_tokens(summary, model.model)} tokens): "
+ summary
)
return summary
Expand Down
5 changes: 5 additions & 0 deletions gptme/llm/llm_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from openai import OpenAI # fmt: skip
from openai.types.chat import ChatCompletionToolParam # fmt: skip


# Dictionary to store clients for each provider
clients: dict[Provider, "OpenAI"] = {}
logger = logging.getLogger(__name__)
Expand All @@ -27,6 +28,10 @@
"X-Title": "gptme",
}

# TODO: improve provider routing for OpenRouter: https://openrouter.ai/docs/provider-routing
# TODO: set `require_parameters` so requests are only routed to providers that support every parameter sent: https://openrouter.ai/docs/provider-routing#required-parameters-_beta_
# TODO: restrict accepted provider quantization levels via `quantizations`: https://openrouter.ai/docs/provider-routing#quantization


ALLOWED_FILE_EXTS = ["jpg", "jpeg", "png", "gif"]

Expand Down
2 changes: 2 additions & 0 deletions gptme/llm/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,5 +258,7 @@ def get_summary_model(provider: Provider) -> str: # pragma: no cover
return "gemini-1.5-flash-latest"
elif provider == "anthropic":
return "claude-3-haiku-20240307"
elif provider == "deepseek":
return "deepseek-chat"
else:
raise ValueError(f"Provider {provider} did not have a summary model")
Loading