tmp

enitrat · enitrat · commit 502c252018cd · 2025-11-21T19:13:43.000Z
diff --git a/python/src/cairo_coder/agents/registry.py b/python/src/cairo_coder/agents/registry.py
@@ -5,8 +5,10 @@
 agent system with a simple, in-memory registry of available agents.
 """
 
-from dataclasses import dataclass
+from collections.abc import Callable
+from dataclasses import dataclass, field
 from enum import Enum
+from typing import Any
 
 from cairo_coder.core.config import VectorStoreConfig
 from cairo_coder.core.rag_pipeline import RagPipeline, RagPipelineFactory
@@ -33,7 +35,8 @@ class AgentSpec:
     name: str
     description: str
     sources: list[DocumentSource]
-    generation_program_type: AgentId
+    pipeline_builder: Callable[..., RagPipeline]
+    builder_kwargs: dict[str, Any] = field(default_factory=dict)
     max_source_count: int = 5
     similarity_threshold: float = 0.4
 
@@ -48,31 +51,15 @@ def build(self, vector_db: SourceFilteredPgVectorRM, vector_store_config: Vector
         Returns:
             Configured RagPipeline instance
         """
-        match self.generation_program_type:
-            case AgentId.STARKNET:
-                return RagPipelineFactory.create_pipeline(
-                    name=self.name,
-                    vector_store_config=vector_store_config,
-                    sources=self.sources,
-                    query_processor=create_query_processor(),
-                    generation_program=create_generation_program(AgentId.STARKNET),
-                    mcp_generation_program=create_mcp_generation_program(),
-                    max_source_count=self.max_source_count,
-                    similarity_threshold=self.similarity_threshold,
-                    vector_db=vector_db,
-                )
-            case AgentId.CAIRO_CODER:
-                return RagPipelineFactory.create_pipeline(
-                    name=self.name,
-                    vector_store_config=vector_store_config,
-                    sources=self.sources,
-                    query_processor=create_query_processor(),
-                    generation_program=create_generation_program(AgentId.CAIRO_CODER),
-                    mcp_generation_program=create_mcp_generation_program(),
-                    max_source_count=self.max_source_count,
-                    similarity_threshold=self.similarity_threshold,
-                    vector_db=vector_db,
-                )
+        return self.pipeline_builder(
+            name=self.name,
+            vector_store_config=vector_store_config,
+            vector_db=vector_db,
+            sources=self.sources,
+            max_source_count=self.max_source_count,
+            similarity_threshold=self.similarity_threshold,
+            **self.builder_kwargs,
+        )
 
 
 # The global registry of available agents
@@ -81,15 +68,25 @@ def build(self, vector_db: SourceFilteredPgVectorRM, vector_store_config: Vector
         name="Cairo Coder",
         description="General Cairo programming assistant",
         sources=list(DocumentSource),  # All sources
-        generation_program_type=AgentId.CAIRO_CODER,
+        pipeline_builder=RagPipelineFactory.create_pipeline,
+        builder_kwargs={
+            "query_processor": create_query_processor(),
+            "generation_program": create_generation_program(AgentId.CAIRO_CODER),
+            "mcp_generation_program": create_mcp_generation_program(),
+        },
         max_source_count=5,
         similarity_threshold=0.4,
     ),
     AgentId.STARKNET: AgentSpec(
         name="Starknet Agent",
         description="Assistant for the Starknet ecosystem (contracts, tools, docs).",
         sources=list(DocumentSource),
-        generation_program_type=AgentId.STARKNET,
+        pipeline_builder=RagPipelineFactory.create_pipeline,
+        builder_kwargs={
+            "query_processor": create_query_processor(),
+            "generation_program": create_generation_program(AgentId.STARKNET),
+            "mcp_generation_program": create_mcp_generation_program(),
+        },
         max_source_count=5,
         similarity_threshold=0.4,
     ),
diff --git a/python/src/cairo_coder/core/rag_pipeline.py b/python/src/cairo_coder/core/rag_pipeline.py
@@ -19,6 +19,7 @@
 from cairo_coder.core.types import (
     Document,
     DocumentSource,
+    FormattedSource,
     Message,
     ProcessedQuery,
     StreamEvent,
@@ -82,11 +83,34 @@ def __init__(self, config: RagPipelineConfig):
         self._current_processed_query: ProcessedQuery | None = None
         self._current_documents: list[Document] = []
 
+        # Token usage accumulator
+        self._accumulated_usage: dict[str, dict[str, int]] = {}
+
     @property
     def last_retrieved_documents(self) -> list[Document]:
         """Documents retrieved during the most recent pipeline execution."""
         return self._current_documents
 
+    def _accumulate_usage(self, prediction: dspy.Prediction) -> None:
+        """
+        Add a prediction's per-model token usage to the request totals.
+
+        Args:
+            prediction: DSPy prediction whose get_lm_usage() result is merged
+        """
+        # get_lm_usage() may return None (e.g. when DSPy usage tracking is
+        # disabled); treat that as "nothing to add" instead of raising.
+        usage = prediction.get_lm_usage() or {}
+        for model_name, metrics in usage.items():
+            totals = self._accumulated_usage.setdefault(model_name, {})
+            for metric_name, value in metrics.items():
+                # Additive merge: a metric seen for the first time starts at 0.
+                totals[metric_name] = totals.get(metric_name, 0) + value
+
+    def _reset_usage(self) -> None:
+        """Start a new request with an empty usage accumulator (rebinds a fresh dict)."""
+        self._accumulated_usage = {}
+
     async def _aprocess_query_and_retrieve_docs(
         self,
         query: str,
@@ -97,6 +121,7 @@ async def _aprocess_query_and_retrieve_docs(
         processed_query = await self.query_processor.aforward(
             query=query, chat_history=chat_history_str
         )
+        self._accumulate_usage(processed_query)
         self._current_processed_query = processed_query
 
         # Use provided sources or fall back to processed query sources
@@ -158,6 +183,9 @@ async def aforward(
         mcp_mode: bool = False,
         sources: list[DocumentSource] | None = None,
     ) -> dspy.Prediction:
+        # Reset usage for this request
+        self._reset_usage()
+
         chat_history_str = self._format_chat_history(chat_history or [])
         processed_query, documents = await self._aprocess_query_and_retrieve_docs(
             query, chat_history_str, sources
@@ -167,13 +195,17 @@ async def aforward(
         )
 
         if mcp_mode:
-            return await self.mcp_generation_program.aforward(documents)
+            result = await self.mcp_generation_program.aforward(documents)
+            self._accumulate_usage(result)
+            return result
 
         context = self._prepare_context(documents)
 
-        return await self.generation_program.aforward(
+        result = await self.generation_program.aforward(
             query=query, context=context, chat_history=chat_history_str
         )
+        self._accumulate_usage(result)
+        return result
 
 
     async def aforward_streaming(
@@ -268,28 +300,12 @@ async def aforward_streaming(
 
     def get_lm_usage(self) -> dict[str, dict[str, int]]:
         """
-        Get the total number of tokens used by the LLM.
-        """
-        generation_usage = self.generation_program.get_lm_usage()
-        query_usage = self.query_processor.get_lm_usage()
-        judge_usage = self.retrieval_judge.get_lm_usage()
-
-        # Additive merge strategy
-        merged_usage = {}
-
-        # Helper function to merge usage dictionaries
-        def merge_usage_dict(target: dict, source: dict) -> None:
-            for model_name, metrics in source.items():
-                if model_name not in target:
-                    target[model_name] = {}
-                for metric_name, value in metrics.items():
-                    target[model_name][metric_name] = target[model_name].get(metric_name, 0) + value
+        Get accumulated token usage from all predictions in the pipeline.
 
-        merge_usage_dict(merged_usage, generation_usage)
-        merge_usage_dict(merged_usage, query_usage)
-        merge_usage_dict(merged_usage, judge_usage)
-
-        return merged_usage
+        Returns:
+            Copy of the per-model usage totals (callers may mutate it freely)
+        """
+        return {model: dict(metrics) for model, metrics in self._accumulated_usage.items()}
 
     def _format_chat_history(self, chat_history: list[Message]) -> str:
         """
@@ -311,7 +327,7 @@ def _format_chat_history(self, chat_history: list[Message]) -> str:
 
         return "\n".join(formatted_messages)
 
-    def _format_sources(self, documents: list[Document]) -> list[dict[str, Any]]:
+    def _format_sources(self, documents: list[Document]) -> list[FormattedSource]:
         """
         Format documents for the frontend-friendly sources event.
 
@@ -322,9 +338,9 @@ def _format_sources(self, documents: list[Document]) -> list[dict[str, Any]]:
             documents: List of retrieved documents
 
         Returns:
-            List of dicts: [{"title": str, "url": str}, ...]
+            List of formatted sources with metadata
         """
-        sources: list[dict[str, str]] = []
+        sources: list[FormattedSource] = []
         seen_urls: set[str] = set()
 
 
diff --git a/python/src/cairo_coder/core/types.py b/python/src/cairo_coder/core/types.py
@@ -74,6 +74,29 @@ class ProcessedQuery:
     is_test_related: bool = False
     resources: list[DocumentSource] = field(default_factory=list)
 
+LMUsageEntry = dict[str, Any]
+LMUsage = dict[str, LMUsageEntry]
+
+
+class RetrievedSourceData(TypedDict):
+    """Dict shape of one retrieved source as stored in the database."""
+
+    page_content: str  # presumably the retrieved document's text -- confirm with producer
+    metadata: DocumentMetadata
+
+
+class FormattedSourceMetadata(TypedDict):
+    """Metadata carried by each formatted source sent to the frontend."""
+
+    title: str
+    url: str
+    source_type: str  # NOTE(review): plain str rather than DocumentSource -- confirm intended
+
+
+class FormattedSource(TypedDict):
+    """One formatted source sent to the frontend; wraps a FormattedSourceMetadata."""
+
+    metadata: FormattedSourceMetadata
 
 # Helper to extract domain title
 def title_from_url(url: str) -> str:
diff --git a/python/src/cairo_coder/db/models.py b/python/src/cairo_coder/db/models.py
@@ -10,6 +10,8 @@
 
 from pydantic import BaseModel, Field
 
+from cairo_coder.core.types import RetrievedSourceData
+
 
 class UserInteraction(BaseModel):
     """Represents a record in the user_interactions table."""
@@ -21,5 +23,5 @@ class UserInteraction(BaseModel):
     chat_history: Optional[list[dict[str, Any]]] = None
     query: str
     generated_answer: Optional[str] = None
-    retrieved_sources: Optional[list[dict[str, Any]]] = None
+    retrieved_sources: Optional[list[RetrievedSourceData]] = None
     llm_usage: Optional[dict[str, Any]] = None
diff --git a/python/src/cairo_coder/dspy/generation_program.py b/python/src/cairo_coder/dspy/generation_program.py
@@ -192,12 +192,6 @@ def __init__(self, program_type):
             raise FileNotFoundError(f"{compiled_program_path} not found")
         self.generation_program.load(compiled_program_path)
 
-    def get_lm_usage(self) -> dict[str, int]:
-        """
-        Get the total number of tokens used by the LLM.
-        """
-        return self.generation_program.get_lm_usage()
-
     @traceable(
         name="GenerationProgram", run_type="llm", metadata={"llm_provider": dspy.settings.lm}
     )
@@ -339,14 +333,6 @@ async def aforward(self, documents: list[Document]) -> dspy.Prediction:
         """
         return self(documents)
 
-    def get_lm_usage(self) -> dict[str, int]:
-        """
-        Get the total number of tokens used by the LLM.
-        Note: MCP mode doesn't use LLM generation, so no tokens are consumed.
-        """
-        # MCP mode doesn't use LLM generation, return empty dict
-        return {}
-
 
 def create_generation_program(program_type: str) -> GenerationProgram:
     """
diff --git a/python/src/cairo_coder/dspy/grok_search.py b/python/src/cairo_coder/dspy/grok_search.py
@@ -103,6 +103,7 @@ async def aforward(self, processed_query: ProcessedQuery, chat_history: str) ->
             {', '.join(processed_query.search_queries)}. \
             Make sure that your final answer will contain links to the relevant sources used to construct your answer.
         """
+        # TODO: track LM usage
         chat = self.client.chat.create(
             model=DEFAULT_GROK_MODEL,
             tools=[web_search(), x_search()],
diff --git a/python/src/cairo_coder/dspy/query_processor.py b/python/src/cairo_coder/dspy/query_processor.py
@@ -13,7 +13,7 @@
 from langsmith import traceable
 
 import dspy
-from cairo_coder.core.types import DocumentSource, ProcessedQuery
+from cairo_coder.core.types import DocumentSource, ProcessedQuery, LMUsage
 
 logger = structlog.get_logger(__name__)
 
@@ -125,7 +125,7 @@ def __init__(self):
         }
 
     @traceable(name="QueryProcessorProgram", run_type="llm", metadata={"llm_provider": dspy.settings.lm})
-    async def aforward(self, query: str, chat_history: Optional[str] = None) -> ProcessedQuery:
+    async def aforward(self, query: str, chat_history: Optional[str] = None) -> tuple[ProcessedQuery, LMUsage]:
         """
         Process a user query into a structured format for document retrieval.
 
@@ -144,19 +144,14 @@ async def aforward(self, query: str, chat_history: Optional[str] = None) -> Proc
         resources = self._validate_resources(result.resources)
 
         # Build structured query result
-        return ProcessedQuery(
+        processed_query = ProcessedQuery(
             original=query,
             search_queries=search_queries,
             is_contract_related=self._is_contract_query(query),
             is_test_related=self._is_test_query(query),
             resources=resources,
         )
-
-    def get_lm_usage(self) -> dict[str, int]:
-        """
-        Get the total number of tokens used by the LLM.
-        """
-        return self.retrieval_program.get_lm_usage()
+        return processed_query, result.get_lm_usage()
 
     def _validate_resources(self, resources: list[str]) -> list[DocumentSource]:
         """
diff --git a/python/src/cairo_coder/dspy/retrieval_judge.py b/python/src/cairo_coder/dspy/retrieval_judge.py
@@ -171,12 +171,6 @@ async def judge_one(doc_string: str):
 
         return keep_docs
 
-    def get_lm_usage(self) -> dict[str, int]:
-        """
-        Get the total number of tokens used by the LLM.
-        """
-        return self.rater.get_lm_usage()
-
     # =========================
     # Internal Helpers
     # =========================
diff --git a/python/src/cairo_coder/server/app.py b/python/src/cairo_coder/server/app.py
diff --git a/python/uv.lock b/python/uv.lock