Commit b299af4

fix: token usage calculation

1 parent 502c252

19 files changed: +453 additions, -178 deletions

python/AGENTS.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -12,6 +12,7 @@ This repo uses a unified, deterministic testing infrastructure to keep tests fas
 - Unit client uses `mock_agent_factory` and `mock_vector_db`.
 - Integration client injects a real `RagPipeline` wired to `mock_query_processor` + `mock_vector_db` (via the same `mock_agent_factory`).
 - Replace ad‑hoc stubs with shared fixtures: `sample_processed_query`, `mock_query_processor`, `sample_documents`, and `mock_returned_documents` (built from `sample_documents`).
+- Respect declared types. When a signature says the argument is type `T`, never guard it with `is None` or `hasattr` checks for `T`'s own surface area—just call the method and let the type system show bugs. (Example: if something is typed `dspy.Prediction`, call `get_lm_usage()` directly and set usage via `set_lm_usage`. Don't assume these attributes are not present.)
 
 ## DSPy/LLM Behavior
```
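
The new rule is easiest to see in code. A minimal sketch (the helper name and the fallback dict are illustrative, not from the repo):

```python
import dspy

def record_usage(pred: dspy.Prediction) -> dict:
    """Read a prediction's LM usage, trusting the declared type."""
    # Anti-pattern the rule forbids: `hasattr(pred, "get_lm_usage")` or
    # `pred is not None` guards on a parameter already typed dspy.Prediction.
    # Preferred: call the declared API directly; get_lm_usage() may return {},
    # but the method itself always exists on a real Prediction.
    return pred.get_lm_usage() or {}
```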

python/src/cairo_coder/core/rag_pipeline.py

Lines changed: 22 additions & 14 deletions

```diff
@@ -24,6 +24,7 @@
     ProcessedQuery,
     StreamEvent,
     StreamEventType,
+    combine_usage,
     title_from_url,
 )
 from cairo_coder.dspy.document_retriever import DocumentRetrieverProgram
@@ -98,14 +99,8 @@ def _accumulate_usage(self, prediction: dspy.Prediction) -> None:
         Args:
             prediction: DSPy prediction object with usage information
         """
-        usage = prediction.get_lm_usage();
-        for model_name, metrics in usage.items():
-            if model_name not in self._accumulated_usage:
-                self._accumulated_usage[model_name] = {}
-            for metric_name, value in metrics.items():
-                self._accumulated_usage[model_name][metric_name] = (
-                    self._accumulated_usage[model_name].get(metric_name, 0) + value
-                )
+        usage = prediction.get_lm_usage()
+        self._accumulated_usage = combine_usage(self._accumulated_usage, usage)
 
     def _reset_usage(self) -> None:
         """Reset accumulated usage for a new request."""
@@ -118,22 +113,28 @@ async def _aprocess_query_and_retrieve_docs(
         sources: list[DocumentSource] | None = None,
     ) -> tuple[ProcessedQuery, list[Document]]:
         """Process query and retrieve documents - shared async logic."""
-        processed_query = await self.query_processor.aforward(
+        qp_prediction = await self.query_processor.aforward(
             query=query, chat_history=chat_history_str
         )
-        self._accumulate_usage(processed_query)
+        self._accumulate_usage(qp_prediction)
+        processed_query = qp_prediction.processed_query
         self._current_processed_query = processed_query
 
         # Use provided sources or fall back to processed query sources
         retrieval_sources = sources or processed_query.resources
-        documents = await self.document_retriever.aforward(
+        dr_prediction = await self.document_retriever.aforward(
             processed_query=processed_query, sources=retrieval_sources
         )
+        self._accumulate_usage(dr_prediction)
+        documents = dr_prediction.documents
 
         # Optional Grok web/X augmentation: activate when STARKNET_BLOG is among sources.
         try:
             if DocumentSource.STARKNET_BLOG in retrieval_sources:
-                grok_docs = await self.grok_search.aforward(processed_query, chat_history_str)
+                grok_pred = await self.grok_search.aforward(processed_query, chat_history_str)
+                self._accumulate_usage(grok_pred)
+                grok_docs = grok_pred.documents
+
                 self._grok_citations = list(self.grok_search.last_citations)
                 if grok_docs:
                     documents.extend(grok_docs)
@@ -151,7 +152,9 @@ async def _aprocess_query_and_retrieve_docs(
                 lm=dspy.LM("gemini/gemini-flash-lite-latest", max_tokens=10000, temperature=0.5),
                 adapter=XMLAdapter(),
             ):
-                documents = await self.retrieval_judge.aforward(query=query, documents=documents)
+                judge_pred = await self.retrieval_judge.aforward(query=query, documents=documents)
+                self._accumulate_usage(judge_pred)
+                documents = judge_pred.documents
         except Exception as e:
             logger.warning(
                 "Retrieval judge failed (async), using all documents",
@@ -197,14 +200,18 @@ async def aforward(
         if mcp_mode:
             result = await self.mcp_generation_program.aforward(documents)
             self._accumulate_usage(result)
+            result.set_lm_usage(self._accumulated_usage)
             return result
 
         context = self._prepare_context(documents)
 
         result = await self.generation_program.aforward(
             query=query, context=context, chat_history=chat_history_str
         )
-        self._accumulate_usage(result)
+        if result:
+            self._accumulate_usage(result)
+        # Update the result's usage to include accumulated usage from previous steps
+        result.set_lm_usage(self._accumulated_usage)
         return result
 
 
@@ -283,6 +290,7 @@ async def aforward_streaming(
                     logger.warning(f"Unknown signature field name: {chunk.signature_field_name}")
             elif isinstance(chunk, dspy.Prediction):
                 # Final complete answer
+                self._accumulate_usage(chunk)
                 final_text = getattr(chunk, "answer", None) or chunk_accumulator
                 yield StreamEvent(type=StreamEventType.FINAL_RESPONSE, data=final_text)
                 rt.end(outputs={"output": final_text})
```
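
Taken together, these hunks establish one contract: every stage returns a `dspy.Prediction` carrying its own usage, the pipeline folds each into a running total, and the final result is stamped with the combined figure. A condensed sketch of that flow (the stage list and loop are simplified placeholders, not the pipeline's real control flow):

```python
import dspy
from cairo_coder.core.types import combine_usage

async def run_stages(stages: list, query: str) -> dspy.Prediction:
    accumulated: dict = {}
    result = dspy.Prediction()
    for stage in stages:
        # Each stage (query processor, retriever, judge, generator) returns
        # a dspy.Prediction with usage attached via set_lm_usage.
        result = await stage.aforward(query=query)
        accumulated = combine_usage(accumulated, result.get_lm_usage())
    # The caller-visible prediction reports usage for the whole request.
    result.set_lm_usage(accumulated)
    return result
```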

python/src/cairo_coder/core/types.py

Lines changed: 27 additions & 0 deletions

```diff
@@ -197,6 +197,33 @@ def to_dict(self) -> dict[str, Any]:
             "details": self.details,
             "timestamp": self.timestamp.isoformat(),
         }
+
+
+def combine_usage(usage1: LMUsage, usage2: LMUsage) -> LMUsage:
+    """Combine two LM usage dictionaries, tolerating missing inputs."""
+    result: LMUsage = {model: (metrics or {}).copy() for model, metrics in usage1.items()}
+
+    for model, metrics in usage2.items():
+        if model not in result:
+            result[model] = metrics.copy()
+        else:
+            # Merge metrics
+            for key, value in metrics.items():
+                if isinstance(value, int | float):
+                    result[model][key] = result[model].get(key, 0) + value
+                elif isinstance(value, dict):
+                    if key not in result[model] or result[model][key] is None:
+                        result[model][key] = value.copy()
+                    else:
+                        # Recursive merge for nested dicts
+                        for detail_key, detail_value in value.items():
+                            if isinstance(detail_value, int | float):
+                                result[model][key][detail_key] = (
+                                    result[model][key].get(detail_key, 0) + detail_value
+                                )
+    return result
+
+
 class AgentResponse(BaseModel):
     """Response from agent processing."""
```
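
The merge semantics in one worked example (model names and token figures are made up; the dict shape is assumed to mirror what `get_lm_usage()` returns):

```python
from cairo_coder.core.types import combine_usage

step1 = {
    "gemini/gemini-flash-lite-latest": {
        "prompt_tokens": 120,
        "completion_tokens": 40,
        "completion_tokens_details": {"reasoning_tokens": 10},
    }
}
step2 = {
    "gemini/gemini-flash-lite-latest": {
        "prompt_tokens": 80,
        "completion_tokens": 25,
        "completion_tokens_details": {"reasoning_tokens": 5},
    },
    "grok/grok-beta": {"prompt_tokens": 300, "completion_tokens": 60},
}

merged = combine_usage(step1, step2)
# Numeric metrics sum per model: prompt_tokens -> 200, completion_tokens -> 65.
# Nested detail dicts merge one level deep: reasoning_tokens -> 15.
# Models present in only one input ("grok/grok-beta") are copied through.
```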

python/src/cairo_coder/dspy/document_retriever.py

Lines changed: 19 additions & 6 deletions

```diff
@@ -565,7 +565,7 @@ def __init__(
 
     async def aforward(
         self, processed_query: ProcessedQuery, sources: list[DocumentSource] | None = None
-    ) -> list[Document]:
+    ) -> dspy.Prediction:
         """
         Execute the document retrieval process asynchronously.
 
@@ -574,7 +574,7 @@ async def aforward(
             sources: Optional list of DocumentSource to filter by
 
         Returns:
-            List of relevant Document objects, ranked by similarity
+            dspy.Prediction containing list of relevant Document objects, ranked by similarity
         """
         # Use sources from processed query if not provided
         if sources is None:
@@ -584,10 +584,15 @@
         documents = await self._afetch_documents(processed_query, sources)
 
         if not documents:
-            return []
+            empty_prediction = dspy.Prediction(documents=[])
+            empty_prediction.set_lm_usage({})
+            return empty_prediction
 
         # Step 2: Enrich context with appropriate templates based on query type.
-        return self._enhance_context(processed_query, documents)
+        enhanced_documents = self._enhance_context(processed_query, documents)
+        prediction = dspy.Prediction(documents=enhanced_documents)
+        prediction.set_lm_usage({})
+        return prediction
 
     def forward(
         self, processed_query: ProcessedQuery, sources: list[DocumentSource] | None = None
@@ -701,7 +706,11 @@ def _enhance_context(self, processed_query: ProcessedQuery, context: list[Docume
             context.append(
                 Document(
                     page_content=CONTRACT_TEMPLATE,
-                    metadata={"title": CONTRACT_TEMPLATE_TITLE, "source": CONTRACT_TEMPLATE_TITLE, "sourceLink": "https://www.starknet.io/cairo-book/ch103-06-01-deploying-and-interacting-with-a-voting-contract.html"},
+                    metadata={
+                        "title": CONTRACT_TEMPLATE_TITLE,
+                        "source": DocumentSource.CAIRO_BOOK,
+                        "sourceLink": "https://www.starknet.io/cairo-book/ch103-06-01-deploying-and-interacting-with-a-voting-contract.html",
+                    },
                 )
             )
 
@@ -710,7 +719,11 @@ def _enhance_context(self, processed_query: ProcessedQuery, context: list[Docume
             context.append(
                 Document(
                     page_content=TEST_TEMPLATE,
-                    metadata={"title": TEST_TEMPLATE_TITLE, "source": TEST_TEMPLATE_TITLE, "sourceLink": "https://www.starknet.io/cairo-book/ch104-02-testing-smart-contracts.html"},
+                    metadata={
+                        "title": TEST_TEMPLATE_TITLE,
+                        "source": DocumentSource.CAIRO_BOOK,
+                        "sourceLink": "https://www.starknet.io/cairo-book/ch104-02-testing-smart-contracts.html",
+                    },
                 )
             )
         return context
```
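
Besides the `dspy.Prediction` wrapping, note the second change in `_enhance_context`: the template documents' `source` field previously reused the human-readable title string, while every other retrieved document carries a `DocumentSource` value. A small before/after sketch (the title text stands in for the real constant):

```python
from cairo_coder.core.types import Document, DocumentSource

# Before: "source" held the same string as "title", breaking consumers
# that expect a DocumentSource member:
#   metadata={"title": TEST_TEMPLATE_TITLE, "source": TEST_TEMPLATE_TITLE, ...}

doc = Document(
    page_content="...template body...",
    metadata={
        "title": "Testing Smart Contracts",   # stands in for TEST_TEMPLATE_TITLE
        "source": DocumentSource.CAIRO_BOOK,  # enum, like real retrieved docs
        "sourceLink": "https://www.starknet.io/cairo-book/ch104-02-testing-smart-contracts.html",
    },
)
assert doc.metadata["source"] is DocumentSource.CAIRO_BOOK
```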

python/src/cairo_coder/dspy/grok_search.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -96,7 +96,7 @@ def _domain_from_url(url: str) -> str:
         return url
 
     @traceable(name="GrokSearchProgram", run_type="llm")
-    async def aforward(self, processed_query: ProcessedQuery, chat_history: str) -> list[Document]:
+    async def aforward(self, processed_query: ProcessedQuery, chat_history: str) -> dspy.Prediction:
         formatted_query = f"""Answer the following query: {processed_query.original}. \
 Here is the chat history: {chat_history}, that might be relevant to the question. \
 For more context, here are some semantic terms associated with the question: \
@@ -148,4 +148,6 @@ async def aforward(self, processed_query: ProcessedQuery, chat_history: str) ->
             )
         )
 
-        return documents
+        prediction = dspy.Prediction(documents=documents)
+        prediction.set_lm_usage({})
+        return prediction
```

python/src/cairo_coder/dspy/query_processor.py

Lines changed: 8 additions & 4 deletions

```diff
@@ -13,7 +13,7 @@
 from langsmith import traceable
 
 import dspy
-from cairo_coder.core.types import DocumentSource, ProcessedQuery, LMUsage
+from cairo_coder.core.types import DocumentSource, ProcessedQuery
 
 logger = structlog.get_logger(__name__)
 
@@ -125,7 +125,7 @@ def __init__(self):
         }
 
     @traceable(name="QueryProcessorProgram", run_type="llm", metadata={"llm_provider": dspy.settings.lm})
-    async def aforward(self, query: str, chat_history: Optional[str] = None) -> tuple[ProcessedQuery, LMUsage]:
+    async def aforward(self, query: str, chat_history: Optional[str] = None) -> dspy.Prediction:
         """
         Process a user query into a structured format for document retrieval.
 
@@ -134,7 +134,7 @@ async def aforward(self, query: str, chat_history: Optional[str] = None) -> tupl
             chat_history: Previous conversation context (optional)
 
         Returns:
-            ProcessedQuery with search terms, resource identification, and categorization
+            dspy.Prediction containing processed_query and attached usage
         """
         # Execute the DSPy retrieval program
         result = await self.retrieval_program.aforward(query=query, chat_history=chat_history)
@@ -151,7 +151,11 @@ async def aforward(self, query: str, chat_history: Optional[str] = None) -> tupl
             is_test_related=self._is_test_query(query),
             resources=resources,
         )
-        return processed_query, result.get_lm_usage()
+
+        prediction = dspy.Prediction(processed_query=processed_query)
+        prediction.set_lm_usage(result.get_lm_usage() or {})
+
+        return prediction
 
     def _validate_resources(self, resources: list[str]) -> list[DocumentSource]:
         """
```

python/src/cairo_coder/dspy/retrieval_judge.py

Lines changed: 15 additions & 5 deletions

```diff
@@ -16,7 +16,7 @@
 from langsmith import traceable
 
 import dspy
-from cairo_coder.core.types import Document
+from cairo_coder.core.types import Document, combine_usage
 from cairo_coder.dspy.document_retriever import CONTRACT_TEMPLATE_TITLE, TEST_TEMPLATE_TITLE
 
 logger = structlog.get_logger(__name__)
@@ -135,14 +135,16 @@ def __init__(self):
     @traceable(
         name="RetrievalJudge", run_type="llm", metadata={"llm_provider": dspy.settings.lm}
     )
-    async def aforward(self, query: str, documents: list[Document]) -> list[Document]:
+    async def aforward(self, query: str, documents: list[Document]) -> dspy.Prediction:
         """Async judge."""
         if not documents:
-            return documents
+            return dspy.Prediction(documents=documents)
 
         keep_docs, judged_indices, judged_payloads = self._split_templates_and_prepare_docs(
             documents
         )
+
+        aggregated_usage = {}
 
         # TODO: can we use dspy.Parallel here instead of asyncio gather?
         if judged_payloads:
@@ -154,6 +156,12 @@ async def judge_one(doc_string: str):
             results = await asyncio.gather(
                 *[judge_one(ds) for ds in judged_payloads], return_exceptions=True
             )
+
+            # Aggregate usage from results
+            for res in results:
+                if isinstance(res, dspy.Prediction):
+                    aggregated_usage = combine_usage(aggregated_usage, res.get_lm_usage())
+
             self._attach_scores_and_filter_async(
                 query=query,
                 documents=documents,
@@ -167,9 +175,11 @@
                 error=str(e),
                 exc_info=True,
             )
-            return documents
+            return dspy.Prediction(documents=documents)
 
-        return keep_docs
+        pred = dspy.Prediction(documents=keep_docs)
+        pred.set_lm_usage(aggregated_usage)
+        return pred
 
     # =========================
     # Internal Helpers
```
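
The `isinstance` guard in the aggregation loop exists because `asyncio.gather(..., return_exceptions=True)` hands back a mix of predictions and exceptions. A self-contained toy showing why only real predictions contribute usage (model name and token counts are invented):

```python
import asyncio
import dspy
from cairo_coder.core.types import combine_usage

async def ok() -> dspy.Prediction:
    pred = dspy.Prediction(score=1.0)
    pred.set_lm_usage({"judge-lm": {"prompt_tokens": 50, "completion_tokens": 5}})
    return pred

async def boom() -> dspy.Prediction:
    raise RuntimeError("judge call failed")

async def main() -> None:
    results = await asyncio.gather(ok(), boom(), ok(), return_exceptions=True)
    usage: dict = {}
    for res in results:
        if isinstance(res, dspy.Prediction):  # the RuntimeError is skipped
            usage = combine_usage(usage, res.get_lm_usage())
    print(usage)  # {'judge-lm': {'prompt_tokens': 100, 'completion_tokens': 10}}

asyncio.run(main())
```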

python/src/cairo_coder/server/app.py

Lines changed: 33 additions & 19 deletions

```diff
@@ -171,8 +171,7 @@ async def log_interaction_task(
             query=query,
             generated_answer=response.choices[0].message.content if response.choices else None,
             retrieved_sources=sources_data,
-            # TODO: fix LLM usage metrics
-            llm_usage={}
+            llm_usage=agent.get_lm_usage(),
         )
         await create_user_interaction(interaction)
 
@@ -203,7 +202,7 @@ async def log_interaction_raw(
             query=query,
             generated_answer=generated_answer,
             retrieved_sources=sources_data,
-            llm_usage={},
+            llm_usage=agent.get_lm_usage()
         )
         await create_user_interaction(interaction)
 
@@ -270,13 +269,15 @@ async def value_error_handler(request: Request, exc: ValueError):
             logger.warning("Bad request", error=str(exc), path=request.url.path)
             return JSONResponse(
                 status_code=400,
-                content=ErrorResponse(
-                    error=ErrorDetail(
-                        message=str(exc),
-                        type="invalid_request_error",
-                        code="invalid_request",
-                    )
-                ).model_dump(),
+                content={
+                    "detail": ErrorResponse(
+                        error=ErrorDetail(
+                            message=str(exc),
+                            type="invalid_request_error",
+                            code="invalid_request",
+                        )
+                    ).model_dump()
+                },
             )
 
         @self.app.exception_handler(Exception)
@@ -285,13 +286,15 @@ async def global_exception_handler(request: Request, exc: Exception):
             logger.error("Unhandled exception", error=str(exc), path=request.url.path, exc_info=True)
             return JSONResponse(
                 status_code=500,
-                content=ErrorResponse(
-                    error=ErrorDetail(
-                        message="Internal server error",
-                        type="server_error",
-                        code="internal_error",
-                    )
-                ).model_dump(),
+                content={
+                    "detail": ErrorResponse(
+                        error=ErrorDetail(
+                            message=f"Internal server error: {str(exc)}",
+                            type="server_error",
+                            code="internal_error",
+                        )
+                    ).model_dump()
+                },
             )
 
     def _setup_routes(self):
@@ -340,8 +343,19 @@ async def agent_chat_completions(
             agent_factory: AgentFactory = Depends(get_agent_factory),
         ):
             """Agent-specific chat completions"""
-            # Validate agent exists (will raise ValueError if not found, handled by global handler)
-            agent_factory.get_agent_info(agent_id=agent_id)
+            try:
+                agent_factory.get_agent_info(agent_id=agent_id)
+            except ValueError as exc:
+                raise HTTPException(
+                    status_code=404,
+                    detail={
+                        "error": {
+                            "message": str(exc),
+                            "type": "invalid_request_error",
+                            "code": "agent_not_found",
+                        }
+                    },
+                ) from exc
 
             # Determine MCP mode
             mcp_mode = bool(mcp or x_mcp_mode)
```
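
The new 404 path is directly testable. A sketch of the expected response shape (the route prefix and the `app` import are assumptions about this server's layout; FastAPI wraps `HTTPException.detail` under a top-level `"detail"` key):

```python
from fastapi.testclient import TestClient
from cairo_coder.server.app import app  # assumed module-level app export

def test_unknown_agent_returns_404() -> None:
    client = TestClient(app)
    resp = client.post(
        "/v1/agents/does-not-exist/chat/completions",  # assumed route shape
        json={"messages": [{"role": "user", "content": "hi"}]},
    )
    assert resp.status_code == 404
    body = resp.json()
    assert body["detail"]["error"]["code"] == "agent_not_found"
    assert body["detail"]["error"]["type"] == "invalid_request_error"
```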
