Skip to content

Commit 17f12fc

Browse files
authored
feat: llm judge for docs retrieval (#32)
1 parent 696f312 commit 17f12fc

File tree

13 files changed

+1405
-442
lines changed

13 files changed

+1405
-442
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,9 @@ The RAG pipeline is implemented in the `python/src/cairo_coder/core/` directory
182182

183183
1. **QueryProcessorProgram**: Analyzes user queries to extract semantic search queries and identify relevant documentation sources.
184184
2. **DocumentRetrieverProgram**: Retrieves relevant Cairo documentation from the vector database.
185-
3. **GenerationProgram**: Generates Cairo code and explanations based on the retrieved context.
186-
4. **RagPipeline**: Orchestrates the entire RAG process, chaining the modules together.
185+
3. **RetrievalJudge**: LLM-based judge that scores retrieved documents for relevance, filtering out low-quality results.
186+
4. **GenerationProgram**: Generates Cairo code and explanations based on the retrieved context.
187+
5. **RagPipeline**: Orchestrates the entire RAG process, chaining the modules together.
187188

188189
## Development
189190

python/optimizers/results/optimized_rag.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,33 @@
7777
"document_retriever.vector_db": {
7878
"k": 5
7979
},
80+
"retrieval_judge.rater": {
81+
"traces": [],
82+
"train": [],
83+
"demos": [],
84+
"signature": {
85+
"instructions": "Compare a system's retrieval response to the query and rate how much it can be leveraged to answer the query. When asked to reason, enumerate key ideas in each response, and whether they are present in the expected output. A document is considered useful if it is directly relevant to the query, or if it is informative and can be useful for context. For example, if the query is about creating or fixing a smart contract, then, an example of a smart contract, even if not _directly_ related, is considered useful. If the query is about a specific Cairo language feature, then a document about that feature is considered useful. Contract and test templates are always considered useful.",
86+
"fields": [
87+
{
88+
"prefix": "Query:",
89+
"description": "User's specific Cairo programming question or request for code generation"
90+
},
91+
{
92+
"prefix": "System Resource:",
93+
"description": "Single resource text (content + minimal metadata/title)"
94+
},
95+
{
96+
"prefix": "Reasoning:",
97+
"description": "A short sentence, on why a selected resource will be useful. If it's not selected, reason about why it's not going to be useful. Start by Resource <resource_title>..."
98+
},
99+
{
100+
"prefix": "Resource Note",
101+
"description": "A note between 0 and 1.0 on how useful the resource is to directly answer the query. 0 being completely unrelated, 1.0 being very relevant, 0.5 being 'not directly relatd but still informative and can be useful for context."
102+
}
103+
]
104+
},
105+
"lm": null
106+
},
80107
"generation_program.generation_program.predict": {
81108
"traces": [],
82109
"train": [],

python/src/cairo_coder/core/agent_factory.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
from cairo_coder.core.config import AgentConfiguration, VectorStoreConfig
1313
from cairo_coder.core.rag_pipeline import RagPipeline, RagPipelineFactory
1414
from cairo_coder.core.types import DocumentSource, Message
15+
from cairo_coder.utils.logging import get_logger
16+
17+
logger = get_logger(__name__)
1518

1619

1720
@dataclass
@@ -91,7 +94,6 @@ def create_agent(
9194
vector_db=vector_db,
9295
)
9396

94-
9597
@staticmethod
9698
def create_agent_by_id(
9799
query: str,
@@ -178,7 +180,7 @@ def get_or_create_agent(
178180

179181
return agent
180182

181-
def clear_cache(self):
183+
def clear_cache(self) -> None:
182184
"""Clear the agent cache."""
183185
self._agent_cache.clear()
184186

@@ -276,7 +278,7 @@ def _create_pipeline_from_config(
276278
277279
Args:
278280
agent_config: Agent configuration
279-
vector_store: Vector store for document retrieval
281+
vector_store_config: Vector store for document retrieval
280282
query: User's query
281283
history: Chat history
282284
mcp_mode: Whether to use MCP mode

python/src/cairo_coder/core/rag_pipeline.py

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,13 @@
2626
from cairo_coder.dspy.document_retriever import DocumentRetrieverProgram
2727
from cairo_coder.dspy.generation_program import GenerationProgram, McpGenerationProgram
2828
from cairo_coder.dspy.query_processor import QueryProcessorProgram
29+
from cairo_coder.dspy.retrieval_judge import RetrievalJudge
2930
from cairo_coder.utils.logging import get_logger
3031

3132
logger = get_logger(__name__)
3233

34+
SOURCE_PREVIEW_MAX_LEN = 200
35+
3336

3437
# 1. Define a custom callback class that extends BaseCallback class
3538
class AgentLoggingCallback(BaseCallback):
@@ -38,28 +41,28 @@ def on_module_start(
3841
call_id: str,
3942
instance: Any,
4043
inputs: dict[str, Any],
41-
):
44+
) -> None:
4245
logger.debug("Starting module", call_id=call_id, inputs=inputs)
4346

4447
# 2. Implement on_module_end handler to run a custom logging code.
45-
def on_module_end(self, call_id, outputs, exception):
48+
def on_module_end(self, call_id: str, outputs: dict[str, Any], exception: Exception | None) -> None:
4649
step = "Reasoning" if self._is_reasoning_output(outputs) else "Acting"
4750
logger.debug(f"== {step} Step ===")
4851
for k, v in outputs.items():
4952
logger.debug(f" {k}: {v}")
5053
logger.debug("\n")
5154

52-
def _is_reasoning_output(self, outputs):
55+
def _is_reasoning_output(self, outputs: dict[str, Any]) -> bool:
5356
return any(k.startswith("Thought") for k in outputs if isinstance(k, str))
5457

5558

5659
class LangsmithTracingCallback(BaseCallback):
5760
@traceable()
58-
def on_lm_start(self, call_id, instance, inputs):
61+
def on_lm_start(self, call_id: str, instance: Any, inputs: dict[str, Any]) -> None:
5962
pass
6063

6164
@traceable()
62-
def on_lm_end(self, call_id, outputs, exception):
65+
def on_lm_end(self, call_id: str, outputs: dict[str, Any], exception: Exception | None) -> None:
6366
pass
6467

6568

@@ -103,6 +106,7 @@ def __init__(self, config: RagPipelineConfig):
103106
self.document_retriever = config.document_retriever
104107
self.generation_program = config.generation_program
105108
self.mcp_generation_program = config.mcp_generation_program
109+
self.retrieval_judge = RetrievalJudge()
106110

107111
# Pipeline state
108112
self._current_processed_query: ProcessedQuery | None = None
@@ -122,6 +126,19 @@ def _process_query_and_retrieve_docs(
122126
documents = self.document_retriever.forward(
123127
processed_query=processed_query, sources=retrieval_sources
124128
)
129+
130+
# Apply LLM judge if enabled
131+
try:
132+
with dspy.context(lm=dspy.LM("gemini/gemini-2.5-flash-lite", max_tokens=10000)):
133+
documents = self.retrieval_judge.forward(query=query, documents=documents)
134+
except Exception as e:
135+
logger.warning(
136+
"Retrieval judge failed (sync), using all documents",
137+
error=str(e),
138+
exc_info=True,
139+
)
140+
# documents already contains all retrieved docs, no action needed
141+
125142
self._current_documents = documents
126143

127144
return processed_query, documents
@@ -142,6 +159,18 @@ async def _aprocess_query_and_retrieve_docs(
142159
documents = await self.document_retriever.aforward(
143160
processed_query=processed_query, sources=retrieval_sources
144161
)
162+
163+
try:
164+
with dspy.context(lm=dspy.LM("gemini/gemini-2.5-flash-lite", max_tokens=10000)):
165+
documents = await self.retrieval_judge.aforward(query=query, documents=documents)
166+
except Exception as e:
167+
logger.warning(
168+
"Retrieval judge failed (async), using all documents",
169+
error=str(e),
170+
exc_info=True,
171+
)
172+
# documents already contains all retrieved docs, no action needed
173+
145174
self._current_documents = documents
146175

147176
return processed_query, documents
@@ -258,12 +287,13 @@ async def forward_streaming(
258287
logger.error("Pipeline error", error=e)
259288
yield StreamEvent(StreamEventType.ERROR, data=f"Pipeline error: {str(e)}")
260289

261-
def get_lm_usage(self) -> dict[str, int]:
290+
def get_lm_usage(self) -> dict[str, dict[str, int]]:
262291
"""
263292
Get the total number of tokens used by the LLM.
264293
"""
265294
generation_usage = self.generation_program.get_lm_usage()
266295
query_usage = self.query_processor.get_lm_usage()
296+
judge_usage = self.retrieval_judge.get_lm_usage()
267297

268298
# Additive merge strategy
269299
merged_usage = {}
@@ -278,6 +308,7 @@ def merge_usage_dict(target: dict, source: dict) -> None:
278308

279309
merge_usage_dict(merged_usage, generation_usage)
280310
merge_usage_dict(merged_usage, query_usage)
311+
merge_usage_dict(merged_usage, judge_usage)
281312

282313
return merged_usage
283314

@@ -317,8 +348,8 @@ def _format_sources(self, documents: list[Document]) -> list[dict[str, Any]]:
317348
"title": doc.metadata.get("title", "Untitled"),
318349
"url": doc.metadata.get("url", "#"),
319350
"source_display": doc.metadata.get("source_display", "Unknown Source"),
320-
"content_preview": doc.page_content[:200]
321-
+ ("..." if len(doc.page_content) > 200 else ""),
351+
"content_preview": doc.page_content[:SOURCE_PREVIEW_MAX_LEN]
352+
+ ("..." if len(doc.page_content) > SOURCE_PREVIEW_MAX_LEN else ""),
322353
}
323354
sources.append(source_info)
324355

@@ -481,7 +512,7 @@ def create_pipeline(
481512

482513
@staticmethod
483514
def create_scarb_pipeline(
484-
name: str, vector_store_config: VectorStoreConfig, **kwargs
515+
name: str, vector_store_config: VectorStoreConfig, **kwargs: Any
485516
) -> RagPipeline:
486517
"""
487518
Create a Scarb-specialized RAG Pipeline.
@@ -511,7 +542,7 @@ def create_scarb_pipeline(
511542
)
512543

513544

514-
def create_rag_pipeline(name: str, vector_store_config: VectorStoreConfig, **kwargs) -> RagPipeline:
545+
def create_rag_pipeline(name: str, vector_store_config: VectorStoreConfig, **kwargs: Any) -> RagPipeline:
515546
"""
516547
Convenience function to create a RAG Pipeline.
517548

python/src/cairo_coder/dspy/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
create_mcp_generation_program,
1616
)
1717
from .query_processor import QueryProcessorProgram, create_query_processor
18+
from .retrieval_judge import RetrievalJudge
1819

1920
__all__ = [
2021
"QueryProcessorProgram",
@@ -24,4 +25,5 @@
2425
"McpGenerationProgram",
2526
"create_generation_program",
2627
"create_mcp_generation_program",
28+
"RetrievalJudge",
2729
]

python/src/cairo_coder/dspy/document_retriever.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919
logger = structlog.get_logger()
2020

2121
# Templates for different types of requests
22+
CONTRACT_TEMPLATE_TITLE = "Contract Template"
2223
CONTRACT_TEMPLATE = """
23-
contract>
24+
<contract>
2425
use starknet::ContractAddress;
2526
2627
// Define the contract interface
@@ -61,15 +62,20 @@
6162
6263
#[derive(Drop, starknet::Event)]
6364
pub struct DataRegistered {
64-
user: ContractAddress,
65-
data: felt252,
65+
pub user: ContractAddress,
66+
pub data: felt252,
6667
}
6768
6869
#[derive(Drop, starknet::Event)]
6970
pub struct DataUpdated {
70-
user: ContractAddress,
71-
index: u64,
72-
new_data: felt252,
71+
pub user: ContractAddress,
72+
pub index: u64,
73+
pub new_data: felt252,
74+
}
75+
76+
#[constructor]
77+
fn constructor(ref self: ContractState, initial_data: usize) {
78+
self.foo.write(initial_data);
7379
}
7480
7581
// Implement the contract interface
@@ -137,8 +143,9 @@
137143
Never add comments with urls to sources in the code that you produce.
138144
"""
139145

146+
TEST_TEMPLATE_TITLE = "Contract Testing Template"
140147
TEST_TEMPLATE = """
141-
contract_test>
148+
<contract_test>
142149
// Import the contract module itself
143150
use registry::Registry;
144151
// Make the required inner structs available in scope
@@ -167,7 +174,7 @@
167174
// 4. Create a dispatcher to interact with the contract
168175
let contract = declare("Registry");
169176
let mut constructor_args = array![];
170-
Serde::serialize(@1_u8, ref constructor_args);
177+
Serde::serialize(@0_u8, ref constructor_args);
171178
let (contract_address, _err) = contract
172179
.unwrap()
173180
.contract_class()
@@ -194,11 +201,11 @@
194201
195202
// Verify the data was stored correctly
196203
let stored_data = dispatcher.get_data(0);
197-
assert(stored_data == 42, 'Wrong stored data');
204+
assert_eq!(stored_data, 42);
198205
199206
// Verify user-specific data
200207
let user_data = dispatcher.get_user_data(caller);
201-
assert(user_data == 42, 'Wrong user data');
208+
assert_eq!(user_data, 42);
202209
203210
// Verify event emission:
204211
// 1. Create the expected event
@@ -231,11 +238,11 @@
231238
232239
// Verify the update
233240
let updated_data = dispatcher.get_data(0);
234-
assert(updated_data == 100, 'Wrong updated data');
241+
assert_eq!(updated_data, 100);
235242
236243
// Verify user data was updated
237244
let user_data = dispatcher.get_user_data(caller);
238-
assert(user_data == 100, 'Wrong updated user data');
245+
assert_eq!(user_data, 100);
239246
240247
// Verify update event
241248
let expected_updated_event = Registry::Event::DataUpdated(
@@ -264,16 +271,16 @@
264271
let all_data = dispatcher.get_all_data();
265272
266273
// Verify array contents
267-
assert(*all_data.at(0) == 10, 'Wrong data at index 0');
268-
assert(*all_data.at(1) == 20, 'Wrong data at index 1');
269-
assert(*all_data.at(2) == 30, 'Wrong data at index 2');
270-
assert(all_data.len() == 3, 'Wrong array length');
274+
assert_eq!(*all_data.at(0), 10);
275+
assert_eq!(*all_data.at(1), 20);
276+
assert_eq!(*all_data.at(2), 30);
277+
assert_eq!(all_data.len(), 3);
271278
272279
stop_cheat_caller_address(dispatcher.contract_address);
273280
}
274281
275282
#[test]
276-
#[should_panic(expected: "Index out of bounds")]
283+
#[should_panic(expected: "Index out of bounds")]
277284
fn test_get_data_out_of_bounds() {
278285
let dispatcher = deploy_contract();
279286
@@ -690,7 +697,7 @@ def _enhance_context(self, processed_query: ProcessedQuery, context: list[Docume
690697
context.append(
691698
Document(
692699
page_content=CONTRACT_TEMPLATE,
693-
metadata={"title": "contract_template", "source": "contract_template"},
700+
metadata={"title": CONTRACT_TEMPLATE_TITLE, "source": CONTRACT_TEMPLATE_TITLE},
694701
)
695702
)
696703

@@ -699,7 +706,7 @@ def _enhance_context(self, processed_query: ProcessedQuery, context: list[Docume
699706
context.append(
700707
Document(
701708
page_content=TEST_TEMPLATE,
702-
metadata={"title": "test_template", "source": "test_template"},
709+
metadata={"title": TEST_TEMPLATE_TITLE, "source": TEST_TEMPLATE_TITLE},
703710
)
704711
)
705712
return context
@@ -739,4 +746,4 @@ def create_document_retriever(
739746
max_source_count=max_source_count,
740747
similarity_threshold=similarity_threshold,
741748
embedding_model=embedding_model,
742-
)
749+
)

0 commit comments

Comments
 (0)