@@ -120,7 +120,12 @@ async def list_evaluators(self) -> list[dict[str, Any]]:
         return result.get("evaluators", [])  # type: ignore

     async def run_evaluation(
-        self, evaluator_id: str, request: str, response: str, contexts: list[str] | None = None
+        self,
+        evaluator_id: str,
+        request: str,
+        response: str,
+        contexts: list[str] | None = None,
+        expected_output: str | None = None,
     ) -> dict[str, Any]:
         """Run a standard evaluation using a RootSignals evaluator by ID.

@@ -129,6 +134,7 @@ async def run_evaluation(
             request: The user request/query
             response: The model's response to evaluate
             contexts: Optional list of contexts (policy files, examples, etc.) used for generation. Only used for evaluators that require contexts.
+            expected_output: Optional expected LLM response. Only used for evaluators that require expected output.

         Returns:
             Evaluation result with score and justification
@@ -138,12 +144,18 @@ async def run_evaluation(
138144 "request" : request ,
139145 "response" : response ,
140146 "contexts" : contexts ,
147+ "expected_output" : expected_output ,
141148 }
142149
143150 return await self .call_tool ("run_evaluation" , arguments )
144151
145152 async def run_evaluation_by_name (
146- self , evaluator_name : str , request : str , response : str , contexts : list [str ] | None = None
153+ self ,
154+ evaluator_name : str ,
155+ request : str ,
156+ response : str ,
157+ contexts : list [str ] | None = None ,
158+ expected_output : str | None = None ,
147159 ) -> dict [str , Any ]:
148160 """Run a standard evaluation using a RootSignals evaluator by name.
149161
@@ -152,6 +164,7 @@ async def run_evaluation_by_name(
             request: The user request/query
             response: The model's response to evaluate
             contexts: Optional list of contexts (policy files, examples, etc.) used for generation. Only used for evaluators that require contexts.
+            expected_output: Optional expected LLM response. Only used for evaluators that require expected output.

         Returns:
             Evaluation result with score and justification
@@ -161,6 +174,7 @@ async def run_evaluation_by_name(
161174 "request" : request ,
162175 "response" : response ,
163176 "contexts" : contexts ,
177+ "expected_output" : expected_output ,
164178 }
165179
166180 return await self .call_tool ("run_evaluation_by_name" , arguments )
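
For illustration, a minimal usage sketch of the extended signature. The client class name, evaluator ID, and result keys below are assumptions made for the example and do not appear in this diff; only the new `expected_output` parameter comes from the change above.

import asyncio

async def main() -> None:
    # Hypothetical client class exposing the run_evaluation method patched above.
    client = RootSignalsMCPClient()
    result = await client.run_evaluation(
        evaluator_id="faithfulness",                     # placeholder evaluator ID
        request="What is the capital of France?",
        response="Paris is the capital of France.",
        contexts=["France's capital city is Paris."],
        expected_output="Paris",                         # new optional reference answer
    )
    # Per the docstring, the result carries a score and justification;
    # the exact dictionary keys are assumed here.
    print(result.get("score"), result.get("justification"))

asyncio.run(main())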