@@ -120,7 +120,12 @@ async def list_evaluators(self) -> list[dict[str, Any]]:
         return result.get("evaluators", [])  # type: ignore
 
     async def run_evaluation(
-        self, evaluator_id: str, request: str, response: str, contexts: list[str] | None = None
+        self,
+        evaluator_id: str,
+        request: str,
+        response: str,
+        contexts: list[str] | None = None,
+        expected_output: str | None = None,
     ) -> dict[str, Any]:
         """Run a standard evaluation using a RootSignals evaluator by ID.
 
@@ -129,6 +134,7 @@ async def run_evaluation(
             request: The user request/query
             response: The model's response to evaluate
             contexts: Optional list of contexts (policy files, examples, etc.) used for generation. Only used for evaluators that require contexts.
+            expected_output: Optional expected LLM response. Only used for evaluators that require expected output.
 
         Returns:
             Evaluation result with score and justification
@@ -138,12 +144,18 @@ async def run_evaluation(
             "request": request,
             "response": response,
             "contexts": contexts,
+            "expected_output": expected_output,
         }
 
         return await self.call_tool("run_evaluation", arguments)
 
     async def run_evaluation_by_name(
-        self, evaluator_name: str, request: str, response: str, contexts: list[str] | None = None
+        self,
+        evaluator_name: str,
+        request: str,
+        response: str,
+        contexts: list[str] | None = None,
+        expected_output: str | None = None,
     ) -> dict[str, Any]:
         """Run a standard evaluation using a RootSignals evaluator by name.
 
@@ -152,6 +164,7 @@ async def run_evaluation_by_name(
             request: The user request/query
             response: The model's response to evaluate
             contexts: Optional list of contexts (policy files, examples, etc.) used for generation. Only used for evaluators that require contexts.
+            expected_output: Optional expected LLM response. Only used for evaluators that require expected output.
 
         Returns:
             Evaluation result with score and justification
@@ -161,6 +174,7 @@ async def run_evaluation_by_name(
             "request": request,
             "response": response,
             "contexts": contexts,
+            "expected_output": expected_output,
         }
 
         return await self.call_tool("run_evaluation_by_name", arguments)
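For reference, a minimal usage sketch of the updated signatures. The `client` object, the evaluator name, and the example strings below are hypothetical placeholders, not values taken from this diff; only the method and parameter names come from the changed code.

```python
# Hypothetical caller of the updated run_evaluation_by_name signature.
# `client` is assumed to be an instance of the MCP client class modified in this diff.

async def evaluate_answer(client) -> dict:
    # Evaluators that compare against a reference answer can now receive it
    # through the new optional expected_output argument.
    result = await client.run_evaluation_by_name(
        evaluator_name="Answer Correctness",            # placeholder evaluator name
        request="What is the capital of France?",       # placeholder user query
        response="The capital of France is Paris.",     # placeholder model response
        expected_output="Paris",                        # new optional argument added here
    )
    # The result dict carries the score and justification, per the docstring.
    return result

# In practice this coroutine would be driven with asyncio.run(...) once a client
# instance has been constructed; client construction is outside the scope of this diff.
```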