@@ -8,6 +8,8 @@
 from collections import defaultdict
 import importlib
 import sys
+import shutil
+import mlflow
 
 from promptflow.client import load_flow
 from azure.ai.evaluation import evaluate
@@ -30,6 +32,7 @@ def get_args():
                         default="./preprocessed_data_output.jsonl")
     parser.add_argument("--evaluated_data", type=str, dest="evaluated_data", default="./evaluated_data_output.jsonl")
     parser.add_argument("--evaluators", type=str, dest="evaluators")
+    parser.add_argument("--evaluator_name_id_map", type=str, dest="evaluator_name_id_map")
     parser.add_argument("--sampling_rate", type=str, dest="sampling_rate", default="1")
 
     args, _ = parser.parse_known_args()
@@ -101,6 +104,24 @@ def download_evaluators_and_update_local_path(evaluators):
     return evaluators
 
 
+def copy_evaluator_files(command_line_args):
+    """Copy the mounted evaluator files to the relative paths to enable read/write."""
+    evaluators = json.loads(command_line_args["evaluators"])
+    evaluator_name_id_map = json.loads(command_line_args["evaluator_name_id_map"])
+    for evaluator_name, evaluator_id in evaluator_name_id_map.items():
+        dir_path = find_file_and_get_parent_dir(evaluator_id)
+        if dir_path:
+            shutil.copytree(dir_path, f"./{evaluator_name}")
+            logger.info(f"Copying {dir_path} to ./{evaluator_name}")
+            copied_dir = os.listdir(f"./{evaluator_name}")
+            logger.info(f"Directory ./{evaluator_name} now contains: {copied_dir}")
+            sys.path.append(os.path.abspath(f"./{evaluator_name}"))
+            evaluators[evaluator_name]["local_path"] = os.path.abspath(f"./{evaluator_name}")
+        else:
+            logger.info(f"Directory for evaluator {evaluator_name} not found.")
+    return evaluators
+
+
 def load_evaluators(input_evaluators):
     """Initialize the evaluators using correct parameters and credentials for rai evaluators."""
     loaded_evaluators, loaded_evaluator_configs = {}, {}
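
Note: copy_evaluator_files assumes both CLI arguments are JSON strings. A minimal sketch of the shapes it appears to expect is below; only the "local_path" and "DataMapping" keys are visible in this diff, so the evaluator name and the asset id are illustrative placeholders, not values taken from the commit.

import json

# Assumed shape of --evaluators: name -> config; "DataMapping" is read by
# load_evaluators and "local_path" is filled in by copy_evaluator_files.
evaluators_arg = json.dumps({
    "coherence": {
        "DataMapping": {"query": "${data.question}", "response": "${data.answer}"},
    },
})

# Assumed shape of --evaluator_name_id_map: name -> id (or path) that
# find_file_and_get_parent_dir can resolve to a mounted directory.
evaluator_name_id_map_arg = json.dumps({
    "coherence": "<evaluator-asset-id-or-mounted-path>",
})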
@@ -112,16 +133,25 @@ def load_evaluators(input_evaluators):
             init_params["credential"] = AzureMLOnBehalfOfCredential()
         loaded_evaluators[evaluator_name] = flow(**init_params)
         loaded_evaluator_configs[evaluator_name] = {"column_mapping": evaluator.get("DataMapping", {})}
+        logger.info(f"Loaded Evaluator: {flow}")
+        logger.info(f"Using Evaluator: {loaded_evaluators[evaluator_name]}")
+        logger.info(f"Loaded evaluator config: {loaded_evaluator_configs[evaluator_name]}")
     return loaded_evaluators, loaded_evaluator_configs
 
 
 def run_evaluation(command_line_args, evaluators, evaluator_configs):
     """Run the evaluation."""
     # Todo: can we get only results back instead of the whole response?
+    logger.info(f"Running the evaluators: {list(evaluators.keys())}")
+    logger.info(f"With the evaluator config {evaluator_configs}")
     results = evaluate(data=command_line_args["preprocessed_data"], evaluators=evaluators,
                        evaluator_config=evaluator_configs)
-    logger.info("Evaluation Completed")
-    logger.info("results here", results)
+    metrics = {}
+    for metric_name, metric_value in results["metrics"].items():
+        logger.info(f"Logging metric added with name {metric_name}, and value {metric_value}")
+        metrics[metric_name] = metric_value
+    mlflow.log_metrics(metrics)
+    logger.info("Evaluation Completed Successfully")
     final_results = defaultdict(list)
     for result in results["rows"]:
         for evaluator_name in evaluators:
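
One caveat for the metric-logging block added above: mlflow.log_metrics() accepts only numeric values, so a non-numeric entry in results["metrics"] would make the call fail. A minimal defensive sketch (not part of this commit, assuming the same results dict returned by evaluate()):

import logging
import numbers

import mlflow

logger = logging.getLogger(__name__)


def log_numeric_metrics(results):
    """Log only the numeric aggregate metrics returned by evaluate()."""
    metrics = {}
    for metric_name, metric_value in results.get("metrics", {}).items():
        if isinstance(metric_value, numbers.Number):
            metrics[metric_name] = metric_value
        else:
            logger.warning(f"Skipping non-numeric metric {metric_name}: {metric_value}")
    if metrics:
        mlflow.log_metrics(metrics)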
@@ -130,11 +160,20 @@ def run_evaluation(command_line_args, evaluators, evaluator_configs):
             if len(filtered_result) == 1:
                 final_results[evaluator_name].append(filtered_result[list(filtered_result.keys())[0]])
             else:
+                if len(filtered_result) == 0:
+                    logger.warning(f"No output score generated for current evaluator {evaluator_name}")
                 logger.info(f"Found multiple results for {evaluator_name}. Adding as json string.")
                 final_results[evaluator_name].append(json.dumps(filtered_result))
     final_results = pd.DataFrame(final_results)
     logger.info(final_results)
     final_results.to_json(command_line_args["evaluated_data"], orient="records", lines=True)
+    if results and results.get("rows"):
+        # Convert the results to a DataFrame
+        df = pd.DataFrame(results["rows"])
+
+        # Save the DataFrame as a JSONL file
+        df.to_json("instance_results.jsonl", orient="records", lines=True)
+        mlflow.log_artifact("instance_results.jsonl")
 
 
 rai_evaluators = [
@@ -151,7 +190,8 @@ def run_evaluation(command_line_args, evaluators, evaluator_configs):
 def run(args):
     """Entry point of model prediction script."""
     evaluators = json.loads(args["evaluators"])
-    evaluators = download_evaluators_and_update_local_path(evaluators)
+    # evaluators = download_evaluators_and_update_local_path(evaluators)
+    evaluators = copy_evaluator_files(args)
     evaluators, evaluator_configs = load_evaluators(evaluators)
     run_evaluation(args, evaluators, evaluator_configs)
 
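
For reference, a sketch of how the updated script could be invoked locally with the new --evaluator_name_id_map argument. The script filename and the evaluator details are placeholders, since neither appears in this diff.

import json
import subprocess
import sys

# Placeholder inputs; in the real component these come from the pipeline job.
evaluators = json.dumps({"coherence": {"DataMapping": {"response": "${data.answer}"}}})
name_id_map = json.dumps({"coherence": "<evaluator-asset-id-or-mounted-path>"})

subprocess.run(
    [
        sys.executable, "evaluate_on_data.py",  # hypothetical script name
        "--preprocessed_data", "./preprocessed_data_output.jsonl",
        "--evaluated_data", "./evaluated_data_output.jsonl",
        "--evaluators", evaluators,
        "--evaluator_name_id_map", name_id_map,
    ],
    check=True,
)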