
Commit 3fcebcb

apeddauppari and Ankush Bhatia authored

Online Evaluation Schedule AOAI Evaluators Support (#3568)

* AOAI eval fixes
* Fix for evaluator issues and logger
* Bug fixes
* Remove debug logs
* check-style fix and logger changes
* Added logger to check query results

Co-authored-by: Ankush Bhatia <[email protected]>
1 parent 8efee95 commit 3fcebcb

File tree: 4 files changed (+52, -11 lines)


assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/evaluate.py

Lines changed: 43 additions & 3 deletions
@@ -8,6 +8,8 @@
 from collections import defaultdict
 import importlib
 import sys
+import shutil
+import mlflow

 from promptflow.client import load_flow
 from azure.ai.evaluation import evaluate
@@ -30,6 +32,7 @@ def get_args():
                         default="./preprocessed_data_output.jsonl")
     parser.add_argument("--evaluated_data", type=str, dest="evaluated_data", default="./evaluated_data_output.jsonl")
     parser.add_argument("--evaluators", type=str, dest="evaluators")
+    parser.add_argument("--evaluator_name_id_map", type=str, dest="evaluator_name_id_map")
     parser.add_argument("--sampling_rate", type=str, dest="sampling_rate", default="1")

     args, _ = parser.parse_known_args()
@@ -101,6 +104,24 @@ def download_evaluators_and_update_local_path(evaluators):
     return evaluators


+def copy_evaluator_files(command_line_args):
+    """Copy the mounted evaluator files to the relative paths to enable read/write."""
+    evaluators = json.loads(command_line_args["evaluators"])
+    evaluator_name_id_map = json.loads(command_line_args["evaluator_name_id_map"])
+    for evaluator_name, evaluator_id in evaluator_name_id_map.items():
+        dir_path = find_file_and_get_parent_dir(evaluator_id)
+        if dir_path:
+            shutil.copytree(dir_path, f"./{evaluator_name}")
+            logger.info(f"Copying {dir_path} to ./{evaluator_name}")
+            copied_dir = os.listdir(f"./{evaluator_name}")
+            logger.info(f"Directory ./{evaluator_name} now contains: {copied_dir}")
+            sys.path.append(os.path.abspath(f"./{evaluator_name}"))
+            evaluators[evaluator_name]["local_path"] = os.path.abspath(f"./{evaluator_name}")
+        else:
+            logger.info(f"Directory for evaluator {evaluator_name} not found.")
+    return evaluators
+
+
 def load_evaluators(input_evaluators):
     """Initialize the evaluators using correct parameters and credentials for rai evaluators."""
     loaded_evaluators, loaded_evaluator_configs = {}, {}
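For context, copy_evaluator_files consumes the two JSON strings passed on the command line and mirrors each mounted evaluator into a writable local folder that is also put on sys.path. The sketch below shows roughly what that looks like; the evaluator name, asset ID, and file layout are hypothetical placeholders, and find_mounted_dir stands in for the script's own find_file_and_get_parent_dir helper.

"""Minimal sketch of the inputs copy_evaluator_files() works with (all values are placeholders)."""
import json
import os
import shutil
import sys
import tempfile

# Fake "mounted" evaluator folder so the sketch runs end to end.
fake_mount = tempfile.mkdtemp()
open(os.path.join(fake_mount, "evaluator.py"), "w").close()

# Shapes of the two CLI arguments (both arrive as JSON strings).
evaluators_arg = '{"MyCoherenceEval": {"InitParams": {}, "DataMapping": {"response": "${data.response}"}}}'
name_id_map_arg = '{"MyCoherenceEval": "azureml://registries/my-registry/models/MyCoherenceEval/versions/1"}'

def find_mounted_dir(evaluator_id):
    """Stand-in for find_file_and_get_parent_dir(): map the asset ID to its mounted folder."""
    return fake_mount

evaluators = json.loads(evaluators_arg)
work_dir = tempfile.mkdtemp()
for name, evaluator_id in json.loads(name_id_map_arg).items():
    mounted = find_mounted_dir(evaluator_id)
    if mounted:
        local_copy = os.path.join(work_dir, name)
        shutil.copytree(mounted, local_copy)           # writable copy of the read-only mount
        sys.path.append(os.path.abspath(local_copy))   # make the evaluator module importable
        evaluators[name]["local_path"] = os.path.abspath(local_copy)

print(evaluators["MyCoherenceEval"]["local_path"])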
@@ -112,16 +133,25 @@ def load_evaluators(input_evaluators):
             init_params["credential"] = AzureMLOnBehalfOfCredential()
         loaded_evaluators[evaluator_name] = flow(**init_params)
         loaded_evaluator_configs[evaluator_name] = {"column_mapping": evaluator.get("DataMapping", {})}
+        logger.info(f"Loaded Evaluator: {flow}")
+        logger.info(f"Using Evaluator: {loaded_evaluators[evaluator_name]}")
+        logger.info(f"Loaded evaluator config: {loaded_evaluator_configs[evaluator_name]}")
     return loaded_evaluators, loaded_evaluator_configs


 def run_evaluation(command_line_args, evaluators, evaluator_configs):
     """Run the evaluation."""
     # Todo: can we get only results back instead of the whole response?
+    logger.info(f"Running the evaluators: {list(evaluators.keys())}")
+    logger.info(f"With the evaluator config {evaluator_configs}")
     results = evaluate(data=command_line_args["preprocessed_data"], evaluators=evaluators,
                        evaluator_config=evaluator_configs)
-    logger.info("Evaluation Completed")
-    logger.info("results here", results)
+    metrics = {}
+    for metric_name, metric_value in results["metrics"].items():
+        logger.info(f"Logging metric added with name {metric_name}, and value {metric_value}")
+        metrics[metric_name] = metric_value
+    mlflow.log_metrics(metrics)
+    logger.info("Evaluation Completed Successfully")
     final_results = defaultdict(list)
     for result in results["rows"]:
         for evaluator_name in evaluators:
@@ -130,11 +160,20 @@ def run_evaluation(command_line_args, evaluators, evaluator_configs):
             if len(filtered_result) == 1:
                 final_results[evaluator_name].append(filtered_result[list(filtered_result.keys())[0]])
             else:
+                if len(filtered_result) == 0:
+                    logger.warning(f"No output score generated for current evaluator {evaluator_name}")
                 logger.info(f"Found multiple results for {evaluator_name}. Adding as json string.")
                 final_results[evaluator_name].append(json.dumps(filtered_result))
     final_results = pd.DataFrame(final_results)
     logger.info(final_results)
     final_results.to_json(command_line_args["evaluated_data"], orient="records", lines=True)
+    if results and results.get("rows"):
+        # Convert the results to a DataFrame
+        df = pd.DataFrame(results["rows"])
+
+        # Save the DataFrame as a JSONL file
+        df.to_json("instance_results.jsonl", orient="records", lines=True)
+        mlflow.log_artifact("instance_results.jsonl")


 rai_evaluators = [
@@ -151,7 +190,8 @@ def run_evaluation(command_line_args, evaluators, evaluator_configs):
 def run(args):
     """Entry point of model prediction script."""
     evaluators = json.loads(args["evaluators"])
-    evaluators = download_evaluators_and_update_local_path(evaluators)
+    # evaluators = download_evaluators_and_update_local_path(evaluators)
+    evaluators = copy_evaluator_files(args)
     evaluators, evaluator_configs = load_evaluators(evaluators)
     run_evaluation(args, evaluators, evaluator_configs)
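Taken together, the changes to run_evaluation push the aggregate scores to MLflow as run metrics and preserve the per-row output as a JSONL artifact. A minimal standalone sketch of that logging path, using a made-up results payload shaped like the {"metrics": ..., "rows": ...} dict the script reads back from evaluate():

"""Sketch of the new MLflow logging path; the results values below are hypothetical."""
import mlflow
import pandas as pd

results = {
    "metrics": {"coherence.gpt_coherence": 4.2},  # hypothetical aggregate score
    "rows": [{"inputs.query": "hi", "outputs.coherence.gpt_coherence": 4}],  # hypothetical row
}

with mlflow.start_run():
    # Aggregate scores become run metrics...
    mlflow.log_metrics({name: value for name, value in results["metrics"].items()})
    # ...and the per-row output is preserved as a JSONL artifact.
    if results and results.get("rows"):
        pd.DataFrame(results["rows"]).to_json("instance_results.jsonl",
                                              orient="records", lines=True)
        mlflow.log_artifact("instance_results.jsonl")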

assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/evaluate_online.py

Lines changed: 4 additions & 1 deletion
@@ -6,6 +6,7 @@
 import preprocess
 import evaluate
 import postprocess
+import mlflow


 def get_args():
@@ -23,6 +24,7 @@ def get_args():
                         default="./preprocessed_data_output.jsonl")
     parser.add_argument("--evaluated_data", type=str, dest="evaluated_data", default="./evaluated_data_output.jsonl")
     parser.add_argument("--evaluators", type=str, dest="evaluators")
+    parser.add_argument("--evaluator_name_id_map", type=str, dest="evaluator_name_id_map")
     parser.add_argument("--service_name", type=str, dest="service_name", default="evaluation.app")

     args, _ = parser.parse_known_args()
@@ -40,4 +42,5 @@ def run():


 if __name__ == "__main__":
-    run()
+    with mlflow.start_run() as _run:
+        run()
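The entrypoint change wraps the whole online-eval pass in a single MLflow run, so the mlflow.log_metrics and mlflow.log_artifact calls added in evaluate.py attach to one run. A minimal sketch of the pattern (the metric name is hypothetical):

"""One MLflow run wraps the pipeline; logging calls inside the steps attach to it."""
import mlflow

def run():
    """Stand-in for the preprocess -> evaluate -> postprocess pipeline."""
    print("active run id:", mlflow.active_run().info.run_id)
    mlflow.log_metric("demo_metric", 1.0)  # hypothetical metric logged by a child step

if __name__ == "__main__":
    with mlflow.start_run() as _run:
        run()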

assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/postprocess.py

Lines changed: 2 additions & 6 deletions
@@ -12,7 +12,7 @@
 from opentelemetry import _logs
 from opentelemetry.trace.span import TraceFlags
 from opentelemetry.sdk._logs import LoggerProvider
-from opentelemetry.sdk._logs.export import BatchLogRecordProcessor, ConsoleLogExporter
+from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
 from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter

 import logging
@@ -41,7 +41,6 @@ def configure_logging(args) -> LoggerProvider:
     logger.info("Configuring logging")
     provider = LoggerProvider()
     _logs.set_logger_provider(provider)
-    provider.add_log_record_processor(BatchLogRecordProcessor(ConsoleLogExporter()))
     args["connection_string"] = None if args["connection_string"] == "" else args["connection_string"]
     provider.add_log_record_processor(
         BatchLogRecordProcessor(AzureMonitorLogExporter(connection_string=args["connection_string"])))
@@ -103,10 +102,7 @@ def get_combined_data(preprocessed_data, evaluated_data, service_name):

 def run(args):
     """Entry point of model prediction script."""
-    logger.info(
-        f"Sampling Rate: {args['sampling_rate']}, Connection String: {args['connection_string']}, "
-        f"Service Name: {args['service_name']}"
-    )
+    logger.info(f"Commandline args:> Service Name: {args['service_name']}")
     provider = configure_logging(args)
     data = get_combined_data(args["preprocessed_data"], args["evaluated_data"],
                              args["service_name"])
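After this change only the Azure Monitor exporter is attached to the logger provider. A minimal sketch of the resulting setup, assuming a placeholder connection string; the final LoggingHandler hookup is just one common way to feed stdlib log records into the provider, not necessarily what postprocess.py does:

"""Azure-Monitor-only OpenTelemetry log export (connection string is a fake placeholder)."""
import logging

from opentelemetry import _logs
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter

connection_string = "InstrumentationKey=00000000-0000-0000-0000-000000000000"  # placeholder

provider = LoggerProvider()
_logs.set_logger_provider(provider)
provider.add_log_record_processor(
    BatchLogRecordProcessor(AzureMonitorLogExporter(connection_string=connection_string)))

# One common way to route stdlib logging records through the provider.
logging.getLogger(__name__).addHandler(LoggingHandler(logger_provider=provider))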

assets/evaluation_on_cloud/environments/evaluations-built-in/context/online_eval/preprocess.py

Lines changed: 3 additions & 1 deletion
@@ -67,6 +67,7 @@ def get_logs(client, resource_id: str, query: str, start_time: datetime, end_tim
             raise Exception(f"Unable to parse query results. Unexpected number of tables: {len(data)}.")
         table = data[0]
         df = pd.DataFrame(data=table.rows, columns=table.columns)
+        logger.info(f"Query returned {len(df)} rows, {len(df.columns)} columns, and df.columns: {df.columns}")
         return df
     except Exception as e:
         logger.info("something fatal happened")
@@ -76,8 +77,9 @@ def save_output(result, args):
 def save_output(result, args):
     """Save output."""
     try:
-        logger.info("Saving output.")
         # Todo: One conversation will be split across multiple rows. how to combine them?
+        logger.info(f"Saving output to {args['preprocessed_data']}")
+        logger.info(f"First few rows of output: {result.head()}")
         result.to_json(args["preprocessed_data"], orient="records", lines=True)
     except Exception as e:
         logger.info("Unable to save output.")
