From ae38b73fc21ba44a06c5092a73125e0a22036717 Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Thu, 12 Sep 2024 08:57:46 +0200 Subject: [PATCH 1/2] Update AVITI run stats parser to handle multiple lanes --- VERSIONLOG.md | 4 +++ scripts/aviti_run_parameter_parser.py | 39 ++++++++++++++------------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 39608378..7b2f1fd4 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20240912.1 + +Update AVITI run stats parser to handle multiple lanes + ## 20240910.5 Fix simple naming bug. diff --git a/scripts/aviti_run_parameter_parser.py b/scripts/aviti_run_parameter_parser.py index d914c328..1c7a85fc 100644 --- a/scripts/aviti_run_parameter_parser.py +++ b/scripts/aviti_run_parameter_parser.py @@ -55,7 +55,6 @@ def attach_json_files(process, run_dir): lims.upload_new_file(outart, f"{run_dir}/AvitiRunStats.json") except OSError: sys.stderr.write("No AvitiRunStats.json found") - sys.exit(2) def parse_run_parameters(run_dir): @@ -158,25 +157,27 @@ def calculate_mean(input_list, key): def set_run_stats(process, run_dir): + global lane_stats run_stats = parse_run_stats(run_dir) - art = process.input_output_maps[0][0]["uri"] - - for read in run_stats["RunStats"]["Reads"]: - read_key = read["Read"] - art[f"Reads PF (M) {read_key}"] = run_stats["RunStats"]["PFCount"] / 1000000 - art[f"%PF {read_key}"] = run_stats["RunStats"]["PercentPF"] - art[f"Yield PF (Gb) {read_key}"] = ( - run_stats["RunStats"]["TotalYield"] / 1000000000 - ) - art[f"% Aligned {read_key}"] = read["PhiXAlignmentRate"] - art[f"% Bases >=Q30 {read_key}"] = calculate_mean(read["Cycles"], "PercentQ30") - art[f"% Bases >=Q40 {read_key}"] = calculate_mean(read["Cycles"], "PercentQ40") - art[f"Avg Q Score {read_key}"] = calculate_mean(read["Cycles"], "AverageQScore") - art[f"% Error Rate {read_key}"] = calculate_mean( - read["Cycles"], "PercentPhixErrorRate" - ) - - art.put() + for art in process.all_outputs(): + if "Lane" in art.name: + lane_nbr = int(art.name.split(" ")[1]) + lane_stats = next(d for d in run_stats["LaneStats"] if d["Lane"] == lane_nbr) + for read in lane_stats["Reads"]: + read_key = read["Read"] + art.udf[f"Reads PF (M) {read_key}"] = lane_stats["PFCount"] / 1000000 + art.udf[f"%PF {read_key}"] = lane_stats["PercentPF"] + art.udf[f"Yield PF (Gb) {read_key}"] = ( + lane_stats["TotalYield"] / 1000000000 + ) + art.udf[f"% Aligned {read_key}"] = read["PhiXAlignmentRate"] + art.udf[f"% Bases >=Q30 {read_key}"] = calculate_mean(read["Cycles"], "PercentQ30") + art.udf[f"% Bases >=Q40 {read_key}"] = calculate_mean(read["Cycles"], "PercentQ40") + art.udf[f"Avg Q Score {read_key}"] = calculate_mean(read["Cycles"], "AverageQScore") + art.udf[f"% Error Rate {read_key}"] = calculate_mean( + read["Cycles"], "PercentPhixErrorRate" + ) + art.put() process.put() From f067cfb079208e3f0c7558c85aea5a15d1cbfafe Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Thu, 12 Sep 2024 08:59:31 +0200 Subject: [PATCH 2/2] ruff format --- scripts/aviti_run_parameter_parser.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/scripts/aviti_run_parameter_parser.py b/scripts/aviti_run_parameter_parser.py index 1c7a85fc..19bd402a 100644 --- a/scripts/aviti_run_parameter_parser.py +++ b/scripts/aviti_run_parameter_parser.py @@ -162,18 +162,24 @@ def set_run_stats(process, run_dir): for art in process.all_outputs(): if "Lane" in art.name: lane_nbr = int(art.name.split(" ")[1]) - lane_stats = next(d for d in run_stats["LaneStats"] if d["Lane"] == lane_nbr) + lane_stats = next( + d for d in run_stats["LaneStats"] if d["Lane"] == lane_nbr + ) for read in lane_stats["Reads"]: read_key = read["Read"] art.udf[f"Reads PF (M) {read_key}"] = lane_stats["PFCount"] / 1000000 art.udf[f"%PF {read_key}"] = lane_stats["PercentPF"] - art.udf[f"Yield PF (Gb) {read_key}"] = ( - lane_stats["TotalYield"] / 1000000000 - ) + art.udf[f"Yield PF (Gb) {read_key}"] = lane_stats["TotalYield"] / 1000000000 art.udf[f"% Aligned {read_key}"] = read["PhiXAlignmentRate"] - art.udf[f"% Bases >=Q30 {read_key}"] = calculate_mean(read["Cycles"], "PercentQ30") - art.udf[f"% Bases >=Q40 {read_key}"] = calculate_mean(read["Cycles"], "PercentQ40") - art.udf[f"Avg Q Score {read_key}"] = calculate_mean(read["Cycles"], "AverageQScore") + art.udf[f"% Bases >=Q30 {read_key}"] = calculate_mean( + read["Cycles"], "PercentQ30" + ) + art.udf[f"% Bases >=Q40 {read_key}"] = calculate_mean( + read["Cycles"], "PercentQ40" + ) + art.udf[f"Avg Q Score {read_key}"] = calculate_mean( + read["Cycles"], "AverageQScore" + ) art.udf[f"% Error Rate {read_key}"] = calculate_mean( read["Cycles"], "PercentPhixErrorRate" )