From 791c6f6d5a093ce167d6a074419d3e42bffe70ac Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 11 Oct 2024 14:22:26 +0200 Subject: [PATCH 1/6] fix udf typo, remove unused func, clarify var names --- scripts/generate_aviti_run_manifest.py | 46 +++++++++----------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 37e2909f..fbce2bbd 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -203,8 +203,8 @@ def get_manifests(process: Process, manifest_root_name: str) -> list[tuple[str, else: row["Index1"] = idx # Assume long idx2 from recipe + no idx2 from label means idx2 is UMI - if int(process.udf.get("Index read 2", 0)) > 12: - row["Index2"] = "N" * int(process.udf["Index read 2"]) + if int(process.udf.get("Index Read 2", 0)) > 12: + row["Index2"] = "N" * int(process.udf["Index Read 2"]) else: row["Index2"] = "" row["Lane"] = lane @@ -333,24 +333,8 @@ def make_manifest( return (file_name, manifest_contents) -def fit_seq(seq: str, length: int, seq_extension: str | None = None) -> str: - """Fit a sequence to a given length by extending or truncating.""" - if len(seq) == length: - return seq - elif len(seq) > length: - return seq[:length] - else: - if seq_extension is None: - raise AssertionError("Can't extend sequence without extension string.") - else: - if length - len(seq) > len(seq_extension): - raise AssertionError( - "Extension string too short to fit sequence to desired length." - ) - return seq + seq_extension[: length - len(seq)] - - -def check_distances(rows: list[dict], threshold=3) -> None: +def check_distances(rows: list[dict], threshold=2) -> None: + """Iterator function to check index distances between all pairs of samples.""" for i in range(len(rows)): row = rows[i] @@ -369,26 +353,28 @@ def check_pair_distance(row, row_comp, check_flips: bool = False, threshold: int """ if check_flips: - flips = [] - for a1, _a1 in zip( - [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"] + flips: list[tuple[str, str, str]] = [] + for s1i1, s1i1_name in zip( + [row["Index1"], revcomp(row["Index1"])], + ["Index1", "Index1_rc"], ): - for a2, _a2 in zip( - [row["Index2"], revcomp(row["Index2"])], ["Index2", "Index2_rc"] + for s1i2, s1i2_name in zip( + [row["Index2"], revcomp(row["Index2"])], + ["Index2", "Index2_rc"], ): - for b1, _b1 in zip( + for s2i1, s2i1_name in zip( [row_comp["Index1"], revcomp(row_comp["Index1"])], ["Index1", "Index1_rc"], ): - for b2, _b2 in zip( + for s2i2, s2i2_name in zip( [row_comp["Index2"], revcomp(row_comp["Index2"])], ["Index2", "Index2_rc"], ): flips.append( ( - distance(a1, b1) + distance(a2, b2), - f"{a1}-{a2} {b1}-{b2}", - f"{_a1}-{_a2} {_b1}-{_b2}", + distance(s1i1, s2i1) + distance(s1i2, s2i2), + f"{s1i1}-{s1i2} {s2i1}-{s2i2}", + f"{s1i1_name}-{s1i2_name} {s2i1_name}-{s2i2_name}", ) ) dist, compared_seqs, flip_conf = min(flips, key=lambda x: x[0]) From db385ecee64e69c3e0241d93cdf41cbd9a042da1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 14 Oct 2024 11:21:51 +0200 Subject: [PATCH 2/6] make phix manifest variant --- scripts/generate_aviti_run_manifest.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index fbce2bbd..309d1d25 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -259,7 +259,7 @@ def get_manifests(process: Process, manifest_root_name: str) -> list[tuple[str, # Start building manifests manifests: list[tuple[str, str]] = [] - for manifest_type in ["untrimmed", "trimmed", "empty"]: + for manifest_type in ["untrimmed", "trimmed", "phix", "empty"]: manifest_name, manifest_contents = make_manifest( df_samples_and_controls, process, @@ -292,6 +292,7 @@ def make_manifest( ].copy() file_name = f"{manifest_root_name}_{manifest_type}.csv" + runValues_section = "\n".join( [ "[RUNVALUES]", @@ -320,6 +321,10 @@ def make_manifest( samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}" + elif manifest_type == "phix": + df = df[df["Project"] == "Control"] + samples_section = f"[SAMPLES]\n{df.to_csv(index=None, header=True)}" + elif manifest_type == "empty": samples_section = "" From 4be364651844371c8528f2ba2545771e82e6d27a Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 14 Oct 2024 11:22:57 +0200 Subject: [PATCH 3/6] bump vlog --- VERSIONLOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index fc4cce2f..5e59a8a8 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20241014.1 + +For AVITI manifest generation: make PhiX manifest variant, fix udf typo, remove unused func, clarify var names + ## 20241009.1 Improve AVITI run manifest generation with sample-level settings. No longer produce submanifests. From 6ef71b754ffb808e54e34cc6e1df5cfbe64e758c Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 14 Oct 2024 11:27:51 +0200 Subject: [PATCH 4/6] mypy --- scripts/generate_aviti_run_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 309d1d25..ce4adb36 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -358,7 +358,7 @@ def check_pair_distance(row, row_comp, check_flips: bool = False, threshold: int """ if check_flips: - flips: list[tuple[str, str, str]] = [] + flips: list[tuple[int, str, str]] = [] for s1i1, s1i1_name in zip( [row["Index1"], revcomp(row["Index1"])], ["Index1", "Index1_rc"], From 2bc45adbd4f70eb8443ab651c7d6609ced74ad8a Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 4 Nov 2024 14:27:29 +0100 Subject: [PATCH 5/6] add user-vs-inhouse library boolean and logic for rev-comping index2 of certain samples --- scripts/generate_aviti_run_manifest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index ce4adb36..4138cf81 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -189,6 +189,12 @@ def get_manifests(process: Process, manifest_root_name: str) -> list[tuple[str, if sample.project: project = sample.project.name.replace(".", "__").replace(",", "") seq_setup = sample.project.udf.get("Sequencing setup", "0-0") + user_library = ( + True + if sample.project.udf["Library construction method"] + == "Finished library (by user)" + else False + ) else: project = "Control" seq_setup = "0-0" @@ -200,6 +206,16 @@ def get_manifests(process: Process, manifest_root_name: str) -> list[tuple[str, row["SampleName"] = sample.name if isinstance(idx, tuple): row["Index1"], row["Index2"] = idx + # Special cases to reverse-complement index2 + if user_library or ( + not user_library + and ( + TENX_DUAL_PAT.findall(lims_label) + or SMARTSEQ_PAT.findall(lims_label) + ) + ): + logging.info(f"Reverse-complementing index2 of {sample.name}.") + row["Index2"] = revcomp(row["Index2"]) else: row["Index1"] = idx # Assume long idx2 from recipe + no idx2 from label means idx2 is UMI From ab77d5ee87a756e4a934b302c41d43ac2570139d Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 4 Nov 2024 14:44:34 +0100 Subject: [PATCH 6/6] invert logic --- scripts/generate_aviti_run_manifest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_aviti_run_manifest.py b/scripts/generate_aviti_run_manifest.py index 4138cf81..54a9ada2 100644 --- a/scripts/generate_aviti_run_manifest.py +++ b/scripts/generate_aviti_run_manifest.py @@ -207,8 +207,8 @@ def get_manifests(process: Process, manifest_root_name: str) -> list[tuple[str, if isinstance(idx, tuple): row["Index1"], row["Index2"] = idx # Special cases to reverse-complement index2 - if user_library or ( - not user_library + if not user_library or ( + user_library and ( TENX_DUAL_PAT.findall(lims_label) or SMARTSEQ_PAT.findall(lims_label)