Skip to content

Commit

Permalink
Merge pull request #409 from kedhammar/dev
Browse files Browse the repository at this point in the history
Update how Anglerfish is applied
  • Loading branch information
kedhammar authored Jan 30, 2024
2 parents 3076197 + 0baa97c commit 1912ea8
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 11 deletions.
4 changes: 4 additions & 0 deletions taca/analysis/analysis_nanopore.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ def process_qc_run(ont_qc_run: ONT_qc_run):
logger.info(
f"{ont_qc_run.run_name}: Could not find Anglerfish sample sheet, skipping."
)
elif not ont_qc_run.has_fastq_output():
logger.info(
f"{ont_qc_run.run_name}: Run has no fastq output, skipping."
)
else:
logger.info(f"{ont_qc_run.run_name}: Starting Anglerfish...")
ont_qc_run.run_anglerfish()
Expand Down
58 changes: 47 additions & 11 deletions taca/nanopore/ONT_run_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,32 @@ def fetch_anglerfish_samplesheet(self) -> bool:
raise RsyncError(
f"{self.run_name}: Error occured when copying anglerfish samplesheet to run dir."
)

def has_fastq_output(self) -> bool:
    """Check whether the run has fastq output.

    The run is considered to have fastq output when a ``fastq_pass``
    directory exists directly under ``self.run_abspath``.

    Returns:
        True if ``<run_abspath>/fastq_pass`` is an existing directory,
        otherwise False.
    """
    reads_dir = os.path.join(self.run_abspath, "fastq_pass")

    # Require a directory rather than any existing path: has_barcode_dirs()
    # lists the contents of this same path and would fail on a regular file.
    return os.path.isdir(reads_dir)

def has_barcode_dirs(self) -> bool:
    """Check whether the run's fastq output contains barcode entries.

    Looks at the names of the entries directly under
    ``<run_abspath>/fastq_pass`` and reports whether any of them contains
    ``barcode`` followed by two digits (e.g. ``barcode01``).

    Returns:
        True if at least one entry name matches, otherwise False.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``fastq_pass`` is missing
            or is not a directory.
    """
    barcode_dir_pattern = re.compile(r"barcode\d{2}")
    reads_dir = os.path.join(self.run_abspath, "fastq_pass")

    # any() yields an explicit False when nothing matches, honouring the
    # declared bool return type instead of implicitly returning None.
    return any(
        barcode_dir_pattern.search(entry) for entry in os.listdir(reads_dir)
    )

def run_anglerfish(self):
"""Run Anglerfish as subprocess within its own Conda environment.
Dump files to indicate ongoing and finished processes.
"""

timestamp = datetime.now().strftime("%Y_%m_%d_%H%M%S")

# "anglerfish_run*" is the dir pattern recognized by the LIMS script parsing the results
anglerfish_run_name = "anglerfish_run"

n_threads = 2 # This could possibly be changed

anglerfish_command = [
Expand All @@ -475,9 +494,18 @@ def run_anglerfish(self):
f"--run_name {anglerfish_run_name}",
f"--threads {n_threads}",
"--lenient",
"--ont_barcodes",
"--skip_demux",
]
if self.has_barcode_dirs():
anglerfish_command.append("--barcoding")

# Create dir to trace TACA executing Anglerfish as a subprocess
taca_anglerfish_run_dir = f"taca_anglerfish_run_{timestamp}"
os.mkdir(taca_anglerfish_run_dir)
# Copy samplesheet used for traceability
shutil.copy(self.anglerfish_samplesheet, f"{taca_anglerfish_run_dir}/")
# Create files to dump subprocess std
stderr_relpath = f"{taca_anglerfish_run_dir}/stderr.txt"

full_command = [
# Dump subprocess PID into 'run-ongoing'-indicator file.
Expand All @@ -486,20 +514,28 @@ def run_anglerfish(self):
"conda run -n anglerfish " + " ".join(anglerfish_command),
# Dump Anglerfish exit code into file
f"echo $? > {self.anglerfish_done_abspath}",
# Copy the Anglerfish samplesheet used to start the run into the run dir, for traceability
# (The correct anglerfish run dir is identified by it being younger than the "run-ongoing" file)
f"new_runs=$(find . -type d -name 'anglerfish_run*' -newer {self.anglerfish_ongoing_abspath})",
f"if [[ $(echo '${{new_runs}}' | wc -l) -eq 1 ]] ; then cp {self.anglerfish_samplesheet} ${{new_runs}}/ ; fi",
# Regardless of exit status: Remove 'run-ongoing' file.
# Move run to subdir
# 1) Find the latest Anglerfish run dir (younger than the 'run-ongoing' file)
f'find {self.run_abspath} -name "anglerfish_run*" -type d -newer {self.run_abspath}/.anglerfish_ongoing '
# 2) Move the Anglerfish run dir into the TACA Anglerfish run dir
+ '-exec mv \{\} ' + f'{self.run_abspath}/{taca_anglerfish_run_dir}/ \; '
# 3) Only do this once
+ '-quit',
# Remove 'run-ongoing' file.
f"rm {self.anglerfish_ongoing_abspath}",
]

with open(f"{taca_anglerfish_run_dir}/command.sh", "w") as stream:
stream.write("\n".join(full_command))

# Start Anglerfish subprocess
process = subprocess.Popen(
"; ".join(full_command),
shell=True,
cwd=self.run_abspath,
)
with open(stderr_relpath, 'w') as stderr:
process = subprocess.Popen(
f"bash {taca_anglerfish_run_dir}/command.sh",
shell=True,
cwd=self.run_abspath,
stderr=stderr,
)
logger.info(
f"{self.run_name}: Anglerfish subprocess started with process ID {process.pid}."
)

0 comments on commit 1912ea8

Please sign in to comment.