Skip to content

Commit 4262fdc

Browse files
authored
Merge branch 'master' into hashing
2 parents 6554c7d + 0e502b2 commit 4262fdc

11 files changed

+847
-96
lines changed

VERSIONLOG.md

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,43 @@
11
# Scilifelab_epps Version Log
22

3-
## 20250403.1
3+
## 20250509.1
44

55
Create project automation to apply sample antibodies from CSV to UDFs.
66

7+
## 20250411.2
8+
9+
Change SQL query for finding sample-label mappings in a pool to use pool artifact backtracking rather than using a submitted sample global query.
10+
11+
## 20250411.1
12+
13+
Introduce EPP for generic UDF calculations.
14+
15+
## 20250410.3
16+
17+
Change get_epp_user query to account for different EPP triggers.
18+
19+
## 20250410.2
20+
21+
Patch last PR, don't include file slot artifacts in logic for parsing demux artifacts.
22+
23+
## 20250410.1
24+
25+
Introduce new EPP to fix bugged demux artifacts with multiple samples in the name by using sample-label linkage to identify the "correct" sample and rename the demux artifact accordingly.
26+
Demux EPP: Add logic block to handle demux artifacts being tied to multiple samples, in which case try to choose a single sample matching the name of the demux artifact.
27+
Utility function for mapping samples to labels in pool: Move into main module
28+
29+
## 20250408.1
30+
31+
Set up fall-back for get_epp_user() failing in wrapper.
32+
33+
## 20250405.1
34+
35+
Convert list of artifacts to set to get unique values to avoid repeating lines in running notes
36+
37+
## 20250431.1
38+
39+
Update EPP wrapper to send log messages to stdout.
40+
741
## 20250328.1
842

943
Add script to add running notes from the step Load to Flowcell (NovaSeqXPlus) v1.0.

scilifelab_epps/epp.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
from shutil import copy
1717
from time import localtime, strftime
1818

19+
import psycopg2
20+
import yaml
1921
from genologics.config import MAIN_LOG
2022
from genologics.entities import Artifact, Process
2123
from genologics.lims import Lims
@@ -568,3 +570,78 @@ def upload_file(
568570
if remove:
569571
os.remove(file_path)
570572
logging.info(f"'{file_path}' removed from local filesystem.")
573+
574+
575+
def get_pool_sample_label_mapping(pool: Artifact) -> dict[str, str]:
    """Map each sample name in a pool to its reagent label via database queries.

    For every sample in ``pool.samples`` the database is queried for reagent
    labels attached to ancestor artifacts of the pool that share the sample's
    name. Exactly one label must be found per sample.

    Args:
        pool: Pool artifact containing labeled samples. Its ``id`` is split on
            "-" and the second field is used as the integer database ID.

    Returns:
        Dictionary mapping each sample name to its reagent label name.

    Raises:
        AssertionError: If any sample had zero or more than one reagent label.
            All samples are checked (and the problems logged) before raising.
    """
    with open("/opt/gls/clarity/users/glsai/config/genosqlrc.yaml") as f:
        config = yaml.safe_load(f)

    # For a given pool artifact ID and sample name:
    #   1. Find the ancestor artifacts of the pool artifact.
    #   2. Filter for derived sample artifacts.
    #   3. Filter for artifacts sharing a name with the target sample.
    #   4. Filter for artifacts with reagent labels.
    #
    # Values are passed as bound parameters (%s) rather than formatted into
    # the string, so sample names containing quotes cannot break or alter
    # the statement.
    query = """--sql
    SELECT
        DISTINCT rl.name
    FROM
        -- Table mapping artifact IDs to ancestor artifact IDs
        artifact_ancestor_map aam
        -- Join artifact information on ancestor artifact IDs
        JOIN artifact parent ON aam.ancestorartifactid = parent.artifactid
        -- Join reagent label information on ancestor artifact IDs
        LEFT JOIN artifact_label_map alm ON parent.artifactid = alm.artifactid
        LEFT JOIN reagentlabel rl ON rl.labelid = alm.labelid
    WHERE
        -- The pool artifact ID is used to find its ancestors
        aam.artifactid = %s
        -- Filter for derived sample artifacts
        AND parent.artifacttypeid = 2
        -- Filter for artifacts sharing a name with the target sample
        AND parent.name = %s
        -- Filter for artifacts with reagent labels
        AND rl.name IS NOT NULL;
    """

    # Parse the ID before connecting so a malformed ID cannot leak a connection.
    pool_db_id = int(pool.id.split("-")[1])

    errors = False
    sample2label = {}

    # Setup DB connection
    connection = psycopg2.connect(
        user=config["username"],
        host=config["url"],
        database=config["db"],
        password=config["password"],
    )
    try:
        with connection.cursor() as cursor:
            for sample in pool.samples:
                cursor.execute(query, (pool_db_id, sample.name))
                query_results = cursor.fetchall()

                # Explicit checks instead of `assert`, which is removed
                # when Python runs with -O.
                if len(query_results) == 0:
                    problem = f"No reagent labels found for sample '{sample.name}'."
                elif len(query_results) > 1:
                    problem = (
                        f"Multiple reagent labels found for sample '{sample.name}'."
                    )
                else:
                    problem = None

                if problem is not None:
                    logging.error(problem)
                    logging.warning(f"Skipping sample '{sample.name}' due to error.")
                    errors = True
                    continue

                sample2label[sample.name] = query_results[0][0]
    finally:
        # Always release the connection, also when a query raises.
        connection.close()

    if errors:
        raise AssertionError(
            "Errors occurred when linking samples and indices. Please report this error."
        )

    return sample2label

scilifelab_epps/utils/get_epp_user.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,22 @@ def get_epp_user(lims, procid):
1515
with open("/opt/gls/clarity/users/glsai/config/genosqlrc.yaml") as f:
1616
config = yaml.safe_load(f)
1717

18-
query = (
19-
"SELECT ps.researcherid "
20-
"FROM principals ps "
21-
"JOIN externalprogramstatus eps ON ps.principalid = eps.ownerid "
22-
"JOIN auditchangelog acl ON acl.rowpk = CAST(eps.externalprogramstatusid AS TEXT) "
23-
"AND acl.tablename = 'externalprogramstatus' "
24-
"JOIN auditeventlog ael ON ael.eventid = acl.eventid "
25-
"WHERE ael.eventtype = 'EPP_EXECUTE' "
26-
f"AND eps.processid = {procid.split('-')[1]} "
27-
"AND eps.status = 'RUNNING';"
28-
)
18+
# When an epp is launched, 2 events are created in auditeventlog, one for event (i.e. advancing in a step,
19+
# clicking the button and so on) and one for consumption of the next EPP request by the API. Both of them map to the
20+
# same rowpk in auditchangelog. This rowpk maps to the externalprogramstatusid in the table externalprogramstatus.
21+
# So by checking for 'EPP_CONSUME_NEXT_REQUEST' and the process that's currently running we should be able to get
22+
# the user that launched the EPP.
23+
query = f"""--sql
24+
SELECT ps.researcherid
25+
FROM principals ps
26+
JOIN externalprogramstatus eps ON ps.principalid = eps.ownerid
27+
JOIN auditchangelog acl ON acl.rowpk = CAST(eps.externalprogramstatusid AS TEXT)
28+
AND acl.tablename = 'externalprogramstatus'
29+
JOIN auditeventlog ael ON ael.eventid = acl.eventid
30+
WHERE ael.eventtype ='EPP_CONSUME_NEXT_REQUEST'
31+
AND eps.processid = {procid.split("-")[1]}
32+
AND eps.status = 'RUNNING';
33+
"""
2934

3035
with psycopg2.connect(
3136
user=config["username"],

scilifelab_epps/utils/udf_tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def fetch_last(
224224
logging.info(
225225
f"Found target UDF '{target_udf}'"
226226
+ f" with value '{current_art.udf[target_udf]}'"
227-
+ f" in process {steps_visited[-1]}"
227+
+ f" in process {steps_visited[-1] if pp else ''}"
228228
+ f" {'output' if pp else 'input'}"
229229
+ f" artifact '{current_art.name}' ({current_art.id})"
230230
)

scilifelab_epps/wrapper.py

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,29 @@
55
import sys
66

77
from genologics.config import BASEURI, PASSWORD, USERNAME
8-
from genologics.entities import Process
8+
from genologics.entities import Process, Researcher
99
from genologics.lims import Lims
1010

1111
from scilifelab_epps.epp import upload_file
12+
from scilifelab_epps.utils.get_epp_user import get_epp_user
13+
14+
15+
class TrackingRootLogger(logging.RootLogger):
    """Root logger that remembers whether a WARNING-or-higher record reached it.

    The flag ``errors_or_warnings`` starts out ``False`` and is switched to
    ``True`` the first time ``handle`` receives a record whose level is at
    least ``logging.WARNING``. It is never reset by this class.
    """

    def __init__(self, level):
        """Set up the root logger at ``level`` and clear the tracking flag."""
        super().__init__(level)
        self.errors_or_warnings = False

    def handle(self, record):
        """Note the severity of ``record``, then delegate to the base class."""
        # The flag is updated before delegating, so it is set regardless of
        # what the parent implementation does with the record.
        self.errors_or_warnings |= record.levelno >= logging.WARNING
        return super().handle(record)
1231

1332

1433
def epp_decorator(script_path: str, timestamp: str):
@@ -28,29 +47,55 @@ def epp_wrapper(args):
2847
lims.check_version()
2948
process = Process(lims, id=args.pid)
3049

50+
# Get EPP user
51+
try:
52+
epp_user: Researcher = get_epp_user(lims, args.pid)
53+
except ValueError:
54+
epp_user = None
55+
3156
# Name log file
3257
log_filename: str = (
3358
"_".join(
3459
[
3560
script_name,
3661
process.id,
3762
timestamp,
38-
process.technician.name.replace(" ", ""),
63+
(epp_user or process.technician).name.replace(" ", ""),
3964
]
4065
)
4166
+ ".log"
4267
)
4368

4469
# Set up logging
45-
logging.basicConfig(
46-
filename=log_filename,
47-
filemode="w",
48-
format="%(levelname)s: %(message)s",
49-
level=logging.INFO,
50-
)
70+
71+
# Set custom subclass as root logger
72+
logger = TrackingRootLogger(level=logging.INFO)
73+
logging.root = logger
74+
75+
# Clear any existing handlers (to avoid duplicates)
76+
for handler in logger.handlers[:]:
77+
logger.removeHandler(handler)
78+
79+
# Create file handler
80+
file_handler = logging.FileHandler(log_filename, mode="w")
81+
file_handler.setLevel(logging.INFO)
82+
file_handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
83+
84+
# Create stdout handler
85+
stdout_handler = logging.StreamHandler(sys.stdout)
86+
stdout_handler.setLevel(logging.INFO)
87+
stdout_handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
88+
89+
# Add both handlers to logger
90+
logger.addHandler(file_handler)
91+
logger.addHandler(stdout_handler)
5192

5293
# Start logging
53-
logging.info(f"Script '{script_name}' started at {timestamp}.")
94+
if not epp_user:
95+
logging.warning("No EPP user found for process ID {args.pid}.")
96+
logging.info(
97+
f"Script '{script_name}' started at {timestamp} by {(epp_user.name if epp_user else 'unknown')}."
98+
)
5499
logging.info(
55100
f"Launched in step '{process.type.name}' ({process.id}) opened by {process.technician.name}."
56101
)
@@ -80,23 +125,23 @@ def epp_wrapper(args):
80125

81126
# On script success
82127
else:
83-
logging.info("Script completed successfully.")
128+
logging.info("Script finished successfully. Uploading log file.")
84129
logging.shutdown()
85130
upload_file(
86131
file_path=log_filename,
87132
file_slot=args.log,
88133
process=process,
89134
lims=lims,
135+
remove=True,
90136
)
91137
# Check log for errors and warnings
92-
log_content = open(log_filename).read()
93-
os.remove(log_filename)
94-
if "ERROR:" in log_content or "WARNING:" in log_content:
138+
if logger.errors_or_warnings:
95139
sys.stderr.write(
96140
"Script finished successfully, but log contains errors or warnings, please have a look."
97141
)
98142
sys.exit(2)
99143
else:
144+
sys.stdout.write("Script finished successfully.")
100145
sys.exit(0)
101146

102147
return epp_wrapper

0 commit comments

Comments
 (0)