Skip to content

Commit d5ae87d

Browse files
Merge pull request #9 from MatildaAslin/sample_with_multiple_sample_ids
Sample with multiple sample ids
2 parents 7b8957f + 1a35214 commit d5ae87d

File tree

5 files changed, with 625 additions (+625) and 29 deletions (-29).

projman_filler/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44

55
__author__ = """Johan Dahlberg"""
66
__email__ = 'johan.dahlberg@medsci.uu.se'
7-
__version__ = '1.3.0'
7+
__version__ = '1.4.0'

projman_filler/sample_level_statistics.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@ def calculate_sample_statistics(flowcell_name, conversion_results, reads_and_cyc
4444
for sample_demux_result in sample_demux_results:
4545
# In samplesheets from the lims this is prefixed with the project name
4646
sample_name = sample_demux_result["SampleName"]
47+
sample_id = sample_demux_result["SampleId"]
4748

48-
sample_project = samplesheet.project_for_sample(sample_name, lane_nbr)
49-
sample_library_name = samplesheet.library_name_for_sample(sample_name, lane_nbr)
49+
sample_project = samplesheet.project_for_sample(sample_id, lane_nbr)
50+
sample_library_name = samplesheet.library_name_for_sample(sample_id, lane_nbr)
5051

5152
sample_yield = float(sample_demux_result["Yield"])
5253
fraction_of_lane = sample_yield / lane_yield

projman_filler/samplesheet.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,20 @@ def row_to_sample_row(index_and_row):
9393
samples = map(row_to_sample_row, samplesheet_df.iterrows())
9494
return list(samples)
9595

96-
def _get_matching_sample(self, sample_name, lane):
97-
matching = list(filter(lambda x: x.sample_name == sample_name and x.lane == lane, self.samples))
96+
def _get_matching_sample(self, sample_id, lane):
97+
98+
matching = list(filter(lambda x: x.sample_id == sample_id and x.lane == lane, self.samples))
9899

99100
if len(matching) != 1:
100101
raise KeyError()
101102
else:
102103
return matching[0]
103104

104-
def project_for_sample(self, sample_name, lane):
105-
return self._get_matching_sample(sample_name, lane).sample_project
105+
def project_for_sample(self, sample_id, lane):
106+
return self._get_matching_sample(sample_id, lane).sample_project
106107

107-
def library_name_for_sample(self, sample_name, lane):
108-
description = self._get_matching_sample(sample_name, lane).description
108+
def library_name_for_sample(self, sample_id, lane):
109+
description = self._get_matching_sample(sample_id, lane).description
109110
matching = list(filter(lambda x: "LIBRARY_NAME" in x, description.split(";")))
110111
if len(matching) != 1:
111112
return None

tests/test_sample_level_statistics.py

Lines changed: 86 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from projman_filler.models.db_models import SampleResult
44
from projman_filler.sample_level_statistics import calculate_sample_statistics
55

6-
from tests.test_utils import conversion_results, conversion_results_without_index_metrics, conversion_results_sample_with_no_reads
6+
from tests.test_utils import *
77

88

99
class TestSampleLevelStatistics(unittest.TestCase):
@@ -22,16 +22,16 @@ class TestSampleLevelStatistics(unittest.TestCase):
2222

2323
class SampleSheetMock(object):
2424
def __init__(self):
25-
self.project_dict = {"A": "Project1",
26-
"B": "Project2",
27-
"C": "Project1",
28-
"D": "Project2"}
25+
self.project_dict = {"Sample_A": "Project1",
26+
"Sample_B": "Project2",
27+
"Sample_C": "Project1",
28+
"Sample_D": "Project2"}
2929

30-
def project_for_sample(self, sample_name, lane):
31-
return self.project_dict[sample_name]
30+
def project_for_sample(self, sample_id, lane):
31+
return self.project_dict[sample_id]
3232

33-
def library_name_for_sample(self, sample_name, lane):
34-
return "{}.library".format(sample_name)
33+
def library_name_for_sample(self, sample_id, lane):
34+
return "{}.library".format(sample_id)
3535

3636
samplesheet_mock = SampleSheetMock()
3737

@@ -50,32 +50,32 @@ def test_calculate_sample_level_statistics(self):
5050
'tag_seq': 'GTAGAGGA-CTCTCTAT', 'lane_num': 1, 'read_num': 1, 'cycles': 151,
5151
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
5252
'pct_q30': 98.02429332249935, 'pct_tag_err': 0.671112157794024,
53-
'library_name': 'A.library', 'mean_q': 38.84148990743496},
53+
'library_name': 'Sample_A.library', 'mean_q': 38.84148990743496},
5454
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
5555
'tag_seq': 'GTAGAGGA-CTCTCTAT', 'lane_num': 1, 'read_num': 2, 'cycles': 151,
5656
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
5757
'pct_q30': 96.45192508767363, 'pct_tag_err': 0.671112157794024,
58-
'library_name': 'A.library', 'mean_q': 38.373262536376345},
58+
'library_name': 'Sample_A.library', 'mean_q': 38.373262536376345},
5959
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
6060
'tag_seq': 'TAGGCATG-CTCTCTAT', 'lane_num': 1, 'read_num': 1, 'cycles': 151,
6161
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
6262
'pct_q30': 98.02429332249935, 'pct_tag_err': 0.7880181078880083,
63-
'library_name': 'A.library', 'mean_q': 38.84148990743496},
63+
'library_name': 'Sample_A.library', 'mean_q': 38.84148990743496},
6464
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
6565
'tag_seq': 'TAGGCATG-CTCTCTAT', 'lane_num': 1, 'read_num': 2, 'cycles': 151,
6666
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
6767
'pct_q30': 96.45192508767363, 'pct_tag_err': 0.7880181078880083,
68-
'library_name': 'A.library', 'mean_q': 38.373262536376345},
68+
'library_name': 'Sample_A.library', 'mean_q': 38.373262536376345},
6969
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
7070
'tag_seq': 'TCCTGAGC-CTCTCTAT', 'lane_num': 1, 'read_num': 1, 'cycles': 151,
7171
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
7272
'pct_q30': 98.02429332249935, 'pct_tag_err': 0.7687463809335591,
73-
'library_name': 'A.library', 'mean_q': 38.84148990743496},
73+
'library_name': 'Sample_A.library', 'mean_q': 38.84148990743496},
7474
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
7575
'tag_seq': 'TCCTGAGC-CTCTCTAT', 'lane_num': 1, 'read_num': 2, 'cycles': 151,
7676
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
7777
'pct_q30': 96.45192508767363, 'pct_tag_err': 0.7687463809335591,
78-
'library_name': 'A.library', 'mean_q': 38.373262536376345}]
78+
'library_name': 'Sample_A.library', 'mean_q': 38.373262536376345}]
7979

8080
expected_sample_a = list(map(lambda x: SampleResult(**x), list_of_values_for_a))
8181
self.assertListEqual(expected_sample_a, actual_sample_a)
@@ -95,12 +95,12 @@ def test_calculate_sample_level_statistics_without_index_metrics(self):
9595
'tag_seq': 'unknown', 'lane_num': 1, 'read_num': 1, 'cycles': 151,
9696
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
9797
'pct_q30': 98.02429332249935, 'pct_tag_err': None,
98-
'library_name': 'A.library', 'mean_q': 38.84148990743496},
98+
'library_name': 'Sample_A.library', 'mean_q': 38.84148990743496},
9999
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
100100
'tag_seq': 'unknown', 'lane_num': 1, 'read_num': 2, 'cycles': 151,
101101
'pct_lane': 49.91040361971908, 'pf_clusters': 81217423.0,
102102
'pct_q30': 96.45192508767363, 'pct_tag_err': None,
103-
'library_name': 'A.library', 'mean_q': 38.373262536376345}]
103+
'library_name': 'Sample_A.library', 'mean_q': 38.373262536376345}]
104104

105105
expected_sample_a = list(map(lambda x: SampleResult(**x), list_of_values_for_a))
106106
self.assertListEqual(expected_sample_a, actual_sample_a)
@@ -121,12 +121,79 @@ def test_calculate_sample_level_statistics_sample_with_no_reads(self):
121121
'tag_seq': 'GTAGAGGA-CTCTCTAT', 'lane_num': 1, 'read_num': 1, 'cycles': 151,
122122
'pct_lane': 0, 'pf_clusters': 0,
123123
'pct_q30': None, 'pct_tag_err': None,
124-
'library_name': 'A.library', 'mean_q': None},
124+
'library_name': 'Sample_A.library', 'mean_q': None},
125125
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
126126
'tag_seq': 'GTAGAGGA-CTCTCTAT', 'lane_num': 1, 'read_num': 2, 'cycles': 151,
127127
'pct_lane': 0, 'pf_clusters': 0,
128128
'pct_q30': None, 'pct_tag_err': None,
129-
'library_name': 'A.library', 'mean_q': None}]
129+
'library_name': 'Sample_A.library', 'mean_q': None}]
130+
131+
expected_sample_a = list(map(lambda x: SampleResult(**x), list_of_values_for_a))
132+
self.assertListEqual(expected_sample_a, actual_sample_a)
133+
134+
def test_calculate_sample_level_statistics_samples_with_multiple_sample_ids(self):
135+
#For this test we need to update the list of Sample IDs
136+
self.samplesheet_mock_multiple_sampleIDs = self.SampleSheetMock()
137+
self.samplesheet_mock_multiple_sampleIDs.project_dict = {"SI-GA-D1_1": "Project1",
138+
"SI-GA-D1_2": "Project1",
139+
"SI-GA-D1_3": "Project1",
140+
"SI-GA-D1_4": "Project1",
141+
"SI-GA-F2_1": "Project2",
142+
"SI-GA-F2_2": "Project2",
143+
"SI-GA-F2_3": "Project2",
144+
"SI-GA-F2_4": "Project2",
145+
"SI-GA-E1_1": "Project3",
146+
"SI-GA-E1_2": "Project3",
147+
"SI-GA-E1_3": "Project3",
148+
"SI-GA-E1_4": "Project3",
149+
"SI-GA-F1_1": "Project4",
150+
"SI-GA-F1_2": "Project4",
151+
"SI-GA-F1_3": "Project4",
152+
"SI-GA-F1_4": "Project4"}
153+
154+
actual = list(calculate_sample_statistics(flowcell_name=self.flowcell_id,
155+
conversion_results=conversion_results_multiple_sampleIDs_per_sampleName,
156+
reads_and_cycles=self.reads_and_cycles,
157+
samplesheet=self.samplesheet_mock_multiple_sampleIDs))
158+
159+
# One row per sample, lane, index and read
160+
self.assertEqual(len(actual), 4*1*4*2)
161+
162+
actual_sample_a = list(filter(lambda x: x.sample_name == 'A', actual))
163+
164+
list_of_values_for_a = [
165+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
166+
'lane_num': 1, 'pct_lane': 6.115905919800121, 'library_name': 'SI-GA-D1_1.library',
167+
'tag_seq': 'CACTCGGA', 'pct_tag_err': 3.621424494761074, 'read_num': 1, 'cycles': 151,
168+
'mean_q': 39.16345840568938, 'pct_q30': 92.45344394187265, 'pf_clusters': 2360121.0},
169+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
170+
'lane_num': 1, 'pct_lane': 6.115905919800121, 'library_name': 'SI-GA-D1_1.library',
171+
'tag_seq': 'CACTCGGA', 'pct_tag_err': 3.621424494761074, 'read_num': 2, 'cycles': 151,
172+
'mean_q': 36.559150347300495, 'pct_q30': 84.04189940076341, 'pf_clusters': 2360121.0},
173+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
174+
'lane_num': 1, 'pct_lane': 5.757983102514638, 'library_name': 'SI-GA-D1_2.library',
175+
'tag_seq': 'GCTGAATT', 'pct_tag_err': 3.720388713046226, 'read_num': 1, 'cycles': 151,
176+
'mean_q': 39.23153569054157, 'pct_q30': 92.65040530937226, 'pf_clusters': 2221999.0},
177+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
178+
'lane_num': 1, 'pct_lane': 5.757983102514638, 'library_name': 'SI-GA-D1_2.library',
179+
'tag_seq': 'GCTGAATT', 'pct_tag_err': 3.720388713046226, 'read_num': 2, 'cycles': 151,
180+
'mean_q': 36.748773746773196, 'pct_q30': 84.63309285113054, 'pf_clusters': 2221999.0},
181+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
182+
'lane_num': 1, 'pct_lane': 5.187123651369359, 'library_name': 'SI-GA-D1_3.library',
183+
'tag_seq': 'TGAAGTAC', 'pct_tag_err': 3.5747525234737383, 'read_num': 1, 'cycles': 151,
184+
'mean_q': 39.18503968413285, 'pct_q30': 92.50719688617771, 'pf_clusters': 2001705.0},
185+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
186+
'lane_num': 1, 'pct_lane': 5.187123651369359, 'library_name': 'SI-GA-D1_3.library',
187+
'tag_seq': 'TGAAGTAC', 'pct_tag_err': 3.5747525234737383, 'read_num': 2, 'cycles': 151,
188+
'mean_q': 36.757323706043906, 'pct_q30': 84.6543801541636, 'pf_clusters': 2001705.0},
189+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
190+
'lane_num': 1, 'pct_lane': 6.201171788958993, 'library_name': 'SI-GA-D1_4.library',
191+
'tag_seq': 'ATGCTCCG', 'pct_tag_err': 3.622402607578274, 'read_num': 1, 'cycles': 151,
192+
'mean_q': 39.17663714585525, 'pct_q30': 92.50440798869728, 'pf_clusters': 2393025.0},
193+
{'flowcell_id': 'foo', 'project_id': 'Project1', 'sample_name': 'A',
194+
'lane_num': 1, 'pct_lane': 6.201171788958993, 'library_name': 'SI-GA-D1_4.library',
195+
'tag_seq': 'ATGCTCCG', 'pct_tag_err': 3.622402607578274, 'read_num': 2, 'cycles': 151,
196+
'mean_q': 36.580552589683414, 'pct_q30': 84.11983530225224, 'pf_clusters': 2393025.0}]
130197

131198
expected_sample_a = list(map(lambda x: SampleResult(**x), list_of_values_for_a))
132199
self.assertListEqual(expected_sample_a, actual_sample_a)

0 commit comments

Comments (0)