33from projman_filler .models .db_models import SampleResult
44from projman_filler .sample_level_statistics import calculate_sample_statistics
55
6- from tests .test_utils import conversion_results , conversion_results_without_index_metrics , conversion_results_sample_with_no_reads
6+ from tests .test_utils import *
77
88
99class TestSampleLevelStatistics (unittest .TestCase ):
@@ -22,16 +22,16 @@ class TestSampleLevelStatistics(unittest.TestCase):
2222
2323 class SampleSheetMock (object ):
2424 def __init__ (self ):
25- self .project_dict = {"A " : "Project1" ,
26- "B " : "Project2" ,
27- "C " : "Project1" ,
28- "D " : "Project2" }
25+ self .project_dict = {"Sample_A " : "Project1" ,
26+ "Sample_B " : "Project2" ,
27+ "Sample_C " : "Project1" ,
28+ "Sample_D " : "Project2" }
2929
30- def project_for_sample (self , sample_name , lane ):
31- return self .project_dict [sample_name ]
30+ def project_for_sample (self , sample_id , lane ):
31+ return self .project_dict [sample_id ]
3232
33- def library_name_for_sample (self , sample_name , lane ):
34- return "{}.library" .format (sample_name )
33+ def library_name_for_sample (self , sample_id , lane ):
34+ return "{}.library" .format (sample_id )
3535
3636 samplesheet_mock = SampleSheetMock ()
3737
@@ -50,32 +50,32 @@ def test_calculate_sample_level_statistics(self):
5050 'tag_seq' : 'GTAGAGGA-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 1 , 'cycles' : 151 ,
5151 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
5252 'pct_q30' : 98.02429332249935 , 'pct_tag_err' : 0.671112157794024 ,
53- 'library_name' : 'A .library' , 'mean_q' : 38.84148990743496 },
53+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.84148990743496 },
5454 {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
5555 'tag_seq' : 'GTAGAGGA-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 2 , 'cycles' : 151 ,
5656 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
5757 'pct_q30' : 96.45192508767363 , 'pct_tag_err' : 0.671112157794024 ,
58- 'library_name' : 'A .library' , 'mean_q' : 38.373262536376345 },
58+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.373262536376345 },
5959 {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
6060 'tag_seq' : 'TAGGCATG-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 1 , 'cycles' : 151 ,
6161 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
6262 'pct_q30' : 98.02429332249935 , 'pct_tag_err' : 0.7880181078880083 ,
63- 'library_name' : 'A .library' , 'mean_q' : 38.84148990743496 },
63+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.84148990743496 },
6464 {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
6565 'tag_seq' : 'TAGGCATG-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 2 , 'cycles' : 151 ,
6666 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
6767 'pct_q30' : 96.45192508767363 , 'pct_tag_err' : 0.7880181078880083 ,
68- 'library_name' : 'A .library' , 'mean_q' : 38.373262536376345 },
68+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.373262536376345 },
6969 {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
7070 'tag_seq' : 'TCCTGAGC-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 1 , 'cycles' : 151 ,
7171 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
7272 'pct_q30' : 98.02429332249935 , 'pct_tag_err' : 0.7687463809335591 ,
73- 'library_name' : 'A .library' , 'mean_q' : 38.84148990743496 },
73+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.84148990743496 },
7474 {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
7575 'tag_seq' : 'TCCTGAGC-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 2 , 'cycles' : 151 ,
7676 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
7777 'pct_q30' : 96.45192508767363 , 'pct_tag_err' : 0.7687463809335591 ,
78- 'library_name' : 'A .library' , 'mean_q' : 38.373262536376345 }]
78+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.373262536376345 }]
7979
8080 expected_sample_a = list (map (lambda x : SampleResult (** x ), list_of_values_for_a ))
8181 self .assertListEqual (expected_sample_a , actual_sample_a )
@@ -95,12 +95,12 @@ def test_calculate_sample_level_statistics_without_index_metrics(self):
9595 'tag_seq' : 'unknown' , 'lane_num' : 1 , 'read_num' : 1 , 'cycles' : 151 ,
9696 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
9797 'pct_q30' : 98.02429332249935 , 'pct_tag_err' : None ,
98- 'library_name' : 'A .library' , 'mean_q' : 38.84148990743496 },
98+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.84148990743496 },
9999 {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
100100 'tag_seq' : 'unknown' , 'lane_num' : 1 , 'read_num' : 2 , 'cycles' : 151 ,
101101 'pct_lane' : 49.91040361971908 , 'pf_clusters' : 81217423.0 ,
102102 'pct_q30' : 96.45192508767363 , 'pct_tag_err' : None ,
103- 'library_name' : 'A .library' , 'mean_q' : 38.373262536376345 }]
103+ 'library_name' : 'Sample_A .library' , 'mean_q' : 38.373262536376345 }]
104104
105105 expected_sample_a = list (map (lambda x : SampleResult (** x ), list_of_values_for_a ))
106106 self .assertListEqual (expected_sample_a , actual_sample_a )
@@ -121,12 +121,79 @@ def test_calculate_sample_level_statistics_sample_with_no_reads(self):
121121 'tag_seq' : 'GTAGAGGA-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 1 , 'cycles' : 151 ,
122122 'pct_lane' : 0 , 'pf_clusters' : 0 ,
123123 'pct_q30' : None , 'pct_tag_err' : None ,
124- 'library_name' : 'A .library' , 'mean_q' : None },
124+ 'library_name' : 'Sample_A .library' , 'mean_q' : None },
125125 {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
126126 'tag_seq' : 'GTAGAGGA-CTCTCTAT' , 'lane_num' : 1 , 'read_num' : 2 , 'cycles' : 151 ,
127127 'pct_lane' : 0 , 'pf_clusters' : 0 ,
128128 'pct_q30' : None , 'pct_tag_err' : None ,
129- 'library_name' : 'A.library' , 'mean_q' : None }]
129+ 'library_name' : 'Sample_A.library' , 'mean_q' : None }]
130+
131+ expected_sample_a = list (map (lambda x : SampleResult (** x ), list_of_values_for_a ))
132+ self .assertListEqual (expected_sample_a , actual_sample_a )
133+
134+ def test_calculate_sample_level_statistics_samples_with_multiple_sample_ids (self ):
135+ # For this test we need to replace the mock's project_dict with one keyed by the new sample IDs.
136+ self .samplesheet_mock_multiple_sampleIDs = self .SampleSheetMock ()
137+ self .samplesheet_mock_multiple_sampleIDs .project_dict = {"SI-GA-D1_1" : "Project1" ,
138+ "SI-GA-D1_2" : "Project1" ,
139+ "SI-GA-D1_3" : "Project1" ,
140+ "SI-GA-D1_4" : "Project1" ,
141+ "SI-GA-F2_1" : "Project2" ,
142+ "SI-GA-F2_2" : "Project2" ,
143+ "SI-GA-F2_3" : "Project2" ,
144+ "SI-GA-F2_4" : "Project2" ,
145+ "SI-GA-E1_1" : "Project3" ,
146+ "SI-GA-E1_2" : "Project3" ,
147+ "SI-GA-E1_3" : "Project3" ,
148+ "SI-GA-E1_4" : "Project3" ,
149+ "SI-GA-F1_1" : "Project4" ,
150+ "SI-GA-F1_2" : "Project4" ,
151+ "SI-GA-F1_3" : "Project4" ,
152+ "SI-GA-F1_4" : "Project4" }
153+
154+ actual = list (calculate_sample_statistics (flowcell_name = self .flowcell_id ,
155+ conversion_results = conversion_results_multiple_sampleIDs_per_sampleName ,
156+ reads_and_cycles = self .reads_and_cycles ,
157+ samplesheet = self .samplesheet_mock_multiple_sampleIDs ))
158+
159+ # One row per sample, lane, index and read
160+ self .assertEqual (len (actual ), 4 * 1 * 4 * 2 )
161+
162+ actual_sample_a = list (filter (lambda x : x .sample_name == 'A' , actual ))
163+
164+ list_of_values_for_a = [
165+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
166+ 'lane_num' : 1 , 'pct_lane' : 6.115905919800121 , 'library_name' : 'SI-GA-D1_1.library' ,
167+ 'tag_seq' : 'CACTCGGA' , 'pct_tag_err' : 3.621424494761074 , 'read_num' : 1 , 'cycles' : 151 ,
168+ 'mean_q' : 39.16345840568938 , 'pct_q30' : 92.45344394187265 , 'pf_clusters' : 2360121.0 },
169+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
170+ 'lane_num' : 1 , 'pct_lane' : 6.115905919800121 , 'library_name' : 'SI-GA-D1_1.library' ,
171+ 'tag_seq' : 'CACTCGGA' , 'pct_tag_err' : 3.621424494761074 , 'read_num' : 2 , 'cycles' : 151 ,
172+ 'mean_q' : 36.559150347300495 , 'pct_q30' : 84.04189940076341 , 'pf_clusters' : 2360121.0 },
173+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
174+ 'lane_num' : 1 , 'pct_lane' : 5.757983102514638 , 'library_name' : 'SI-GA-D1_2.library' ,
175+ 'tag_seq' : 'GCTGAATT' , 'pct_tag_err' : 3.720388713046226 , 'read_num' : 1 , 'cycles' : 151 ,
176+ 'mean_q' : 39.23153569054157 , 'pct_q30' : 92.65040530937226 , 'pf_clusters' : 2221999.0 },
177+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
178+ 'lane_num' : 1 , 'pct_lane' : 5.757983102514638 , 'library_name' : 'SI-GA-D1_2.library' ,
179+ 'tag_seq' : 'GCTGAATT' , 'pct_tag_err' : 3.720388713046226 , 'read_num' : 2 , 'cycles' : 151 ,
180+ 'mean_q' : 36.748773746773196 , 'pct_q30' : 84.63309285113054 , 'pf_clusters' : 2221999.0 },
181+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
182+ 'lane_num' : 1 , 'pct_lane' : 5.187123651369359 , 'library_name' : 'SI-GA-D1_3.library' ,
183+ 'tag_seq' : 'TGAAGTAC' , 'pct_tag_err' : 3.5747525234737383 , 'read_num' : 1 , 'cycles' : 151 ,
184+ 'mean_q' : 39.18503968413285 , 'pct_q30' : 92.50719688617771 , 'pf_clusters' : 2001705.0 },
185+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
186+ 'lane_num' : 1 , 'pct_lane' : 5.187123651369359 , 'library_name' : 'SI-GA-D1_3.library' ,
187+ 'tag_seq' : 'TGAAGTAC' , 'pct_tag_err' : 3.5747525234737383 , 'read_num' : 2 , 'cycles' : 151 ,
188+ 'mean_q' : 36.757323706043906 , 'pct_q30' : 84.6543801541636 , 'pf_clusters' : 2001705.0 },
189+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
190+ 'lane_num' : 1 , 'pct_lane' : 6.201171788958993 , 'library_name' : 'SI-GA-D1_4.library' ,
191+ 'tag_seq' : 'ATGCTCCG' , 'pct_tag_err' : 3.622402607578274 , 'read_num' : 1 , 'cycles' : 151 ,
192+ 'mean_q' : 39.17663714585525 , 'pct_q30' : 92.50440798869728 , 'pf_clusters' : 2393025.0 },
193+ {'flowcell_id' : 'foo' , 'project_id' : 'Project1' , 'sample_name' : 'A' ,
194+ 'lane_num' : 1 , 'pct_lane' : 6.201171788958993 , 'library_name' : 'SI-GA-D1_4.library' ,
195+ 'tag_seq' : 'ATGCTCCG' , 'pct_tag_err' : 3.622402607578274 , 'read_num' : 2 , 'cycles' : 151 ,
196+ 'mean_q' : 36.580552589683414 , 'pct_q30' : 84.11983530225224 , 'pf_clusters' : 2393025.0 }]
130197
131198 expected_sample_a = list (map (lambda x : SampleResult (** x ), list_of_values_for_a ))
132199 self .assertListEqual (expected_sample_a , actual_sample_a )
0 commit comments