nf-core · famosab · May 26, 2025 · May 26, 2025 · May 26, 2025 · May 26, 2025
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [1841](https://github.com/nf-core/sarek/pull/1841) - Add pcr-indel-model parameter for GATK HaplotypeCaller
 - [1848](https://github.com/nf-core/sarek/pull/1848) - Add parameter for setting pixel distance for GATK MarkDuplicates
 - [1856](https://github.com/nf-core/sarek/pull/1856) - Added early failure when more than 1 normal sample per patient is provided for somatic variant calling
+- [1902](https://github.com/nf-core/sarek/pull/1902) - Add check that each sample ID is associated with only one patient
 
 ### Changed
 

@@ -55,6 +55,29 @@ workflow  SAMPLESHEET_TO_CHANNEL{
             }
         }
 
+    // Fail early when one sample ID is listed under more than one patient
+    ch_from_samplesheet
+        .map { meta, _fastq_1, _fastq_2, _spring_1, _spring_2, _table, _cram, _crai, _bam, _bai, _vcf, _variantcaller ->
+            // Get only the patient, sample and status fields from the meta map
+            [meta.patient, meta.subMap('sample', 'status')]
+        }
+        .unique()
+        .groupTuple()
+        // Reduce each patient's entries to the list of its sample ids
+        .map { patient, samples -> [patient, samples.collect { it.sample }] }
+        // Flatten to [sample_id, patient] pairs
+        .flatMap { patient, sample_ids -> sample_ids.collect { sample_id -> [sample_id, patient] } }
+        // Group by sample_id to collect all patient ids per sample
+        .groupTuple()
+        .map { sample_id, patient_ids ->
+            // De-duplicate on a copy (grouped list stays untouched) and sort so the reported patient order is deterministic
+            def unique_patients = patient_ids.unique(false).sort()
+            if (unique_patients.size() > 1) {
+                System.err.println("Sample ID '${sample_id}' is associated with multiple patient IDs: ${unique_patients.join(', ')}. Please ensure each sample ID is unique to a single patient.")
+                error("Execution halted due to sample ID inconsistency.")
+            }
+        }
+
     input_sample = ch_from_samplesheet.map{ meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller ->
         // generate patient_sample key to group lanes together
         [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, vcf, variantcaller] ]

@@ -0,0 +1,5 @@
+patient,sex,status,sample,lane,fastq_1,fastq_2
+test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz
+test,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz
+test2,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz
+test2,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz
diff --git a/tests/postprocess_concatenation.nf.test b/tests/postprocess_concatenation.nf.test
@@ -48,7 +48,7 @@ nextflow_pipeline {
                     // All cram files
                     cram_files.isEmpty() ? 'No CRAM files' : cram_files.collect { file -> file.getName() + ":md5," +  cram(file.toString(), fasta).readsMD5 },
                     // All vcf files
-                    vcf_files.isEmpty() ? 'No VCF files' : vcf_files.collect { file -> file.getName() + ":md5," + path(file.toString()).vcf.variantsMD5 }
+                    vcf_files.isEmpty() ? 'No VCF files' : vcf_files.collect { file -> file.getName() }
                 ).match() }
             )
         }

diff --git a/tests/postprocess_concatenation.nf.test.snap b/tests/postprocess_concatenation.nf.test.snap
@@ -212,14 +212,14 @@
             "No BAM files",
             "No CRAM files",
             [
-                "testN.germline.vcf.gz:md5,141a2cc53b0a9d4a0ab4d779cb1e487",
-                "testT.germline.vcf.gz:md5,7bac441a7c84790f43d26a73e10be9b5",
-                "testN.freebayes.vcf.gz:md5,1d98e39fe458af9020283de18c764055",
-                "testT.freebayes.vcf.gz:md5,387b7d48d04ad3b294f07173e6550fc7",
-                "testN.haplotypecaller.filtered.vcf.gz:md5,df2040bf1bee0252581824d11d5d87d1",
-                "testN.haplotypecaller.vcf.gz:md5,9d9d103327d59d17e778b663b56136fb",
-                "testT.haplotypecaller.filtered.vcf.gz:md5,d1e2d43d91858677d2b35fc389ee81b1",
-                "testT.haplotypecaller.vcf.gz:md5,5a2f49f0a41e890f9564a0baa11dc1c6"
+                "testN.germline.vcf.gz",
+                "testT.germline.vcf.gz",
+                "testN.freebayes.vcf.gz",
+                "testT.freebayes.vcf.gz",
+                "testN.haplotypecaller.filtered.vcf.gz",
+                "testN.haplotypecaller.vcf.gz",
+                "testT.haplotypecaller.filtered.vcf.gz",
+                "testT.haplotypecaller.vcf.gz"
             ]
         ],
         "meta": {

diff --git a/tests/samplesheets.nf.test b/tests/samplesheets.nf.test
@@ -18,7 +18,27 @@ nextflow_pipeline {
             assert workflow.failed
             assertAll(
                 { assert snapshot(
-                    workflow.stderr.toString().replace("[", "").replace("]", "").split(",")[0..1,3..5]
+                    workflow.stderr.toString().contains("Entry 2: Error for field 'sample' (test 2): \"test 2\" does not match regular expression ")
+                ).match() }
+            )
+        }
+    }
+
+    test("-profile test --input tests/csv/3.0/multiple_sample_ids.csv") { // expects the sample-ID uniqueness error for sample 'test' under patients 'test' and 'test2'
+
+        when {
+            params {
+                modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+                input = "${projectDir}/tests/csv/3.0/multiple_sample_ids.csv"
+                outdir = "$outputDir"
+            }
+        }
+
+        then {
+            assert workflow.failed // the run is expected to abort
+            assertAll(
+                { assert snapshot( // snapshots the boolean returned by contains(), not the stderr text itself
+                    workflow.stderr.toString().contains("Sample ID 'test' is associated with multiple patient IDs: test, test2. Please ensure each sample ID is unique to a single patient.")
                 ).match() }
             )
         }
@@ -39,7 +59,7 @@ nextflow_pipeline {
             assert workflow.failed
             assertAll(
                 { assert snapshot(
-                    workflow.stderr.toString().replace("[", "").replace("]", "").split(",")[0]
+                    workflow.stderr.toString().contains("Patient [test] has more than one sample [2] with normal status [0] and one sample with tumor status [1].")
                 ).match() }
             )
         }

diff --git a/tests/samplesheets.nf.test.snap b/tests/samplesheets.nf.test.snap
@@ -1,28 +1,32 @@
 {
     "-profile test --step variant_calling --input tests/csv/3.0/recalibrated_somatic_two_normal_one_sample.csv": {
         "content": [
-            "Patient test has more than one sample 2 with normal status 0 and one sample with tumor status 1."
+            true
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "25.04.2"
+            "nextflow": "24.10.6"
         },
-        "timestamp": "2025-05-19T09:45:25.306281592"
+        "timestamp": "2025-05-26T14:50:40.890207286"
     },
     "-profile test --input tests/csv/3.0/sample_with_space.csv": {
         "content": [
-            [
-                "\u001b0;31mThe following invalid input values have been detected:",
-                " ",
-                " \t-> Entry 2: Error for field 'sample' (test 2): \"test 2\" does not match regular expression ^\\S+$ (Sample ID must be provided",
-                " cannot contain spaces and must be a string value)",
-                " \u001b0m"
-            ]
+            true
         ],
         "meta": {
             "nf-test": "0.9.2",
-            "nextflow": "25.04.2"
+            "nextflow": "24.10.6"
         },
-        "timestamp": "2025-05-19T09:44:50.436290454"
+        "timestamp": "2025-05-26T14:49:49.015894562"
+    },
+    "-profile test --input tests/csv/3.0/multiple_sample_ids.csv": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.6"
+        },
+        "timestamp": "2025-05-26T14:50:17.254926436"
     }
 }