Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions lib/ReportFunctions.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@

class ReportFunctions {

/**
 * Names of the QC evaluation strategies that select_qc_func can dispatch to.
 * A strategy is chosen by passing the constant's name as a string, so the
 * spelling of each constant is part of the contract with callers.
 */
enum FuncType { GENERIC, CONTIG, READQUALITY, AUTOFAIL, COVERAGE }

/**
 * Runs the QC check selected by {@code func} for a single metric.
 *
 * @param qual_data  map of metric name to that metric's QC result; may be null
 * @param metric     key of the metric to evaluate in {@code qual_data}
 * @param qc_message list that receives additional user-facing messages
 * @param meta_info  sample metadata; the {@code assembly} and {@code downsampled}
 *                   entries are read here
 * @param func       name of a {@link FuncType} constant
 * @return a six element list:
 *         [checks, reisolate, resequence, failed_p, check_failed, check_ignored].
 *         READQUALITY and COVERAGE are skipped when {@code meta_info.assembly}
 *         is truthy, in which case every counter is 0 and failed_p is false.
 * @throws IllegalArgumentException if {@code func} is a non-null string that
 *         does not name a FuncType constant (raised by the enum coercion)
 * @throws NoSuchMethodException if {@code func} does not resolve to a handled
 *         FuncType (e.g. it is null)
 */
static def select_qc_func(java.util.LinkedHashMap qual_data, java.lang.String metric, java.util.ArrayList qc_message, java.util.LinkedHashMap meta_info, java.lang.String func) {
    def check_failed = 0
    def reisolate = 0
    def resequence = 0
    def check_ignored = 0
    def failed_p = false
    def checks = 0
    // Unknown names are already rejected by this coercion; a null name coerces
    // to null and falls through to the default branch of the switch below.
    def function = func as FuncType

    switch (function) {
        case FuncType.GENERIC:
            (checks, reisolate, resequence, failed_p, check_failed, check_ignored) = ReportFunctions.generic_qc_func(qual_data, metric, qc_message)
            break
        case FuncType.AUTOFAIL:
            (checks, reisolate, resequence, failed_p, check_failed, check_ignored) = ReportFunctions.autofail_reisolate(qual_data, metric, qc_message)
            break
        case FuncType.READQUALITY:
            // Read-level checks only apply when the sample was not supplied as an assembly.
            if (!meta_info.assembly) {
                (checks, reisolate, resequence, failed_p, check_failed, check_ignored) = ReportFunctions.generic_qc_func(qual_data, metric, qc_message)
            }
            break
        case FuncType.COVERAGE:
            if (!meta_info.assembly) {
                (checks, reisolate, resequence, failed_p, check_failed, check_ignored) = ReportFunctions.generic_qc_func(qual_data, metric, qc_message)
                // The downsampling hint only makes sense when the coverage check
                // actually failed; the previous condition (!failed_p) was inverted.
                if (failed_p && meta_info.downsampled) {
                    qc_message.add('The sample may have been downsampled too aggressively, if this is the cause please re-run sample with a different target depth.')
                }
            }
            break
        case FuncType.CONTIG:
            (checks, reisolate, resequence, failed_p, check_failed, check_ignored) = ReportFunctions.contig_qc_func(qual_data, metric, qc_message)
            break
        default:
            // `new` and the correct class name are required: without them Groovy
            // looks for a method called NoSuchMethodExeption and raises
            // MissingMethodException instead of the intended error.
            throw new NoSuchMethodException("No function for $func exists.")
    }

    return [checks, reisolate, resequence, failed_p, check_failed, check_ignored]
}

/**
 * Evaluates a contig-related metric without requesting reisolation or
 * resequencing; those two counters are always returned as 0.
 *
 * @param qual_data  map of metric name to that metric's QC result; may be null
 * @param metric     key of the metric to evaluate
 * @param qc_message unused by this function
 * @return [checks, reisolate, resequence, failed_p, checks_failed, checks_ignored]
 *         where checks is always 1
 */
static def contig_qc_func(java.util.LinkedHashMap qual_data, java.lang.String metric, java.util.ArrayList qc_message) {
    def failures = 0
    def skipped = 0
    def failed_p = false

    if (qual_data == null) {
        // No QC data at all: the check is counted but ignored.
        skipped = 1
    } else if (qual_data) {
        // Non-empty map: ignore a missing metric, fail a falsy status.
        if (!qual_data.containsKey(metric)) {
            skipped = 1
        } else if (!qual_data[metric].status) {
            failures = 1
            failed_p = true
        }
    }
    // An empty (non-null) map falls through with nothing failed or ignored.

    return [1, 0, 0, failed_p, failures, skipped]
}

/**
 * Evaluates a metric where a failure counts towards both reisolation and
 * resequencing.
 *
 * @param qual_data  map of metric name to that metric's QC result; may be null
 * @param metric     key of the metric to evaluate
 * @param qc_message unused by this function
 * @return [checks, reisolate, resequence, failed_p, checks_failed, checks_ignored]
 *         where checks is always 1
 */
static def generic_qc_func(java.util.LinkedHashMap qual_data, java.lang.String metric, java.util.ArrayList qc_message) {
    // True only for a non-empty map that holds the metric.
    def metric_present = qual_data && qual_data.containsKey(metric)
    // A present metric fails when its status is falsy.
    def metric_failed = metric_present && !qual_data[metric].status
    // Ignored when there is no data at all, or data exists but lacks the metric.
    def metric_ignored = qual_data == null || (qual_data && !metric_present)

    def fail_count = metric_failed ? 1 : 0
    def ignore_count = metric_ignored ? 1 : 0

    return [1, fail_count, fail_count, metric_failed, fail_count, ignore_count]
}

/**
 * Evaluates a metric for the AUTOFAIL strategy. A failed metric sets the
 * reisolate, resequence and checks_failed counters to 1.
 *
 * @param qual_data  map of metric name to that metric's QC result; may be null
 * @param metric     key of the metric to evaluate
 * @param qc_message unused by this function
 * @return [checks, reisolate, resequence, failed_p, checks_failed, checks_ignored]
 *         where checks is always 1
 */
static def autofail_reisolate(java.util.LinkedHashMap qual_data, java.lang.String metric, java.util.ArrayList qc_message) {
    // No QC data at all: counted as an ignored check.
    if (qual_data == null) {
        return [1, 0, 0, false, 0, 1]
    }
    // Empty map: counted, but neither failed nor ignored.
    if (!qual_data) {
        return [1, 0, 0, false, 0, 0]
    }
    // Data exists but this metric was not reported: ignored.
    if (!qual_data.containsKey(metric)) {
        return [1, 0, 0, false, 0, 1]
    }
    // Metric reported with a truthy status: passed.
    if (qual_data[metric].status) {
        return [1, 0, 0, false, 0, 0]
    }
    // Metric reported with a falsy status: failed.
    return [1, 1, 1, true, 1, 0]
}

}
92 changes: 15 additions & 77 deletions modules/local/report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import groovy.json.JsonSlurper
import groovy.json.JsonBuilder
import java.nio.file.Paths


process REPORT{
tag "Report Generation"
label "process_single"
Expand Down Expand Up @@ -152,7 +151,7 @@ def n50_nrcontigs_decision(qual_data, nr_cont_p, n50_p, qual_message, reisolate,
*/

if(nr_cont_p && n50_p){
// both fialed :(
// both failed :(
if(qual_data && qual_data.containsKey("nr_contigs") && qual_data.nr_contigs.low){
if(qual_data.n50_value.low){

Expand Down Expand Up @@ -267,94 +266,33 @@ def create_action_call(sample_data, species_tag){
}

def qual_message = []
def failed_p = false
//def failed_p = false
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is now unused, could you remove it?

def checks_failed = 0
def checks = 0
def checks_ignored = 0
def n50_failed = false
def nr_contigs_failed = false


for(qc_report_field in params.QCReportFields){

// ! TODO Summing of ignored checks is messy and the logic can likely be cleaned up
if(qual_data && qual_data.containsKey("checkm_contamination") && !qual_data.checkm_contamination.status){
reisolate = reisolate + contamination_fail
resequence += 1
failed_p = true
checks_failed += 1
}else if (qual_data && (!qual_data.containsKey("checkm_contamination") || !qual_data.checkm_contamination.status)){
checks_ignored += 1
}else if(qual_data == null){
checks_ignored += 1
}
checks += 1

if(!meta_data.assembly){
// We should have reads as we assembled it
if(qual_data && qual_data.containsKey("raw_average_quality") && !qual_data.raw_average_quality.status){
resequence += 1
checks_failed += 1
}else if (qual_data && (!qual_data.containsKey("raw_average_quality") || !qual_data.raw_average_quality.status)){
checks_ignored += 1
}else if(qual_data == null){
checks_ignored += 1
}
checks += 1

if(qual_data && qual_data.containsKey("average_coverage") && !qual_data.average_coverage.status){

if(meta_data.downsampled){
qual_message.add("The sample may have been downsampled too aggressively, if this is the cause please re-run sample with a different target depth.")
if(qc_report_field.value.on){
// Need to figure out how to handle the requirement of a category requiring reads...
// number is too hight as not excluding read checks
def (checked, rei, res, fail_p, chck_f, chck_i) = ReportFunctions.select_qc_func(qual_data, qc_report_field.key, qual_message, meta_data, qc_report_field.value.qc_func)
//reisolate = rei + contamination_fail
checks_failed += chck_f
resequence += res
failed_p = fail_p
if(failed_p && qc_report_field.value.qc_func as ReportFunctions.FuncType == ReportFunctions.FuncType.AUTOFAIL){
reisolate = rei + contamination_fail
}
checks_failed += 1
resequence += 1
}else if(qual_data && (!qual_data.containsKey("average_coverage") || !qual_data.average_coverage.status)){
checks_ignored += 1
}else if(qual_data == null){
checks_ignored += 1
checks_ignored += chck_i
checks += checked
}
checks += 1
}

if(qual_data && qual_data.containsKey("length") && !qual_data.length.status){
if(qual_data.length.low){
resequence += 1
checks_failed += 1
}else{
resequence += 1
reisolate = reisolate + contamination_fail
checks_failed += 1
}
}else if (qual_data && (!qual_data.containsKey("length") || !qual_data.length.status)){
checks_ignored += 1
}else if(qual_data == null){
checks_ignored += 1
}
checks += 1

if(qual_data && qual_data.containsKey("nr_contigs") && !qual_data.nr_contigs.status){
checks_failed += 1
nr_contigs_failed = true
}else if (qual_data && (!qual_data.containsKey("nr_contigs") || !qual_data.nr_contigs.status)){
checks_ignored += 1
}else if(qual_data == null){
checks_ignored += 1
}
checks += 1

if(qual_data && qual_data.containsKey("n50_value") && !qual_data.n50_value.status){
checks_failed += 1
n50_failed = true
}else if (qual_data && (!qual_data.containsKey("n50_value") || !qual_data.n50_value.status)){
checks_ignored += 1
}else if(qual_data == null){
checks_ignored += 1
}
checks += 1


(reisolate, resequence) = n50_nrcontigs_decision(qual_data, nr_contigs_failed, n50_failed, qual_message, reisolate, resequence)
//qual_message.add("Quality Conclusion")

add_secondary_message(params.assembly_status.report_tag,
"Assembly failed, this may be an issue with your data or the pipeline. Please check the log or the outputs in the samples work directory.",
Expand Down
8 changes: 8 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -1083,6 +1083,7 @@ params {
SecondaryTypeIDMethod = null
}
}


QCReportFields {
// Configure paths if final report where relevant data is required
Expand All @@ -1093,6 +1094,7 @@ params {
comp_type = "ge"
on = true
low_msg = "Base quality is poor, resequencing is recommended."
qc_func = "READQUALITY"
}
average_coverage {
/*
Expand All @@ -1109,20 +1111,23 @@ params {
comp_type = 'ge'
on = true
low_msg = "Depth of coverage from assembly is lower than than expected. A top-up run is likely needed."
qc_func = "COVERAGE"
}
metagenomic {
path = [params.mash_meta.report_tag]
coerce_type = 'Bool'
compare_fields = []
comp_type = "bool"
on = false
qc_func = "GENERIC"
}
n50_value {
path = [params.quast.report_tag, "0", "N50"]
coerce_type = 'Integer'
compare_fields = ['min_n50', 'max_n50']
comp_type = "range"
on = true
qc_func = "CONTIG"
low_msg = "N50 value is low, this could be due to many reasons involving contamination, poor template quality or insufficient template quantity. Reisolation and reseqeuncing may be needed."
high_msg = "N50 value is high, this is likely a good thing if you have fewer contigs than expected."
}
Expand All @@ -1132,6 +1137,7 @@ params {
compare_fields = ['min_nr_contigs', 'max_nr_contigs']
comp_type = "range"
on = true
qc_func = "CONTIG"
low_msg = "Fewer contigs than expected, if your genome length is of an expected size and you have a high N50 you likely just have a high quality assembly."
high_msg = "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to potentially need to reisolate and resequence/top-up your sample."
}
Expand All @@ -1141,6 +1147,7 @@ params {
compare_fields = ['min_length', 'max_length']
comp_type = "range"
on = true
qc_func = "GENERIC"
low_msg = "Genome length lower than expected, you may need to resequence the sample."
high_msg = "Genome length is higher than expected, contmination is potentially present."
}
Expand All @@ -1150,6 +1157,7 @@ params {
compare_fields = ['max_checkm_contamination']
comp_type = "le"
on = true
qc_func = "AUTOFAIL"
high_msg = "Potential contamination is present in your sample. You may need to reisolate and resequence your sample."
}
}
Expand Down
2 changes: 2 additions & 0 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,7 @@ nextflow_pipeline {
assert sample.n50_value == 214772
assert sample.assembly_num_contigs == 1
assert sample.assembly_length == 214772
assert sample.qc_summary == "FAILED Species ID: No Species Identified; Passed Tests: 3/6; Organism QC Criteria: No organism specific QC data available."
}
}

Expand Down Expand Up @@ -1089,6 +1090,7 @@ nextflow_pipeline {
assert sample.n50_value == 214466
assert sample.assembly_num_contigs == 1
assert sample.assembly_length == 214466
assert sample.qc_summary == "FAILED Species ID: No Species Identified; Passed Tests: 3/6; Organism QC Criteria: No organism specific QC data available."



Expand Down
1 change: 1 addition & 0 deletions tests/pipelines/main.from_assemblies.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,7 @@ nextflow_pipeline {
assert vibrio_metadata."predicted_primary_type_method" == ""
assert vibrio_metadata."predicted_secondary_type_name" == ""
assert vibrio_metadata."predicted_secondary_type_method" == ""
assert vibrio_metadata.qc_summary == "FAILED Species ID: Vibrio cholerae; Passed Tests: 1/4; Organism QC Criteria: Vibrio cholerae"
}
}

Expand Down