Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated WDL Testing #420

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/carrot_push.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Runs workflow tests from the branch when commits have been pushed to a PR.
# The workflows to be tested are specified by the "test_names"
# parameter as strings separated by spaces.

name: carrot-test-on-push

# NOTE(review): `on: [push]` fires on every push to every branch, not only
# pushes to PR branches — confirm this is intended, or narrow it with
# `on: pull_request` / branch filters.
on: [push]

jobs:
  publish-test:
    runs-on: ubuntu-latest
    steps:

      # Authenticate to GCP using a service-account key stored as a secret.
      # https://github.com/google-github-actions/setup-gcloud#service-account-key-json
      - id: auth
        uses: google-github-actions/auth@v0
        with:
          credentials_json: ${{ secrets.CARROT_SA_KEY }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v0

      # Publish a JSON message describing this commit to the CARROT
      # Pub/Sub topic so the test runner can pick it up.
      # https://cloud.google.com/pubsub/docs/publisher#publish_messages
      - name: Use gcloud CLI
        run: >
          gcloud pubsub topics publish ${{ secrets.CARROT_TOPIC_NAME }}
          --message='{"source":"github",
          "author":"${{ github.triggering_actor }}",
          "owner":"${{ github.repository_owner }}",
          "wdl_tests_dir":"wdl_test",
          "repo_url":"${{ github.repositoryUrl }}",
          "branch_name":"${{ github.ref_name }}",
          "commit":"${{ github.sha }}",
          "repo":"${{ github.repository }}",
          "test_names": "PBCCSWholeGenome",
          "issue_number":"",
          "software_name":""
          }'
36 changes: 36 additions & 0 deletions .github/workflows/carrot_weekly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Runs all workflow tests every Sunday.
# An empty "test_names" means the runner executes every test.

name: carrot-test-weekly

on:
  schedule:
    - cron: '0 7 * * 0'  # Run every Sunday at 7am (UTC)

jobs:
  publish-test:
    runs-on: ubuntu-latest
    steps:

      # Authenticate to GCP using a service-account key stored as a secret.
      # https://github.com/google-github-actions/setup-gcloud#service-account-key-json
      - id: auth
        uses: google-github-actions/auth@v0
        with:
          credentials_json: ${{ secrets.CARROT_SA_KEY }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v0

      # Publish a JSON message describing this commit to the CARROT
      # Pub/Sub topic so the test runner can pick it up.
      # https://cloud.google.com/pubsub/docs/publisher#publish_messages
      - name: Use gcloud CLI
        run: >
          gcloud pubsub topics publish ${{ secrets.CARROT_TOPIC_NAME }}
          --message='{"source":"github",
          "author":"${{ github.triggering_actor }}",
          "owner":"${{ github.repository_owner }}",
          "wdl_tests_dir":"wdl_test",
          "repo_url":"${{ github.repositoryUrl }}",
          "branch_name":"${{ github.ref_name }}",
          "commit":"${{ github.sha }}",
          "repo":"${{ github.repository }}",
          "test_names": "",
          "issue_number":"",
          "software_name":""
          }'
53 changes: 53 additions & 0 deletions wdl_test/FileMetadata.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
version 1.0

task CheckFileUpdatedDateGCP {

    meta {
        description: "Checks if file was updated within a specified time (default: 1 day)"
        note: "Specific to GCP"
    }

    input {
        Array[String] file_paths  # gs:// object paths to check
        Int days_back = 1         # maximum allowed object age, in days
        # NOTE(review): per PR feedback, pin this to an immutable tag/digest
        # rather than an arbitrary caller-supplied image name, for security.
        # Only `gsutil` needs to be available in the image.
        String image_to_use       # docker image providing gsutil
    }

    # For each gs:// path: read the object's "Update time" from `gsutil stat`,
    # normalize it to YYYY-MM-DD, and require it to be on/after the cutoff.
    command <<<
        set -euo pipefail

        GS_BUCKET_PATHS=("~{sep='" "' file_paths}")
        MINIMUM_DATE=$(date -d '~{days_back} day ago' +%Y-%m-%d)
        return_code=0
        echo -e "FileName\tFileUpdatedDate\tMinimumUpdateDate\tValid"

        for GS_FILE in "${GS_BUCKET_PATHS[@]}"
        do
            # `gsutil stat` prints e.g. "Update time: Tue, 08 Mar 2022 23:27:35 GMT".
            # Take everything after the first ':' and let date(1) reparse it,
            # instead of the original's awk-built command piped into `bash`.
            FILE_DATE=$(date -d "$(gsutil stat "${GS_FILE}" | grep 'Update time' | cut -d: -f2-)" +%Y-%m-%d)

            # Lexicographic comparison is safe for YYYY-MM-DD strings.
            # Fixed: the original used `>=`, which is not a valid `[[ ]]`
            # operator and causes a conditional-expression syntax error.
            if [[ ! "${FILE_DATE}" < "${MINIMUM_DATE}" ]] ; then
                echo -e "${GS_FILE}\t${FILE_DATE}\t${MINIMUM_DATE}\tTrue"
            else
                echo "ERROR: Date for ${GS_FILE}" >&2
                echo "${FILE_DATE} is less than ${MINIMUM_DATE}" >&2

                echo -e "${GS_FILE}\t${FILE_DATE}\t${MINIMUM_DATE}\tFalse"
                return_code=1
            fi
        done

        if [ ${return_code} == 1 ]; then
            exit 1
        fi

    >>>
    runtime {
        docker: image_to_use
    }
    output {
        # TSV report (one row per file) captured from stdout.
        File file_date_result = stdout()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"PBCCSWholeGenome.aligned_bais": [
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64020e_220303_2002560.01.downsample.bai",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64218e_220328_1613170.01.downsample.bai",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64218e_220330_0132120.01.downsample.bai",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220215_1930240.01.downsample.bai",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220218_1550340.01.downsample.bai",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220220_0052040.01.downsample.bai",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220303_1959350.01.downsample.bai"
],

"PBCCSWholeGenome.aligned_bams": [
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64020e_220303_2002560.01.downsample.bam",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64218e_220328_1613170.01.downsample.bam",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64218e_220330_0132120.01.downsample.bam",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220215_1930240.01.downsample.bam",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220218_1550340.01.downsample.bam",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220220_0052040.01.downsample.bam",
"gs://broad-dsp-lrma-pipeline-test-data/NA24385/downsampled_bam_01/m64297e_220303_1959350.01.downsample.bam"
]
}
229 changes: 229 additions & 0 deletions wdl_test/PBCCSWholeGenome/basic_output_valdation/eval.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
version 1.0

workflow eval_workflow {

    # Checker workflow for PBCCSWholeGenome outputs: verifies that summary
    # metrics are non-zero and that output files exist and are fresh in GCS.
    input {

        # Floats: summary metrics produced by the pipeline under test.
        Float aligned_read_length_N50
        Float aligned_num_reads
        Float aligned_frac_bases
        Float aligned_num_bases
        Float aligned_read_length_stdev
        Float average_identity
        Float aligned_est_fold_cov
        Float aligned_read_length_mean
        Float median_identity
        Float aligned_read_length_median

        # Files: gs:// paths of pipeline outputs, passed as Strings so the
        # objects are inspected in place rather than localized.
        String pbsv_tbi
        String sniffles_vcf
        String clair_gtbi
        String dvp_tbi
        String dvp_g_tbi
        String dvp_vcf
        String clair_vcf
        String pbsv_vcf
        String aligned_pbi
        String aligned_bai
        String dvp_phased_vcf
        #String bed_cov_summary # this ends up being 'null' so not including in array of files for now
        String dvp_phased_tbi
        String clair_tbi
        String clair_gvcf
        String aligned_bam
        String sniffles_tbi
        String dvp_g_vcf

    }

    Array[Float] workflow_out_floats = [
        aligned_read_length_N50,
        aligned_num_reads,
        aligned_frac_bases,
        aligned_num_bases,
        aligned_read_length_stdev,
        average_identity,
        aligned_est_fold_cov,
        aligned_read_length_mean,
        median_identity,
        aligned_read_length_median
    ]
    Array[String] workflow_out_files = [
        pbsv_tbi,
        sniffles_vcf,
        clair_gtbi,
        dvp_tbi,
        dvp_g_tbi,
        dvp_vcf,
        clair_vcf,
        pbsv_vcf,
        aligned_pbi,
        aligned_bai,
        dvp_phased_vcf,
        dvp_phased_tbi,
        clair_tbi,
        clair_gvcf,
        aligned_bam,
        sniffles_tbi,
        dvp_g_vcf
    ]

    String ubuntu_image = "marketplace.gcr.io/google/ubuntu2004:latest"
    String gcloud_slim_image = "gcr.io/google.com/cloudsdktool/cloud-sdk:slim"

    ################
    ## Comparing test and expected Floats
    ## Json inputs like "eval_workflow.workflow_out_floats": [{"Left":1.0,"Right":1.0},{"Left":2.2,"Right":3.2}]
    ## With Array[Pair[Float,Float]] workflow_out_floats

    # Array[Boolean] scattered_float_match = []
    # scatter (pair in workflow_out_floats){
    #     if (pair.left != pair.right) {
    #         Boolean scattered_float_match = false
    #         call CheckerWorkflowError{
    #             input:
    #                 message = "Expected Float "+pair.right+" but got "+pair.left,
    #                 image_to_use = ubuntu_image
    #         }
    #     }
    # }
    ################

    ## Confirm each output float is non-zero; fail the checker otherwise.
    # Fixed: removed the workflow-level `Array[Boolean] scattered_float_match = []`
    # declaration — declarations inside a scatter are exported to the workflow
    # scope as arrays, so it collided with the `Boolean scattered_float_match`
    # declared below (duplicate-declaration error).
    scatter (in_float in workflow_out_floats){
        if (in_float == 0.0) {
            Boolean scattered_float_match = false
            call CheckerWorkflowError{
                input:
                    message = "Error: Expected a non-zero float but got "+in_float+" .",
                    image_to_use = ubuntu_image
            }
        }
    }

    # Validate the output files in GCS (see task note: this checks MD5s).
    call CheckFileUpdatedDateGCP {
        input:
            file_paths = workflow_out_files,
            image_to_use = gcloud_slim_image
    }
}

task CheckFileUpdatedDateGCP {

    # NOTE(review): despite its name (kept so the existing call in
    # eval_workflow still resolves), this task validates that each file's MD5
    # differs from the empty-file MD5 — consider renaming task and call together.
    meta {
        description: "Checks that each GCS file's MD5 is not the md5sum of an empty file"
        note: "Specific to GCP"
    }

    input {
        Array[String] file_paths  # gs:// object paths to check
        Int days_back = 1         # unused by this body; kept for interface compatibility
        String image_to_use       # docker image providing gsutil
    }

    # For each gs:// path: read its MD5 via `gsutil hash` and fail if it
    # matches the well-known md5sum of a zero-byte file.
    command <<<
        set -euo pipefail

        GS_BUCKET_PATHS=("~{sep='" "' file_paths}")
        EMPTY_MD5="d41d8cd98f00b204e9800998ecf8427e"
        return_code=0
        echo -e "FilePath\tFileMD5\tValid"

        for GS_FILE in "${GS_BUCKET_PATHS[@]}"
        do
            # `gsutil hash -hm` prints a line like "Hash (md5): <hex>";
            # take the last whitespace-delimited field, which is robust to
            # the mix of tabs/spaces gsutil uses for alignment.
            FILE_MD5=$(gsutil hash -hm "${GS_FILE}" | grep "md5" | awk '{print $NF}')

            if [[ "${FILE_MD5}" != "${EMPTY_MD5}" ]] ; then
                echo -e "${GS_FILE}\t${FILE_MD5}\tTrue"
            else
                echo "ERROR: MD5 for ${GS_FILE} equals md5sum of an empty file: ${EMPTY_MD5}" >&2

                echo -e "${GS_FILE}\t${FILE_MD5}\tFalse"
                return_code=1
            fi
        done

        if [ ${return_code} == 1 ]; then
            exit 1
        fi

    >>>
    runtime {
        docker: image_to_use
    }
    output {
        # TSV report captured from stdout (name kept for interface
        # compatibility; it contains MD5 results, not dates).
        File file_date_result = stdout()
    }
}

task CheckerWorkflowError {

    # Deliberately failing task: logs `message` and exits non-zero so the
    # enclosing checker workflow reports an error.
    input {
        String message       # human-readable error description to surface in the log
        String image_to_use  # any image with a POSIX shell
    }
    command <<<
        set -euo pipefail

        echo "~{message}"
        exit 1

    >>>
    runtime {
        docker: image_to_use
    }
    output {
        # Fixed: was `Boolean errmessage = stdout()` — stdout() yields a File,
        # which does not coerce to Boolean and fails WDL validation.
        File errmessage = stdout()
    }
}

# NOTE(review): this task appears to be unused scaffolding for the
# commented-out pairwise float comparison in eval_workflow — confirm whether
# it should be completed or removed.
task ValidFloatOutput {

input {
# NOTE(review): an Array[Pair[Float,Float]] cannot be interpolated directly
# with ~{...} in a command (no serialization for Pair in a placeholder) —
# this likely fails validation; verify with the target engine.
Array[Pair[Float,Float]] workflow_out_floats
String image_to_use
}
command <<<

echo ~{workflow_out_floats}

>>>
runtime {
docker: image_to_use
}
output {
# NOTE(review): read_boolean expects stdout to contain exactly "true" or
# "false"; the echoed array above would not satisfy that — confirm intent.
Boolean comparison_result = read_boolean(stdout())
}
}

task ValidMd5SumOutput {

    # Compares the md5sum of `data_file` against `expectedMd5sum` and writes
    # "true" or "false" to stdout.
    input {
        File data_file         # file whose checksum is verified
        String expectedMd5sum  # expected 32-character hex MD5 digest
    }
    command <<<
        set -euo pipefail

        # Fixed: the original hashed a hard-coded "helloworld.txt" instead of
        # the input file, used double-quoted awk (so the shell expanded $1/$8
        # to empty before awk ran), piped into `read` (subshell — the variable
        # was lost), and wrote `[$filemd5 == ...]` without the required spaces.
        filemd5=$(md5sum "~{data_file}" | awk '{print $1}')

        if [ "${filemd5}" == "~{expectedMd5sum}" ]
        then
            echo "true"
        else
            echo "false"
        fi
    >>>
    runtime {
        docker: "quay.io/agduncan94/my-md5sum"
    }
    output {
        # "true"/"false" verdict captured from stdout.
        File comparison_result = stdout()
    }
}
Loading
Loading