@@ -873,6 +873,12 @@ task BigQueryLoadRawVepAndLofteeAnnotations {
873873 File raw_data_table_schema
874874 }
875875
876+   parameter_meta {
877+     vep_loftee_raw_output: {
878+       localization_optional: true
879+     }
880+   }
881+
876882 command <<<
877883 # Prepend date, time and pwd to xtrace log entries.
878884         PS4='\D{+%F %T} \w $ '
@@ -891,26 +897,35 @@ task BigQueryLoadRawVepAndLofteeAnnotations {
891897             bq --apilog=false mk --expiration=$DATE --project_id=~{project_id} ~{dataset_name}.~{raw_data_table} ~{raw_data_table_schema}
892898 fi
893899
894-         for file in ~{sep=' ' vep_loftee_raw_output}
895-         do
896-             if [ -s $file ]
897-             then
898-                 # Do a wee bit of processing of the raw output to create a load file for raw VEP + LOFTEE data
899-                 # - Remove lines beginning with '##'.
900-                 # - Remove the leading '#' from the one line that should be left with a single leading '#' so the line can
901-                 #   serve as a TSV header.
902-                 sed -E '/^##/d' $file | sed -E 's/^#//' > vep_loftee_load_file.txt
903-
904-                 bq --apilog=false load --project_id=~{project_id} --source_format=CSV --field_delimiter='\t' --skip_leading_rows=1 \
905-                     --null_marker="-" ~{dataset_name}.~{raw_data_table} vep_loftee_load_file.txt
906-             else
907-                 echo "File $file is empty, skipping."
908-             fi
909-         done
900+         num_rows=$(bq --apilog=false show --project_id=~{project_id} ~{dataset_name}.~{raw_data_table} --format json | jq -r .numRows)
901+         if ((num_rows != 0))
902+         then
903+             echo "Found preexisting table with data, not adding more raw data."
904+         else
905+             echo "Raw data table is empty, copying VEP output to be loaded."
906+             gcloud storage cp ~{sep=' ' vep_loftee_raw_output} .
907+             for file in ~{sep=' ' vep_loftee_raw_output}
908+             do
909+                 filename=$(basename $file)
910+                 if [ ! -e load_file.txt ]
911+                 then
912+                     # Do a wee bit of processing of the raw output to create a load file for raw VEP + LOFTEE data
913+                     # - Remove lines beginning with '##'.
914+                     # - Remove the leading '#' from the one line that should be left with a single leading '#' so
915+                     #   the line can serve as a TSV header.
916+                     sed -E '/^##/d' $filename | sed -E 's/^#//' > load_file.txt
917+                 fi
918+                 grep -E -v '^#' $filename >> load_file.txt
919+             done
920+
921+             bq --apilog=false load --project_id=~{project_id} --source_format=CSV --field_delimiter='\t' \
922+                 --skip_leading_rows=1 --null_marker="-" ~{dataset_name}.~{raw_data_table} load_file.txt
923+         fi
910924 >>>
911925
912926 runtime {
913927 docker : variants_docker
928+     preemptible: 2
914929 memory : "7 GB"
915930 disks : "local-disk 1000 HDD"
916931 }
@@ -949,6 +964,12 @@ task BigQueryCookVepAndLofteeRawAnnotations {
949964             bq --apilog=false mk --expiration=$DATE --project_id=~{project_id} ~{dataset_name}.~{cooked_data_table} ~{cooked_data_table_schema}
950965 fi
951966
967+         num_rows=$(bq --apilog=false show --project_id=~{project_id} ~{dataset_name}.~{cooked_data_table} --format json | jq -r .numRows)
968+         if ((num_rows != 0))
969+         then
970+             echo "Found preexisting table with data, not adding more cooked data."
971+         else
972+
952973         bq --apilog=false query --nouse_legacy_sql --destination_table=~{dataset_name}.~{cooked_data_table} --replace \
953974             --project_id=~{project_id} '
954975
@@ -1021,6 +1042,7 @@ task BigQueryCookVepAndLofteeRawAnnotations {
10211042 WHERE ROW_NUMBER = 1
10221043
10231044 '
1045+ fi
10241046
10251047 >>>
10261048
0 commit comments