#!/usr/bin/env cwl-runner

# Somatic variant detection for WGS data.
#
# Four caller subworkflows (mutect, strelka, varscan, docm) run on the same
# tumor/normal BAM pair. The mutect/strelka/varscan calls are combined, the
# docm calls are merged in, and the result is decomposed, indexed, annotated
# with VEP, annotated with tumor and normal bam-readcounts, indexed again and
# passed to the filter subworkflow.
cwlVersion: v1.0
class: Workflow
label: "Detect Variants workflow for WGS pipeline"

requirements:
  - class: SubworkflowFeatureRequirement
  - class: SchemaDefRequirement
    types:
      - $import: ../types/vep_custom_annotation.yml
  # The two requirements below back the JavaScript `valueFrom` expression on
  # the filter_vcf step's gnomad_field_name input.
  - class: StepInputExpressionRequirement
  - class: InlineJavascriptRequirement

inputs:
  # --- reference and alignments -------------------------------------------
  reference:
    type: string
  tumor_bam:
    type: File
    secondaryFiles: [.bai, ^.bai]
  normal_bam:
    type: File
    secondaryFiles: [.bai, ^.bai]
  roi_intervals:
    type: File

  # --- caller settings -----------------------------------------------------
  strelka_exome_mode:
    type: boolean
  strelka_cpu_reserved:
    type: int?
    default: 8
  readcount_minimum_base_quality:
    type: int?
  readcount_minimum_mapping_quality:
    type: int?
  # Wired to the mutect and varscan steps below.
  scatter_count:
    type: int?
    doc: "scatters each supported variant detector (varscan, pindel, mutect) into this many parallel jobs"
  varscan_strand_filter:
    type: int?
    default: 0
  varscan_min_coverage:
    type: int?
    default: 8
  varscan_min_var_freq:
    type: float?
    default: 0.1
  varscan_p_value:
    type: float?
    default: 0.99
  varscan_max_normal_freq:
    type: float?
  # NOTE(review): no step in the visible part of this workflow consumes this
  # input (there is no pindel step in view) - confirm whether it is still used.
  pindel_insert_size:
    type: int
    default: 400
  docm_vcf:
    type: File
    secondaryFiles: [.tbi]
  filter_docm_variants:
    type: boolean?
    default: true

  # --- VEP annotation ------------------------------------------------------
  vep_cache_dir:
    type:
      - string
      - Directory
  vep_ensembl_assembly:
    type: string
    doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38"
  vep_ensembl_version:
    type: string
    doc: "ensembl version - Must be present in the cache directory. Example: 95"
  vep_ensembl_species:
    type: string
    doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus"
  synonyms_file:
    type: File?
  annotate_coding_only:
    type: boolean?
  # Optional enum: the quoted "null" is the CWL null *type name*, not a YAML
  # null value, so the quotes must stay.
  vep_pick:
    type:
      - "null"
      - type: enum
        symbols:
          - "pick"
          - "flag_pick"
          - "pick_allele"
          - "per_gene"
          - "pick_allele_gene"
          - "flag_pick_allele"
          - "flag_pick_allele_gene"
  vep_plugins:
    type: string[]?
    default: [Downstream, Wildtype]

  # --- filtering -----------------------------------------------------------
  filter_gnomADe_maximum_population_allele_frequency:
    type: float?
    default: 0.001
  filter_mapq0_threshold:
    type: float?
    default: 0.15
  filter_minimum_depth:
    type: int?
    default: 1
  filter_somatic_llr_threshold:
    type: float
    default: 5
    doc: "Sets the stringency (log-likelihood ratio) used to filter out non-somatic variants. Typical values are 10=high stringency, 5=normal, 3=low stringency. Low stringency may be desirable when read depths are low (as in WGS) or when tumor samples are impure."
  filter_somatic_llr_tumor_purity:
    type: float
    default: 1
    doc: "Sets the purity of the tumor used in the somatic llr filter, used to remove non-somatic variants. Probably only needs to be adjusted for low-purity (< 50%). Range is 0 to 1"
  filter_somatic_llr_normal_contamination_rate:
    type: float
    default: 0
    doc: "Sets the fraction of tumor present in the normal sample (range 0 to 1), used in the somatic llr filter. Useful for heavily contaminated adjacent normals. Range is 0 to 1"
  cle_vcf_filter:
    type: boolean?
    default: false

  # --- table export --------------------------------------------------------
  variants_to_table_fields:
    type: string[]?
    default: [CHROM, POS, ID, REF, ALT, set, AC, AF]
  variants_to_table_genotype_fields:
    type: string[]?
    default: [GT, AD]
  vep_to_table_fields:
    type: string[]?
    default: [HGVSc, HGVSp]

  # --- sample identity and optional annotation sources --------------------
  tumor_sample_name:
    type: string
  normal_sample_name:
    type: string
  # Array of the record type imported through SchemaDefRequirement above.
  vep_custom_annotations:
    type: '../types/vep_custom_annotation.yml#vep_custom_annotation[]'
    doc: "custom type, check types directory for input format"
  validated_variants:
    type: File?
    secondaryFiles: [.tbi]
    doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"

outputs:
  # Per-caller VCFs, before and after each caller's own filtering.
  mutect_unfiltered_vcf:
    type: File
    outputSource: mutect/unfiltered_vcf
    secondaryFiles: [.tbi]
  mutect_filtered_vcf:
    type: File
    outputSource: mutect/filtered_vcf
    secondaryFiles: [.tbi]
  strelka_unfiltered_vcf:
    type: File
    outputSource: strelka/unfiltered_vcf
    secondaryFiles: [.tbi]
  strelka_filtered_vcf:
    type: File
    outputSource: strelka/filtered_vcf
    secondaryFiles: [.tbi]
  varscan_unfiltered_vcf:
    type: File
    outputSource: varscan/unfiltered_vcf
    secondaryFiles: [.tbi]
  varscan_filtered_vcf:
    type: File
    outputSource: varscan/filtered_vcf
    secondaryFiles: [.tbi]
  docm_filtered_vcf:
    type: File
    outputSource: docm/docm_variants_vcf
    secondaryFiles: [.tbi]

  # Merged results. The annotated_filter_index and add_vep_fields_to_table
  # steps referenced here are not within the visible part of `steps`.
  final_vcf:
    type: File
    outputSource: index/indexed_vcf
    secondaryFiles: [.tbi]
  final_filtered_vcf:
    type: File
    outputSource: annotated_filter_index/indexed_vcf
    secondaryFiles: [.tbi]
  final_tsv:
    type: File
    outputSource: add_vep_fields_to_table/annotated_variants_tsv
  vep_summary:
    type: File
    outputSource: annotate_variants/vep_summary

  # Raw bam-readcount tables for each sample.
  tumor_snv_bam_readcount_tsv:
    type: File
    outputSource: tumor_bam_readcount/snv_bam_readcount_tsv
  tumor_indel_bam_readcount_tsv:
    type: File
    outputSource: tumor_bam_readcount/indel_bam_readcount_tsv
  normal_snv_bam_readcount_tsv:
    type: File
    outputSource: normal_bam_readcount/snv_bam_readcount_tsv
  normal_indel_bam_readcount_tsv:
    type: File
    outputSource: normal_bam_readcount/indel_bam_readcount_tsv

steps:
  # --- variant callers -----------------------------------------------------
  mutect:
    run: ../subworkflows/mutect.cwl
    in:
      reference: reference
      tumor_bam: tumor_bam
      normal_bam: normal_bam
      interval_list: roi_intervals
      scatter_count: scatter_count
      tumor_sample_name: tumor_sample_name
    out: [unfiltered_vcf, filtered_vcf]

  strelka:
    run: ../subworkflows/strelka_and_post_processing.cwl
    in:
      reference: reference
      tumor_bam: tumor_bam
      normal_bam: normal_bam
      interval_list: roi_intervals
      exome_mode: strelka_exome_mode
      cpu_reserved: strelka_cpu_reserved
      tumor_sample_name: tumor_sample_name
      normal_sample_name: normal_sample_name
    out: [unfiltered_vcf, filtered_vcf]

  varscan:
    run: ../subworkflows/varscan_pre_and_post_processing.cwl
    in:
      reference: reference
      tumor_bam: tumor_bam
      normal_bam: normal_bam
      interval_list: roi_intervals
      scatter_count: scatter_count
      strand_filter: varscan_strand_filter
      min_coverage: varscan_min_coverage
      min_var_freq: varscan_min_var_freq
      p_value: varscan_p_value
      max_normal_freq: varscan_max_normal_freq
      tumor_sample_name: tumor_sample_name
      normal_sample_name: normal_sample_name
    out: [unfiltered_vcf, filtered_vcf]

  docm:
    run: ../subworkflows/docm_cle.cwl
    in:
      reference: reference
      tumor_bam: tumor_bam
      normal_bam: normal_bam
      docm_vcf: docm_vcf
      interval_list: roi_intervals
      filter_docm_variants: filter_docm_variants
    out: [docm_variants_vcf]

  # --- merge and normalise -------------------------------------------------
  combine:
    run: ../tools/combine_variants_wgs.cwl
    in:
      reference: reference
      mutect_vcf: mutect/filtered_vcf
      strelka_vcf: strelka/filtered_vcf
      varscan_vcf: varscan/filtered_vcf
    out: [combined_vcf]

  add_docm_variants:
    run: ../tools/docm_add_variants.cwl
    in:
      reference: reference
      docm_vcf: docm/docm_variants_vcf
      callers_vcf: combine/combined_vcf
    out: [merged_vcf]

  decompose:
    run: ../tools/vt_decompose.cwl
    in:
      vcf: add_docm_variants/merged_vcf
    out: [decomposed_vcf]

  decompose_index:
    run: ../tools/index_vcf.cwl
    in:
      vcf: decompose/decomposed_vcf
    out: [indexed_vcf]

  # --- annotation ----------------------------------------------------------
  annotate_variants:
    run: ../tools/vep.cwl
    in:
      vcf: decompose_index/indexed_vcf
      cache_dir: vep_cache_dir
      ensembl_assembly: vep_ensembl_assembly
      ensembl_version: vep_ensembl_version
      ensembl_species: vep_ensembl_species
      synonyms_file: synonyms_file
      coding_only: annotate_coding_only
      reference: reference
      custom_annotations: vep_custom_annotations
      pick: vep_pick
      plugins: vep_plugins
    out: [annotated_vcf, vep_summary]

  # NOTE(review): the readcount steps below label samples with the literals
  # 'TUMOR' / 'NORMAL' while the callers receive tumor_sample_name /
  # normal_sample_name - confirm the VCF sample columns match these literals.
  tumor_bam_readcount:
    run: ../tools/bam_readcount.cwl
    in:
      vcf: annotate_variants/annotated_vcf
      sample:
        default: 'TUMOR'
      reference_fasta: reference
      bam: tumor_bam
      min_base_quality: readcount_minimum_base_quality
      min_mapping_quality: readcount_minimum_mapping_quality
    out: [snv_bam_readcount_tsv, indel_bam_readcount_tsv]

  normal_bam_readcount:
    run: ../tools/bam_readcount.cwl
    in:
      vcf: annotate_variants/annotated_vcf
      sample:
        default: 'NORMAL'
      reference_fasta: reference
      bam: normal_bam
      min_base_quality: readcount_minimum_base_quality
      min_mapping_quality: readcount_minimum_mapping_quality
    out: [snv_bam_readcount_tsv, indel_bam_readcount_tsv]

  # Tumor readcounts are written first; the normal pass takes the tumor-
  # annotated VCF as its input, so both end up in one file.
  add_tumor_bam_readcount_to_vcf:
    run: ../subworkflows/vcf_readcount_annotator.cwl
    in:
      vcf: annotate_variants/annotated_vcf
      snv_bam_readcount_tsv: tumor_bam_readcount/snv_bam_readcount_tsv
      indel_bam_readcount_tsv: tumor_bam_readcount/indel_bam_readcount_tsv
      data_type:
        default: 'DNA'
      sample_name:
        default: 'TUMOR'
    out: [annotated_bam_readcount_vcf]

  add_normal_bam_readcount_to_vcf:
    run: ../subworkflows/vcf_readcount_annotator.cwl
    in:
      vcf: add_tumor_bam_readcount_to_vcf/annotated_bam_readcount_vcf
      snv_bam_readcount_tsv: normal_bam_readcount/snv_bam_readcount_tsv
      indel_bam_readcount_tsv: normal_bam_readcount/indel_bam_readcount_tsv
      data_type:
        default: 'DNA'
      sample_name:
        default: 'NORMAL'
    out: [annotated_bam_readcount_vcf]

  index:
    run: ../tools/index_vcf.cwl
    in:
      vcf: add_normal_bam_readcount_to_vcf/annotated_bam_readcount_vcf
    out: [indexed_vcf]

  # --- filtering -----------------------------------------------------------
  filter_vcf:
    run: ../subworkflows/filter_vcf.cwl
    in:
      vcf: index/indexed_vcf
      filter_gnomADe_maximum_population_allele_frequency: filter_gnomADe_maximum_population_allele_frequency
      filter_mapq0_threshold: filter_mapq0_threshold
      filter_somatic_llr_threshold: filter_somatic_llr_threshold
      filter_somatic_llr_tumor_purity: filter_somatic_llr_tumor_purity
      filter_somatic_llr_normal_contamination_rate: filter_somatic_llr_normal_contamination_rate
      filter_minimum_depth: filter_minimum_depth
      tumor_bam: tumor_bam
      do_cle_vcf_filter: cle_vcf_filter
      reference: reference
      normal_sample_name: normal_sample_name
      tumor_sample_name: tumor_sample_name
      # Derived from vep_custom_annotations by a JavaScript expression.
      # NOTE(review): the expression is cut off at the end of the available
      # source; it is reproduced verbatim below and has NOT been completed.
      gnomad_field_name:
        source: vep_custom_annotations
        valueFrom: |
          ${
            if(self){
              for(var i=0; i