#!/usr/bin/env cwl-runner
# Joint variant discovery workflow.
#
# Step chain, as wired below:
#   1. generate_joint_filenames     - derives all output file names from `name`
#   2. joint_genotyping             - runs GATK-GenotypeGVCFs.cwl over `raw_variants`
#   3. variant_recalibration_snps   - runs GATK-VariantRecalibrator-SNPs.cwl on the joint VCF
#   4. apply_recalibration_snps     - runs GATK-ApplyRecalibration.cwl in "SNP" mode
#   5. variant_recalibration_indels - runs GATK-VariantRecalibrator-Indels.cwl on the
#                                     SNP-recalibrated VCF
#   6. apply_recalibration_indels   - runs GATK-ApplyRecalibration.cwl in "INDEL" mode
#                                     on the SNP-recalibrated VCF
# The annotation lists fed to each recalibrator are produced by the two
# generate_annotations_* steps from `study_type` plus a base annotation set.

cwlVersion: v1.0
class: Workflow

requirements:
  SchemaDefRequirement:
    types:
      - $import: ../types/ExomeseqStudyType.yml

inputs:
  study_type:
    type: ../types/ExomeseqStudyType.yml#ExomeseqStudyType
  # Passed to generate_joint_filenames to build every output file name
  name: string
  intervals: File[]?
  interval_padding: int?
  # NOTE: GATK best practices recommend at least 30 samples for exome
  raw_variants: File[]
  # reference genome, fasta
  reference_genome: File
  # Number of threads to use
  threads: int?
  # GATK
  GATKJar: File
  # Variant calling
  # Variant Recalibration - SNPs
  snp_resource_hapmap: File
  snp_resource_omni: File
  snp_resource_1kg: File
  # Variant Recalibration - Common
  resource_dbsnp: File
  # Variant Recalibration - Indels
  indel_resource_mills: File

outputs:
  joint_raw_variants:
    type: File
    outputSource: joint_genotyping/output_GenotypeGVCFs
    doc: "VCF file from joint genotyping calling"
  variant_recalibration_snps_tranches:
    type: File
    outputSource: variant_recalibration_snps/tranches_File
    doc: "The output tranches file used by ApplyRecalibration in SNP mode"
  variant_recalibration_snps_recal:
    type: File
    outputSource: variant_recalibration_snps/recal_File
    doc: "The output recal file used by ApplyRecalibration in SNP mode"
  variant_recalibration_snps_rscript:
    type: File
    outputSource: variant_recalibration_snps/vqsr_rscript
    doc: "The output rscript file generated by the VQSR in SNP mode to aid in visualization of the input data and learned model"
  variant_recalibration_snps_vcf:
    type: File
    outputSource: apply_recalibration_snps/output_recalibrated_vcf
    doc: "The output filtered and recalibrated VCF file in SNP mode in which each variant is annotated with its VQSLOD value"
  variant_recalibration_snps_indels_tranches:
    type: File
    outputSource: variant_recalibration_indels/tranches_File
    doc: "The output tranches file used by ApplyRecalibration in INDEL mode"
  variant_recalibration_snps_indels_recal:
    type: File
    outputSource: variant_recalibration_indels/recal_File
    doc: "The output recal file used by ApplyRecalibration in INDEL mode"
  variant_recalibration_snps_indels_rscript:
    type: File
    outputSource: variant_recalibration_indels/vqsr_rscript
    doc: "The output rscript file generated by the VQSR in INDEL mode to aid in visualization of the input data and learned model"
  variant_recalibration_snps_indels_vcf:
    type: File
    outputSource: apply_recalibration_indels/output_recalibrated_vcf
    doc: "The output filtered and recalibrated VCF file in INDEL mode in which each variant is annotated with its VQSLOD value"

steps:
  # Produces one file name per downstream output so later steps never invent names
  generate_joint_filenames:
    run: ../tools/generate-joint-filenames.cwl
    in:
      name: name
    out:
      - joint_genotype_raw_variants_output_filename
      - snps_vqsr_recal_output_filename
      - snps_vqsr_tranches_output_filename
      - snps_vqsr_rscript_output_filename
      - snps_recalibrated_output_filename
      - snps_indels_vqsr_recal_output_filename
      - snps_indels_vqsr_tranches_output_filename
      - snps_indels_vqsr_rscript_output_filename
      - snps_indels_recalibrated_output_filename

  # TODO: We may want to merge VCFs if we had lots of them
  # See Merge (optional) on https://software.broadinstitute.org/gatk/best-practices/bp_3step.php?case=GermShortWGS&p=2
  joint_genotyping:
    run: ../tools/GATK-GenotypeGVCFs.cwl
    requirements:
      - class: ResourceRequirement
        coresMin: 1
        ramMin: 12288
    in:
      GATKJar: GATKJar
      # Threads are pinned to 1 here instead of using the workflow `threads` input.
      # https://gatkforums.broadinstitute.org/wdl/discussion/8718/concurrentmodificationexception-in-gatk-3-7-genotypegvcfs
      # Hi all, it looks like some race conditions were introduced in 3.7. Unfortunately we can't devote effort to addressing that since we are moving away from using this type of parallelism altogether
      threads:
        default: 1
      intervals: intervals
      interval_padding: interval_padding
      # NOTE: GATK best practices recommend at least 30 samples for exome - how to deal?
      variants: raw_variants
      reference: reference_genome
      group:
        default: ['StandardAnnotation']
      dbsnp: resource_dbsnp
      outputfile_GenotypeGVCFs: generate_joint_filenames/joint_genotype_raw_variants_output_filename
    out:
      - output_GenotypeGVCFs

  # Recommendations from https://software.broadinstitute.org/gatk/documentation/article?id=1259
  generate_annotations_snps:
    run: ../tools/generate-variant-recalibration-annotation-set.cwl
    in:
      study_type: study_type
      base_annotations:
        default: ["QD", "FS", "MQ", "SOR", "MQRankSum", "ReadPosRankSum"]
    out:
      - annotations

  variant_recalibration_snps:
    run: ../tools/GATK-VariantRecalibrator-SNPs.cwl
    in:
      GATKJar: GATKJar
      reference: reference_genome
      variants: joint_genotyping/output_GenotypeGVCFs
      # NOTE(review): fixed at 1 rather than the `threads` input - presumably
      # for the same reason as joint_genotyping; confirm.
      threads:
        default: 1
      outputfile_recal: generate_joint_filenames/snps_vqsr_recal_output_filename
      outputfile_tranches: generate_joint_filenames/snps_vqsr_tranches_output_filename
      outputfile_rscript: generate_joint_filenames/snps_vqsr_rscript_output_filename
      resource_hapmap: snp_resource_hapmap
      resource_omni: snp_resource_omni
      resource_1kg: snp_resource_1kg
      resource_dbsnp: resource_dbsnp
      annotations: generate_annotations_snps/annotations
    out:
      - tranches_File
      - recal_File
      - vqsr_rscript

  apply_recalibration_snps:
    run: ../tools/GATK-ApplyRecalibration.cwl
    in:
      GATKJar: GATKJar
      reference: reference_genome
      variants: joint_genotyping/output_GenotypeGVCFs
      threads: threads
      tranches_file: variant_recalibration_snps/tranches_File
      recal_file: variant_recalibration_snps/recal_File
      outputfile_recalibrated_vcf: generate_joint_filenames/snps_recalibrated_output_filename
      ts_filter_level:
        default: 99.0
      mode:
        default: "SNP"
    out:
      - output_recalibrated_vcf

  generate_annotations_indels:
    run: ../tools/generate-variant-recalibration-annotation-set.cwl
    in:
      study_type: study_type
      base_annotations:
        default: ["QD", "FS", "MQ", "MQRankSum", "ReadPosRankSum"]
    out:
      - annotations

  variant_recalibration_indels:
    run: ../tools/GATK-VariantRecalibrator-Indels.cwl
    in:
      GATKJar: GATKJar
      reference: reference_genome
      # Indel recalibration consumes the SNP-recalibrated VCF, not the raw joint VCF
      variants: apply_recalibration_snps/output_recalibrated_vcf
      threads:
        default: 1
      outputfile_recal: generate_joint_filenames/snps_indels_vqsr_recal_output_filename
      outputfile_tranches: generate_joint_filenames/snps_indels_vqsr_tranches_output_filename
      outputfile_rscript: generate_joint_filenames/snps_indels_vqsr_rscript_output_filename
      resource_mills: indel_resource_mills
      resource_dbsnp: resource_dbsnp
      annotations: generate_annotations_indels/annotations
    out:
      - tranches_File
      - recal_File
      - vqsr_rscript

  apply_recalibration_indels:
    run: ../tools/GATK-ApplyRecalibration.cwl
    in:
      GATKJar: GATKJar
      reference: reference_genome
      variants: apply_recalibration_snps/output_recalibrated_vcf
      threads: threads
      tranches_file: variant_recalibration_indels/tranches_File
      recal_file: variant_recalibration_indels/recal_File
      outputfile_recalibrated_vcf: generate_joint_filenames/snps_indels_recalibrated_output_filename
      ts_filter_level:
        default: 99.0
      mode:
        default: "INDEL"
    out:
      - output_recalibrated_vcf