#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: Workflow

doc: |
    This workflow will run OxoG, variantbam, and annotate.
    Run this as:
    `dockstore --script --debug workflow launch --descriptor cwl --local-entry --entry ./oxog_varbam_annotate_wf.cwl --json oxog_varbam_annotat_wf.input.json`

dct:creator:
    foaf:name: "Solomon Shorser"
    foaf:mbox: "solomon.shorser@oicr.on.ca"

requirements:
    - class: SchemaDefRequirement
      types:
          - $import: PreprocessedFilesType.yaml
          - $import: TumourType.yaml
    - class: ScatterFeatureRequirement
    - class: StepInputExpressionRequirement
    - class: MultipleInputFeatureRequirement
    - class: InlineJavascriptRequirement
      expressionLib:
          - { $include: oxog_varbam_annotate_util.js }
          # Shouldn't have to *explicitly* include these, but there's
          # probably a bug somewhere that makes it necessary.
          - { $include: preprocess_util.js }
          - { $include: vcf_merge_util.js }
    - class: SubworkflowFeatureRequirement

inputs:
    inputFileDirectory:
        type: Directory
    refFile:
        type: File
    out_dir:
        type: string
    normalBam:
        type: File
    snv-padding:
        type: string
    sv-padding:
        type: string
    indel-padding:
        type: string
    refDataDir:
        type: Directory
    minibamName:
        type: string
    vcfdir:
        type: Directory
    # "tumours" is an array of records. Each record contains the tumour ID, BAM
    # file name, and an array of VCFs (see the commented example at the end of
    # this file).
    tumours:
        type:
            type: array
            items: "TumourType.yaml#TumourType"

outputs:
    #oxog_filtered_files:
    #    type: File[]
    #    outputSource: flatten_oxog_output/oxogVCFs
    #    secondaryFiles: "*.tbi"
    minibams:
        type: File[]
        outputSource: gather_minibams/minibams
        secondaryFiles: "*.bai"
    #annotated_files:
    #    type: File[]
    #    outputSource: gather_annotated_vcfs/annotated_vcfs

steps:
    ########################################
    # Preprocessing                        #
    ########################################
    #
    # Execute the preprocessor subworkflow.
    preprocess_vcfs:
        in:
            vcfdir: inputFileDirectory
            ref: refFile
            out_dir: out_dir
            filesToPreprocess:
                source: [ tumours ]
                valueFrom: |
                    ${
                        // Put all VCFs into an array.
                        var VCFs = []
                        for (var i in self)
                        {
                            for (var j in self[i].associatedVcfs)
                            {
                                VCFs.push(self[i].associatedVcfs[j])
                            }
                        }
                        return VCFs;
                        //return self[0].associatedVcfs
                    }
        run: preprocess_vcf.cwl
        out: [preprocessedFiles]

    # The next four steps unpack the individual file arrays from the
    # preprocessor's record output.
    get_merged_vcfs:
        in:
            in_record: preprocess_vcfs/preprocessedFiles
        run:
            class: ExpressionTool
            inputs:
                in_record: "PreprocessedFilesType.yaml#PreprocessedFileset"
            outputs:
                merged_vcfs: File[]
            expression: |
                $(
                    { merged_vcfs: inputs.in_record.mergedVcfs }
                )
        out: [merged_vcfs]

    get_cleaned_vcfs:
        in:
            in_record: preprocess_vcfs/preprocessedFiles
        run:
            class: ExpressionTool
            inputs:
                in_record: "PreprocessedFilesType.yaml#PreprocessedFileset"
            outputs:
                cleaned_vcfs: File[]
            expression: |
                $(
                    { cleaned_vcfs: inputs.in_record.cleanedVcfs }
                )
        out: [cleaned_vcfs]

    get_normalized_vcfs:
        in:
            in_record: preprocess_vcfs/preprocessedFiles
        run:
            class: ExpressionTool
            inputs:
                in_record: "PreprocessedFilesType.yaml#PreprocessedFileset"
            outputs:
                normalized_vcfs: File[]
            expression: |
                $(
                    { normalized_vcfs: inputs.in_record.normalizedVcfs }
                )
        out: [normalized_vcfs]

    get_extracted_snvs:
        in:
            in_record: preprocess_vcfs/preprocessedFiles
        run:
            class: ExpressionTool
            inputs:
                in_record: "PreprocessedFilesType.yaml#PreprocessedFileset"
            outputs:
                extracted_snvs: File[]?
            expression: |
                $(
                    { extracted_snvs: inputs.in_record.extractedSnvs }
                )
        out: [extracted_snvs]
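
    # The filter_merged_* steps below select a single VCF from the merged set by
    # calling filterFileArray, which comes from the JavaScript expression
    # libraries included under InlineJavascriptRequirement. The authoritative
    # implementation lives in those .js files; the commented sketch below is only
    # an assumption about its behaviour, inferred from how it is called here
    # (pattern first, file array second, one File returned).
    #
    #     function filterFileArray(pattern, files)
    #     {
    #         // Return the first file whose basename contains the pattern,
    #         // e.g. filterFileArray("snv", vcfs) picks the merged SNV VCF.
    #         for (var i in files)
    #         {
    #             if (files[i].basename.indexOf(pattern) !== -1)
    #             {
    #                 return files[i];
    #             }
    #         }
    #         return null;
    #     }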
    filter_merged_snv:
        in:
            in_vcfs: get_merged_vcfs/merged_vcfs
        run:
            class: ExpressionTool
            inputs:
                in_vcfs: File[]
            outputs:
                merged_snv_vcf: File
            expression: |
                $({ merged_snv_vcf: filterFileArray("snv",inputs.in_vcfs) })
        out: [merged_snv_vcf]

    filter_merged_indel:
        in:
            in_vcfs: get_merged_vcfs/merged_vcfs
        run:
            class: ExpressionTool
            inputs:
                in_vcfs: File[]
            outputs:
                merged_indel_vcf: File
            expression: |
                $({ merged_indel_vcf: filterFileArray("indel",inputs.in_vcfs) })
        out: [merged_indel_vcf]

    filter_merged_sv:
        in:
            in_vcfs: get_merged_vcfs/merged_vcfs
        run:
            class: ExpressionTool
            inputs:
                in_vcfs: File[]
            outputs:
                merged_sv_vcf: File
            expression: |
                $({ merged_sv_vcf: filterFileArray("sv",inputs.in_vcfs) })
        out: [merged_sv_vcf]

    ########################################
    # Do Variantbam                        #
    ########################################
    # This needs to be run for each tumour, using VCFs that have been merged
    # across pipelines for each tumour.
    run_variant_bam:
        in:
            in_data:
                source: tumours
            indel-padding: indel-padding
            snv-padding: snv-padding
            sv-padding: sv-padding
            input-snv: filter_merged_snv/merged_snv_vcf
            input-sv: filter_merged_sv/merged_sv_vcf
            input-indel: filter_merged_indel/merged_indel_vcf
            inputFileDirectory: inputFileDirectory
        out: [minibam]
        scatter: [in_data]
        run: ./minibam_sub_wf.cwl

    # Create a minibam for the normal BAM. It would be nice to figure out how to
    # fold this into the main run_variant_bam step, which currently only handles
    # tumour BAMs.
    run_variant_bam_normal:
        in:
            indel-padding: indel-padding
            snv-padding: snv-padding
            sv-padding: sv-padding
            input-snv: filter_merged_snv/merged_snv_vcf
            input-sv: filter_merged_sv/merged_sv_vcf
            input-indel: filter_merged_indel/merged_indel_vcf
            inputFileDirectory: inputFileDirectory
            input-bam: normalBam
            outfile:
                source: normalBam
                valueFrom: $("mini-".concat(self.basename))
        run: Variantbam-for-dockstore/variantbam.cwl
        out: [minibam]

    # Gather all minibams into a single output array.
    gather_minibams:
        in:
            tumour_minibams: run_variant_bam/minibam
            normal_minibam: run_variant_bam_normal/minibam
        run:
            class: ExpressionTool
            inputs:
                tumour_minibams: File[]
                normal_minibam: File
            outputs:
                minibams: File[]
            expression: |
                $(
                    { minibams: inputs.tumour_minibams.concat(inputs.normal_minibam) }
                )
        out: [minibams]

    ### Prepare for OxoG!
    # First we need to zip and index the VCFs - the OxoG filter requires them to
    # be zipped and indexed.
    zip_and_index_files_for_oxog:
        in:
            vcf:
                source: get_cleaned_vcfs/cleaned_vcfs
        scatter: [vcf]
        out: [zipped_file]
        run: zip_and_index_vcf.cwl

    # Gather the appropriate VCFs:
    # all SNVs, and all SNVs extracted from INDELs.
    gather_vcfs_for_oxog:
        in:
            vcf:
                source: [zip_and_index_files_for_oxog/zipped_file]
                valueFrom: |
                    ${
                        var snvs = []
                        for (var i in self)
                        {
                            if (self[i].basename.indexOf("snv") !== -1)
                            {
                                snvs.push(self[i])
                            }
                        }
                        return snvs
                    }
            extractedSNVs:
                source: get_extracted_snvs/extracted_snvs
        run:
            class: ExpressionTool
            inputs:
                vcf: File[]
                extractedSNVs: File[]?
            outputs:
                vcfs: File[]
            expression: |
                $(
                    { vcfs: inputs.vcf.concat(inputs.extractedSNVs) }
                )
        out: [vcfs]
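
    # flatten_oxog_output (below) and the commented-out gather_annotated_vcfs
    # step rely on flatten_nested_arrays from the included expression libraries.
    # Its real definition is in those .js files; judging only from its use here
    # (File[][] in, File[] out), it presumably behaves roughly like this
    # hypothetical sketch:
    #
    #     function flatten_nested_arrays(arrayOfArrays)
    #     {
    #         // Concatenate the inner arrays into one flat array of Files.
    #         var flattened = [];
    #         for (var i in arrayOfArrays)
    #         {
    #             flattened = flattened.concat(arrayOfArrays[i]);
    #         }
    #         return flattened;
    #     }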
    ########################################
    # Do OxoG Filtering                    #
    ########################################
    #
    # OxoG only runs on SNV VCFs.
    run_oxog:
        in:
            in_data:
                source: tumours
            inputFileDirectory: inputFileDirectory
            refDataDir: refDataDir
            vcfsForOxoG: gather_vcfs_for_oxog/vcfs
        out: [oxogVCF]
        scatter: [in_data]
        run: oxog_sub_wf.cwl

    # Flatten the per-tumour arrays of OxoG output VCFs into a single array.
    flatten_oxog_output:
        in:
            array_of_arrays: run_oxog/oxogVCF
        run:
            class: ExpressionTool
            inputs:
                array_of_arrays:
                    type: { type: array, items: { type: array, items: File } }
            expression: |
                $(
                    { oxogVCFs: flatten_nested_arrays(inputs.array_of_arrays) }
                )
            outputs:
                oxogVCFs: File[]
        out: [oxogVCFs]

    ########################################
    # Do Annotation.                       #
    ########################################
    #
    # We need the OxoG-filtered files and the minibams (tumour and normal).
    # Then we need to scatter: we scatter over the tumour records and perform
    # all annotations for one tumour at a time.
    run_annotator_snvs:
        in:
            tumourMinibams: run_variant_bam/minibam
            VCFs: flatten_oxog_output/oxogVCFs
            tumour_record:
                source: tumours
            normalMinibam: run_variant_bam_normal/minibam
            variantType:
                default: "SNV"
        out: [ annotated_vcfs ]
        scatter: [tumour_record]
        run: annotator_sub_wf.cwl

    # Annotation must also be performed on INDELs, but since INDELs don't get
    # OxoG-filtered, we will use the normalized INDELs.
    run_annotator_indels:
        in:
            tumourMinibams: run_variant_bam/minibam
            VCFs: get_normalized_vcfs/normalized_vcfs
            tumour_record:
                source: tumours
            normalMinibam: run_variant_bam_normal/minibam
            variantType:
                default: "INDEL"
        out: [annotated_vcfs]
        scatter: [tumour_record]
        run: annotator_sub_wf.cwl

    #gather_annotated_vcfs:
    #    in:
    #        annotated_snvs: run_annotator_snvs/annotated_vcfs
    #        annotated_indels: run_annotator_indels/annotated_vcfs
    #    run:
    #        class: ExpressionTool
    #        inputs:
    #            annotated_snvs:
    #                type: { type: array, items: { type: array, items: File } }
    #            annotated_indels:
    #                type: { type: array, items: { type: array, items: File } }
    #        outputs:
    #            annotated_vcfs: File[]
    #        expression: |
    #            $(
    #                { annotated_vcfs: flatten_nested_arrays(inputs.annotated_snvs).concat(flatten_nested_arrays(inputs.annotated_indels)) }
    #            )
    #    out:
    #        [annotated_vcfs]

    # Now run the QA check.
    #qa_check:
    #    in:
    #        tumourMinibams: run_variant_bam/minibam
    #        tumour_record:
    #            source: tumours
    #        normal_bam: normalBam
    #        vcfs: flatten_oxog_output/oxogVCFs
    #        normalMinibam: run_variant_bam_normal/minibam
    #        inputFileDirectory: inputFileDirectory
    #    scatter: [tumour_record]
    #    run:
    #        class: Workflow
    #        inputs:
    #            inputFileDirectory:
    #                type: Directory
    #            tumour_record:
    #                type: "TumourType.yaml#TumourType"
    #            vcfs:
    #                type: File[]
    #            normal_bam:
    #                type: File
    #                secondaryFiles: .bai
    #            tumourMinibams:
    #                type: File[]
    #            normalMinibam:
    #                type: File
    #        steps:
    #            run_qa_check:
    #                in:
    #                    tumour_record: tumour_record
    #                    vcfs: vcfs
    #                    normal_bam: normal_bam
    #                    normal_minibam: normalMinibam
    #                    tumour_minibam:
    #                        source: [tumour_record, tumourMinibams]
    #                        valueFrom: |
    #                            ${
    #                                for (var i in self[1])
    #                                {
    #                                    var tumourMinibam = self[1][i]
    #                                    if (tumourMinibam.basename.indexOf( self[0].bamFileName ) !== -1)
    #                                    {
    #                                        return tumourMinibam
    #                                    }
    #                                }
    #                            }
    #                    tumour_bam:
    #                        source: [inputFileDirectory, tumour_record]
    #                        valueFrom: |
    #                            ${
    #                                return { "class":"File", "location": self[0].location + "/" + self[1].bamFileName }
    #                            }
    #                out: [qa_result]
    #                run: qa_check_subwf.cwl
    #        outputs:
    #            qa_result:
    #                type: File
    #    out:
    #        [qa_result]
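
# Example: a "tumours" entry in the input JSON. The record schema is defined in
# TumourType.yaml; bamFileName and associatedVcfs are the only field names
# referenced directly in this workflow, so the ID field name and every value
# below are hypothetical, and associatedVcfs may hold File objects rather than
# plain strings, depending on the schema.
#
#     "tumours": [
#         {
#             "tumourId": "tumour-01",
#             "bamFileName": "tumour-01.bam",
#             "associatedVcfs": [
#                 "tumour-01.snv.vcf.gz",
#                 "tumour-01.indel.vcf.gz",
#                 "tumour-01.sv.vcf.gz"
#             ]
#         }
#     ]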