# CWL workflow: chains the steps bwa -> sort_merge / dedup -> realign -> bqsr -> hc.
# The bwa step is scattered over `extract_chunks`; the remaining steps delegate to
# sub-workflows under ../algo and ../stage, which are not part of this file.
cwlVersion: v1.0
class: Workflow
doc: "DNAseq pipeline from fastq to vcf in distributed mode"

requirements:
  - class: ShellCommandRequirement
  - class: ScatterFeatureRequirement
  - class: MultipleInputFeatureRequirement
  - class: StepInputExpressionRequirement
  - class: InlineJavascriptRequirement
  - class: SubworkflowFeatureRequirement
  - class: ResourceRequirement
    # Core request follows the optional `threads` input.
    coresMin: $(inputs.threads)

inputs:
  # --- reference and reads ---------------------------------------------------
  reference:
    type: [string, File]
    secondaryFiles: [".fai", ".bwt", ".sa", ".ann", ".amb", ".pac"]
  input_reads:
    type:
      type: array
      items: [string, File]
  input_reads_index_file:
    type: [string, File]
  # One bwa scatter job is created per entry of this list.
  extract_chunks:
    type: string[]
  # Name of the merged BAM; also the base name of the per-chunk BAMs.
  sort_output_bam:
    type: string

  # --- bwa step options --------------------------------------------------------
  minimum_seed_length:
    type: int?
  min_std_max_min:
    type: int[]?
  # NOTE(review): `readgroup` and `sample` are optional and have no default, yet
  # the bwa step concatenates them into the @RG string unconditionally, so an
  # omitted value is rendered as the text "null" -- confirm this is intended.
  readgroup:
    type: string?
  sample:
    type: string?
  library:
    type: string?
    default: library
  platform:
    type: string?
    default: ILLUMINA
  mark_secondary:
    type: boolean?
  chunk_size:
    type: int
  bam_compression:
    type: int?
  sort_reference:
    type: ["null", string, File]
    secondaryFiles: [".fai"]

  # --- dedup / realign / bqsr --------------------------------------------------
  dedup_output_bam:
    type: string
  dedup_metrics_output_file:
    type: string
  realign_known_sites:
    type:
      - type: array
        items: [string, File]
    secondaryFiles: [".tbi"]
  bqsr_known_sites:
    type:
      - type: array
        items: [string, File]
    secondaryFiles: [".tbi"]
  realign_output_bam:
    type: string
  # Also the stem of the bqsr plot outputs (".pdf" / ".csv" are appended).
  qcal_output_file:
    type: string

  # --- hc step -----------------------------------------------------------------
  dbsnp:
    type: ["null", string, File]
    secondaryFiles: [".tbi"]
  output_file:
    type: string

  # --- shared by several steps -------------------------------------------------
  interval:
    type: ["null", string, File]
  threads:
    type: int?
  shard:
    type: string[]

outputs:
  sort_output:
    type: File
    outputSource: sort_merge/output
  dedup_output:
    type: File
    outputSource: dedup/output
  dedup_metric_output:
    type: File?
    outputSource: dedup/metrics_output
  realign_output:
    type: File
    outputSource: realign/output
  qcal_output:
    type: File
    outputSource: bqsr/qcal_output
  qcal_plot_output:
    type: File?
    outputSource: bqsr/plot_output
  qcal_plot_csv_output:
    type: File?
    outputSource: bqsr/plot_csv_output
  output:
    type: File
    outputSource: hc/output

steps:
  # Scattered step: `extract_chunks` and `output_file` are paired by dotproduct,
  # so every chunk gets its own output name.
  bwa:
    run: ../algo/bwa-mem-sort-with-fastq-slicer.cwl
    scatterMethod: dotproduct
    scatter:
      - extract_chunks
      - output_file
    in:
      reference: reference
      sort_reference: sort_reference
      reads: input_reads
      reads_index_file: input_reads_index_file
      mark_secondary: mark_secondary
      min_std_max_min: min_std_max_min
      minimum_seed_length: minimum_seed_length
      chunk_size: chunk_size
      extract_chunks: extract_chunks
      threads: threads
      sort_threads: threads
      bam_compression: bam_compression
      # Underscore-prefixed entries exist only to feed the expressions below.
      _output: sort_output_bam  # base name for output_file
      _extract_chunks: extract_chunks  # full, unscattered chunk list
      _ext:
        default: ".bam"
      output_file:
        source: extract_chunks
        linkMerge: merge_flattened
        # Produces "<_output>_part_<i><_ext>", where i is the position of the
        # first entry of the full chunk list equal to this job's chunk (or the
        # list length when none matches).
        valueFrom: |
          ${
            var chunks = inputs._extract_chunks;
            var idx = 0;
            while (idx < chunks.length && self != chunks[idx]) {
              idx++;
            }
            return inputs._output + "_part_" + idx + inputs._ext;
          }
      _readgroup: readgroup
      _sample: sample
      _platform: platform
      _library: library
      readgroup:
        # Tab-separated @RG string: ID, SM and PL are always emitted, LB only
        # when a library value is present.
        valueFrom: |
          ${
            var fields = [
              "@RG",
              "ID:" + inputs._readgroup,
              "SM:" + inputs._sample,
              "PL:" + inputs._platform
            ];
            if (inputs._library != null) {
              fields.push("LB:" + inputs._library);
            }
            return fields.join("\t");
          }
    out:
      - output

  # Combines the per-chunk bwa outputs into the single `sort_output_bam` file.
  sort_merge:
    run: ../algo/bam-merge.cwl
    in:
      threads: threads
      # Constant 0; its meaning is defined by bam-merge.cwl (not visible here).
      mergemode:
        valueFrom: "$(0)"
      input_bam:
        source: bwa/output
      output_file: sort_output_bam
    out:
      - output

  # Consumes the per-chunk bwa outputs directly, not the sort_merge result.
  dedup:
    run: ../stage/dedup-2-pass-distr.cwl
    in:
      reference: reference
      input_bam:
        source: bwa/output
      metrics_output_file: dedup_metrics_output_file
      output_file: dedup_output_bam
      shard: shard
      threads: threads
      interval: interval
    out:
      - output
      - metrics_output

  realign:
    run: ../stage/realign-distr.cwl
    in:
      reference: reference
      input_bam:
        source: dedup/output
        valueFrom: "$([self])"  # wrap the single file in an array
      known_sites: realign_known_sites
      output_file: realign_output_bam
      shard: shard
      threads: threads
      # This step takes the interval under the name `interval_list`.
      interval_list: interval
    out:
      - output

  bqsr:
    run: ../stage/bqsr-flow-distr.cwl
    in:
      reference: reference
      input_bam:
        source: realign/output
        valueFrom: "$([self])"  # wrap the single file in an array
      known_sites: bqsr_known_sites
      output_file: qcal_output_file
      # Plot file names are derived from the qcal output name.
      plot_output_file:
        source: qcal_output_file
        valueFrom: '$(self + ".pdf")'
      plot_output_csv_file:
        source: qcal_output_file
        valueFrom: '$(self + ".csv")'
      shard: shard
      threads: threads
      interval: interval
    out:
      - qcal_output
      - plot_output
      - plot_csv_output

  hc:
    run: ../stage/hc-distr.cwl
    in:
      reference: reference
      input_bam:
        source: realign/output
        valueFrom: "$([self])"  # wrap the single file in an array
      dbsnp: dbsnp
      output_file: output_file
      shard: shard
      threads: threads
      interval: interval
      qcal:
        source: bqsr/qcal_output
        valueFrom: "$([self])"  # wrap the single file in an array
    out:
      - output