#!/usr/bin/env cwl-runner class: Workflow id: "multi-lane-sample-workflow" label: "workflow to generate mapping stats and gene counts" cwlVersion: v1.0 requirements: - class: ScatterFeatureRequirement - class: SubworkflowFeatureRequirement - class: InlineJavascriptRequirement - class: StepInputExpressionRequirement inputs: raw_reads: doc: "RAW read input, can be BAM files or pairs of FastQ files (optionally gzip compressed). Each element of this array will be treated as one read group in the aligned BAM file. Within each element, only either BAM files or FastQ files are allowed." type: type: array items: type: array items: File map_reference: type: File doc: "The core STAR reference and a GTF file bundled in a tar.gz." sample_name: type: string doc: "Sample name, which will used to prefix output file names and SM tag in the BAM file header." default: '' stats_reference: type: File doc: "The reference files bundled in a tar.gz." count_reference: type: File doc: "A reference GTF file." bigwig_reference: type: File doc: "FASTA file of a reference file, which the input BAM file was mapped to." secondaryFiles: - .fai bigwig_threads: type: int? default: 1 doc: "Number of threads to use for generating bigwig." map_threads: type: int? default: 1 doc: "Number of threads to use for each mapping process." merge_threads: type: int? default: 1 doc: "Number of threads to use for merging step." rg_id_tags: type: type: array items: ["null", string] doc: "Readgroup ID tag values. It should have one value for each group of input raw files. Use empty string to use defaults or existing RG ID in the input BAM. It only uses the RG ID value in the first BAM file of a group." lb_tags: type: type: array items: ["null", string] doc: "Sequencing library tag values in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing LB tag in the input BAM. It only uses the LB tag value in the first BAM file of a group." ds_tags: type: type: array items: ["null", string] doc: "Description tag value in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing DS tag in the input BAM. It only uses the DS tag value in the first BAM file of a group." pl_tags: type: type: array items: ["null", string] doc: "Platform tag value in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing PL tag in the input BAM. It only uses the PL tag value in the first BAM file of a group." pu_tags: type: type: array items: ["null", string] doc: "Platform unit tag value in the output BAM header. It should have one value for each group of input raw files. Use empty string to set it to none or existing PU tag in the input BAM. It only uses the PU tag value in the first BAM file of a group." outputs: dup_marked_bam: type: File outputSource: merge/dup_marked_merged_bam dup_marked_bam_md5: type: File outputSource: merge/dup_marked_bam_md5 dup_marked_bam_dup_met: type: File outputSource: merge/dup_marked_bam_dup_met transcriptome_lane_bams: type: File[] outputSource: map_and_stats/transcriptome_bam dup_marked_lane_bam_dup_mets: type: File[] outputSource: map_and_stats/dup_marked_bam_dup_met rna_bas_files: type: File[] outputSource: map_and_stats/rna_bas gene_cover_pngs: type: File[] outputSource: map_and_stats/gene_cover_png gene_body_coverage_rscripts: type: File[] outputSource: map_and_stats/gene_body_coverage_rscript gene_body_coverage_txts: type: File[] outputSource: map_and_stats/gene_body_coverage_txt gene_body_coverage_updated_rscripts: type: File[] outputSource: map_and_stats/gene_body_coverage_updated_rscript read_dists: type: File[] outputSource: map_and_stats/read_dist out_bw: type: File outputSource: bigwig/out_bw out_count: type: File outputSource: count/out_count steps: map_and_stats: in: raw_reads: source: raw_reads map_reference: source: map_reference sample_name: source: sample_name stats_reference: source: stats_reference map_threads: source: map_threads rg_id_tag: source: rg_id_tags lb_tag: source: lb_tags ds_tag: source: ds_tags pl_tag: source: pl_tags pu_tag: source: pu_tags out: [dup_marked_bam, dup_marked_bam_dup_met, transcriptome_bam, rna_bas, gene_cover_png, gene_body_coverage_rscript, gene_body_coverage_txt, gene_body_coverage_updated_rscript, read_dist] scatter: [raw_reads, rg_id_tag, lb_tag, ds_tag, pl_tag, pu_tag] scatterMethod: dotproduct run: tools/lane_map_and_stats.cwl merge: in: sorted_bams: source: map_and_stats/dup_marked_bam threads: source: merge_threads out_bam_name: source: sample_name valueFrom: $(self).bam out_bam_index_name: source: sample_name valueFrom: $(self).bam.bai md5_file_name: source: sample_name valueFrom: $(self).bam.md5 dup_met_file_name: source: sample_name valueFrom: $(self).bam.met out: [dup_marked_merged_bam, dup_marked_bam_dup_met, dup_marked_bam_md5] run: tools/merge_and_mark_dups.cwl bigwig: in: sample_bam: source: merge/dup_marked_merged_bam reference: source: bigwig_reference threads: source: bigwig_threads out: [out_bw] run: tools/run-cgprna_bigwig.cwl count: in: sample_bam: source: merge/dup_marked_merged_bam reference: source: count_reference out: [out_count] run: tools/run-cgprna_htseq-count.cwl doc: | A workflow to generate mapping stats and gene counts from RNA-seq data using cgpRna container. See the [cgpRna](https://github.com/cancerit/cgpRna) website for more information. $schemas: - https://schema.org/version/latest/schema.rdf $namespaces: s: http://schema.org/ s:codeRepository: https://github.com/cancerit/cgpRna s:license: https://spdx.org/licenses/AGPL-3.0 s:author: - class: s:Person s:email: mailto:yx2@sanger.ac.uk s:name: Yaobo Xu