#!/usr/bin/env cwl-runner

# TOPMed RNA-seq workflow (CWL v1.0).
#
# Chains STAR alignment, samtools sort/index, Picard MarkDuplicates, RSEM and
# RNA-SeQC. Each step runs a tool description referenced by a relative path
# (star.cwl, samtools-sort.cwl, indexbam.cwl, markduplicates.cwl, rsem.cwl,
# rna_seqc.cwl), so those files must sit next to this workflow.

doc: |
  TOPMed RNA-seq CWL workflow. Documentation on the workflow can be found [here](https://github.com/heliumdatacommons/cwl_workflows/blob/master/topmed-workflows/TOPMed_RNAseq_pipeline/README.md).
  Example input files: [Dockstore.json](https://github.com/heliumdatacommons/cwl_workflows/blob/master/topmed-workflows/TOPMed_RNAseq_pipeline/input-examples/Dockstore.json) and [rnaseq_pipeline_fastq-example.yml](https://github.com/heliumdatacommons/cwl_workflows/blob/master/topmed-workflows/TOPMed_RNAseq_pipeline/input-examples/rnaseq_pipeline_fastq-example.yml).

  Quickstart instructions are [here](https://github.com/heliumdatacommons/cwl_workflows/blob/master/topmed-workflows/TOPMed_RNAseq_pipeline/README.md#quick-start).

  [GitHub Repo](https://github.com/heliumdatacommons/cwl_workflows)

  Pipeline steps:
  1. Align RNA-seq reads with [STAR v2.5.3a](https://github.com/alexdobin/STAR).
  2. Run [Picard](https://github.com/broadinstitute/picard) [MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates).
  2a. Create BAM index for MarkDuplicates BAM with [Samtools 1.6](https://github.com/samtools/samtools/releases) index.
  3. Transcript quantification with [RSEM 1.3.0](https://deweylab.github.io/RSEM/)
  4. Gene quantification and quality control with [RNA-SeQC 1.1.9](https://github.com/francois-a/rnaseqc)

cwlVersion: v1.0
class: Workflow
label: "TOPMed_RNA-seq"

requirements:
  SubworkflowFeatureRequirement: {}
  # Needed for the `valueFrom` expressions on the sort_bam / sort_chimeras
  # step inputs below.
  StepInputExpressionRequirement: {}

# Resource hints are currently disabled.
# hints:
#   ResourceRequirement:
#     coresMin: 4
#     ramMin: 16
#     # tmpdirMin: 51200

inputs:
  # Consumed by run_star.
  star_index:
    type: Directory
  fastqs:
    type: File[]

  # Shared by several steps; also used to build the sorted BAM file names.
  prefix_str:
    type: string

  # Consumed by run_rsem.
  rsem_ref_dir:
    type: Directory
  max_frag_len:
    type: int
  estimate_rspd:
    type: boolean
  is_stranded:
    type: boolean
  paired_end:
    type: boolean

  # Consumed by run_rna-seqc.
  genes_gtf:
    type: File
  genome_fasta:
    type: File
    # `.fai` is appended to the file name; `^.dict` replaces the last
    # extension (CWL secondaryFiles pattern syntax).
    secondaryFiles:
      - .fai
      - ^.dict
  rnaseqc_flags:
    type: string[]

  # Disabled optional input (see the matching commented-out wiring in
  # run_rna-seqc).
  # gatk_flags:
  #   type:
  #     - "null"
  #     - type: array
  #       items: string

outputs:
  # --- STAR alignment -------------------------------------------------------
  star_output_bam:
    outputSource: sort_bam/output_file
    type: File
  star_output_bam_index:
    outputSource: index_bam/bam_index
    type: File
  star_output_transcriptome_bam:
    outputSource: run_star/transcriptome_bam
    type: File
  star_output_chimeric_junctions:
    outputSource: run_star/chimeric_junctions
    type: File
  star_output_chimeric_bam_file:
    outputSource: sort_chimeras/output_file
    type: File
  star_output_chimeric_bam_index:
    outputSource: index_chimeras/bam_index
    type: File
  star_output_read_counts:
    outputSource: run_star/read_counts
    type: File
  star_output_junctions:
    outputSource: run_star/junctions
    type: File
  star_output_junctions_pass1:
    outputSource: run_star/junctions_pass1
    type: File
  star_output_logs:
    outputSource: run_star/logs
    type: File[]

  # --- Picard MarkDuplicates ------------------------------------------------
  markduplicates_output_bam:
    outputSource: run_markduplicates/bam_file
    type: File
  markduplicates_output_metrics:
    outputSource: run_markduplicates/metrics
    type: File
  markduplicates_bam_index:
    outputSource: run_index_markduplicates_bam/bam_index
    type: File

  # --- RSEM -----------------------------------------------------------------
  rsem_output_gene_results:
    outputSource: run_rsem/gene_results
    type: File
  rsem_output_isoforms_results:
    outputSource: run_rsem/isoforms_results
    type: File

  # --- RNA-SeQC -------------------------------------------------------------
  rna-seqc_output_gene_rpkm:
    outputSource: run_rna-seqc/gene_rpkm
    type: File
  rna-seqc_output_gene_counts:
    outputSource: run_rna-seqc/gene_counts
    type: File
  rna-seqc_output_exon_counts:
    outputSource: run_rna-seqc/exon_counts
    type: File
  rna-seqc_output_count_metrics:
    outputSource: run_rna-seqc/count_metrics
    type: File
  rna-seqc_output_count_outputs:
    outputSource: run_rna-seqc/count_outputs
    type: File

steps:
  # Pipeline step 1: align the FASTQ reads with STAR.
  run_star:
    run: star.cwl
    in:
      star_index: star_index
      fastqs: fastqs
      prefix: prefix_str
    out:
      - bam
      - transcriptome_bam
      - chimeric_junctions
      - chimeric_bam
      - read_counts
      - junctions
      - junctions_pass1
      - logs

  # Sort the STAR BAM; the output file is named
  # "<prefix_str>.Aligned.sortedByCoord.out.bam".
  sort_bam:
    run: samtools-sort.cwl
    in:
      input:
        source: run_star/bam
      output_name:
        source: prefix_str
        valueFrom: $(self).Aligned.sortedByCoord.out.bam
    out: [output_file]

  # Sort the chimeric BAM; the output file is named
  # "<prefix_str>.Chimeric.out.sorted.bam".
  sort_chimeras:
    run: samtools-sort.cwl
    in:
      input:
        source: run_star/chimeric_bam
      output_name:
        source: prefix_str
        valueFrom: $(self).Chimeric.out.sorted.bam
    out: [output_file]

  # Index the two sorted BAMs.
  index_bam:
    run: indexbam.cwl
    in:
      input_bam: sort_bam/output_file
    out: [bam_index]

  index_chimeras:
    run: indexbam.cwl
    in:
      input_bam: sort_chimeras/output_file
    out: [bam_index]

  # Pipeline step 2: Picard MarkDuplicates on the sorted alignment.
  run_markduplicates:
    run: markduplicates.cwl
    in:
      input_bam: sort_bam/output_file
      prefix_str: prefix_str
    out: [bam_file, metrics]

  # Pipeline step 2a: index the MarkDuplicates BAM.
  run_index_markduplicates_bam:
    run: indexbam.cwl
    in:
      input_bam: run_markduplicates/bam_file
    out: [bam_index]

  # Pipeline step 3: transcript quantification with RSEM on STAR's
  # transcriptome BAM.
  run_rsem:
    run: rsem.cwl
    in:
      rsem_ref_dir: rsem_ref_dir
      transcriptome_bam: run_star/transcriptome_bam
      prefix_str: prefix_str
      max_frag_len: max_frag_len
      estimate_rspd: estimate_rspd
      is_stranded: is_stranded
      paired_end: paired_end
    out: [gene_results, isoforms_results]

  # Pipeline step 4: gene quantification and QC with RNA-SeQC.
  run_rna-seqc:
    run: rna_seqc.cwl
    in:
      # NOTE(review): bam_file is fed from the indexing step's `bam_index`
      # output rather than run_markduplicates/bam_file. Presumably
      # indexbam.cwl returns the BAM with its index attached as a secondary
      # file - confirm against indexbam.cwl before changing this wiring.
      bam_file: run_index_markduplicates_bam/bam_index
      genes_gtf: genes_gtf
      genome_fasta: genome_fasta
      prefix_str: prefix_str
      rnaseqc_flags: rnaseqc_flags
      # gatk_flags: gatk_flags
    out:
      - gene_rpkm
      - gene_counts
      - exon_counts
      - count_metrics
      - count_outputs

# Metadata vocabularies and authorship.
$namespaces:
  s: http://schema.org/

$schemas:
  - http://dublincore.org/2012/06/14/dcterms.rdf
  - http://xmlns.com/foaf/spec/20140114.rdf
  - https://schema.org/docs/schema_org_rdfa.html

s:author:
  - class: s:Person
    s:id: https://orcid.org/0000-0003-3523-5312
    s:email: christopherball@rti.org
    s:name: Christopher Ball

s:codeRepository: https://github.com/heliumdatacommons/cwl_workflows