# Single-Cell Preprocessing Pipeline (CWL v1.0 Workflow).
#
# Steps, as wired below:
#   1. prepare_indices         - kb ref   (../tools/kb-ref.cwl)
#   2. extract_fastq_1/2       - extract the (optionally compressed) FASTQ inputs
#   3. run_fastqc_for_fastq_1/2 - FastQC report per FASTQ file
#   4. generate_counts_matrix  - kb count (../tools/kb-count.cwl)
#   5. compress_counts_folder  - archive the unfiltered counts folder
#   6. collect_statistics      - merge the kallisto and bustools JSON reports
#                                into a single Markdown file
cwlVersion: v1.0
class: Workflow

requirements:
  - class: SubworkflowFeatureRequirement
  - class: StepInputExpressionRequirement
  - class: InlineJavascriptRequirement
  - class: MultipleInputFeatureRequirement

# Upstream workflow whose outputs are referenced by the 'sd:upstreamSource'
# fields of genome_fasta_file and annotation_gtf_file.
'sd:upstream':
  genome_indices:
    - "genome-indices.cwl"

inputs:

  alias:
    type: string
    label: "Experiment short name/Alias"
    'sd:preview':
      position: 1

  fastq_file_1:
    type: File
    format: "http://edamontology.org/format_1930"
    label: "FASTQ file 1 (optionally compressed)"
    doc: "FASTQ file 1 (optionally compressed)"

  fastq_file_2:
    type: File
    format: "http://edamontology.org/format_1930"
    label: "FASTQ file 2 (optionally compressed)"
    doc: "FASTQ file 2 (optionally compressed)"

  sc_technology:
    type:
      - type: enum
        name: "sc_technology"
        symbols:
          - 10XV2      # 2 input files
          - 10XV3      # 2 input files
          - CELSEQ     # 2 input files
          - CELSEQ2    # 2 input files
          - DROPSEQ    # 2 input files
          - INDROPSV1  # 2 input files
          - INDROPSV2  # 2 input files
          - SCRUBSEQ   # 2 input files
          - SURECELL   # 2 input files
    label: "Single-cell technology used"
    doc: "Single-cell technology used"

  workflow_type:
    type:
      - "null"
      - type: enum
        name: "workflow_type"
        symbols:
          - standard
          - lamanno
          - nucleus
          - kite
    default: "standard"
    label: "Workflow type"
    doc: |
      Type of workflow. Use lamanno to calculate RNA velocity based on La Manno et al. 2018 logic.
      Use nucleus to calculate RNA velocity on single-nucleus RNA-seq reads.
      Default: standard

  genome_fasta_file:
    type: File
    format: "http://edamontology.org/format_1929"
    label: "Reference genome FASTA file"
    doc: "Reference genome FASTA file that includes all chromosomes"
    'sd:upstreamSource': "genome_indices/fasta_output"

  annotation_gtf_file:
    type: File
    format: "http://edamontology.org/format_2306"
    label: "GTF annotation file"
    doc: "GTF annotation file that includes refGene and mitochondrial DNA annotations"
    'sd:upstreamSource': "genome_indices/annotation_gtf"

  threads:
    type: int?
    default: 4
    label: "Number of threads"
    doc: "Number of threads for those steps that support multithreading"
    'sd:layout':
      advanced: true

  memory_limit:
    type: string?
    default: "4G"
    label: "Maximum memory used"
    doc: "Maximum memory used"
    'sd:layout':
      advanced: true

outputs:

  fastqc_report_fastq_1:
    type: File
    outputSource: run_fastqc_for_fastq_1/html_file
    label: "FastQC report for FASTQ file 1"
    doc: "FastQC report for FASTQ file 1"

  fastqc_report_fastq_2:
    type: File
    outputSource: run_fastqc_for_fastq_2/html_file
    label: "FastQC report for FASTQ file 2"
    doc: "FastQC report for FASTQ file 2"

  counts_unfiltered_folder:
    type: File
    outputSource: compress_counts_folder/compressed_folder
    label: "Compressed folder with count matrix files"
    doc: |
      Compressed folder with count matrix files generated by bustools count

  whitelist_file:
    type: File
    outputSource: generate_counts_matrix/whitelist_file
    label: "Whitelisted barcodes"
    doc: |
      Whitelisted barcodes that correspond to the used single-cell technology

  bustools_inspect_report:
    type: File
    outputSource: generate_counts_matrix/bustools_inspect_report
    label: "Report summarizing BUS file content"
    doc: |
      Report summarizing BUS file content generated by bustools inspect

  collected_statistics:
    type: File
    outputSource: collect_statistics/collected_statistics
    label: "Collected statistics in Markdown format"
    doc: "Collected statistics in Markdown format"
    'sd:visualPlugins':
      - markdownView:
          tab: 'Overview'

  kallisto_bus_report:
    type: File
    outputSource: generate_counts_matrix/kallisto_bus_report
    label: "Pseudoalignment report"
    doc: |
      Pseudoalignment report generated by kallisto bus

  ec_mapping_file:
    type: File
    outputSource: generate_counts_matrix/ec_mapping_file
    label: "Mapping equivalence classes to transcripts"
    doc: |
      Mapping equivalence classes to transcripts generated by kallisto bus

  transcripts_file:
    type: File
    outputSource: generate_counts_matrix/transcripts_file
    label: "Transcript names"
    doc: |
      Transcript names file generated by kallisto bus

  not_sorted_bus_file:
    type: File
    outputSource: generate_counts_matrix/not_sorted_bus_file
    label: "Not sorted BUS file"
    doc: |
      Not sorted BUS file generated by kallisto bus

  corrected_sorted_bus_file:
    type: File
    outputSource: generate_counts_matrix/corrected_sorted_bus_file
    label: "Sorted BUS file with corrected barcodes"
    doc: |
      Sorted BUS file with corrected barcodes generated by bustools correct

  prepare_indices_stdout_log:
    type: File
    outputSource: prepare_indices/stdout_log
    label: "stdout log generated by kb ref"
    doc: |
      stdout log generated by kb ref

  prepare_indices_stderr_log:
    type: File
    outputSource: prepare_indices/stderr_log
    label: "stderr log generated by kb ref"
    doc: |
      stderr log generated by kb ref

  generate_counts_matrix_stdout_log:
    type: File
    outputSource: generate_counts_matrix/stdout_log
    label: "stdout log generated by kb count"
    doc: |
      stdout log generated by kb count

  generate_counts_matrix_stderr_log:
    type: File
    outputSource: generate_counts_matrix/stderr_log
    label: "stderr log generated by kb count"
    doc: |
      stderr log generated by kb count

steps:

  # Runs kb ref on the reference FASTA and GTF annotation. Only
  # kallisto_index_file, tx_to_gene_mapping_file and the two *_capture mapping
  # files are consumed downstream (by generate_counts_matrix); the logs are
  # exposed as workflow outputs.
  prepare_indices:
    run: ../tools/kb-ref.cwl
    in:
      genome_fasta_file: genome_fasta_file
      annotation_gtf_file: annotation_gtf_file
      workflow_type: workflow_type
    out:
      - kallisto_index_file
      - tx_to_gene_mapping_file
      - tx_fasta_file
      - intron_fasta_file
      - tx_to_capture_mapping_file
      - intron_tx_to_capture_mapping_file
      - stdout_log
      - stderr_log

  # The FASTQ inputs are "optionally compressed" (see input labels), so both
  # are passed through the extraction tool before any further processing.
  extract_fastq_1:
    run: ../tools/extract-fastq.cwl
    in:
      compressed_file: fastq_file_1
    out:
      - fastq_file

  extract_fastq_2:
    run: ../tools/extract-fastq.cwl
    in:
      compressed_file: fastq_file_2
    out:
      - fastq_file

  run_fastqc_for_fastq_1:
    run: ../tools/fastqc.cwl
    in:
      reads_file: extract_fastq_1/fastq_file
    out:
      - html_file

  run_fastqc_for_fastq_2:
    run: ../tools/fastqc.cwl
    in:
      reads_file: extract_fastq_2/fastq_file
    out:
      - html_file

  # Runs kb count on the extracted FASTQ files using the index and mapping
  # files produced by prepare_indices.
  generate_counts_matrix:
    run: ../tools/kb-count.cwl
    in:
      fastq_file_1: extract_fastq_1/fastq_file
      fastq_file_2: extract_fastq_2/fastq_file
      kallisto_index_file: prepare_indices/kallisto_index_file
      tx_to_gene_mapping_file: prepare_indices/tx_to_gene_mapping_file
      sc_technology: sc_technology
      workflow_type: workflow_type
      # Constant step input: h5ad is always enabled, not exposed to the user.
      h5ad:
        default: true
      tx_to_capture_mapping_file: prepare_indices/tx_to_capture_mapping_file
      intron_tx_to_capture_mapping_file: prepare_indices/intron_tx_to_capture_mapping_file
      threads: threads
      memory_limit: memory_limit
    out:
      - counts_unfiltered_folder
      - whitelist_file
      - bustools_inspect_report
      - kallisto_bus_report
      - ec_mapping_file
      - transcripts_file
      - not_sorted_bus_file
      - corrected_sorted_bus_file
      - stdout_log
      - stderr_log

  compress_counts_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: generate_counts_matrix/counts_unfiltered_folder
    out:
      - compressed_folder

  # Inline CommandLineTool: `python3 -c <script> <kallisto_report> <bustools_report>`.
  # The script loads both JSON reports, dumps them as YAML and rewrites each
  # line into Markdown, saved as collected_statistics.md in the working directory.
  collect_statistics:
    run:
      cwlVersion: v1.0
      class: CommandLineTool
      hints:
        - class: DockerRequirement
          dockerPull: rackspacedot/python37
      inputs:
        # With `python3 -c`, sys.argv[0] is "-c", so the two report paths
        # (positions 6 and 7) arrive as sys.argv[1] and sys.argv[2].
        # NOTE(review): the script imports PyYAML and calls yaml.dump with
        # sort_keys; confirm the Docker image ships a PyYAML that supports it.
        # NOTE(review): the startswith() prefixes assume PyYAML's 2-space block
        # indentation ("  - " list item, "    " nested value, "  " first-level
        # key) - confirm against the canonical copy of this workflow.
        script:
          type: string?
          default: |
            #!/usr/bin/env python3
            import sys, json, os, yaml
            kallisto_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
            bustools_name = os.path.splitext(os.path.basename(sys.argv[2]))[0]
            with open(sys.argv[1], "r") as kallisto_stream:
                with open(sys.argv[2], "r") as bustools_stream:
                    with open("collected_statistics.md", "w") as report_stream:
                        combined_data = {
                            "Pseudoalignment statistics": json.load(kallisto_stream),
                            "BUS statistics": json.load(bustools_stream)
                        }
                        for line in yaml.dump(combined_data, width=1000, sort_keys=False).split("\n"):
                            if not line.strip():
                                continue
                            if line.startswith("  - "):
                                report_stream.write(line+"\n")
                            elif line.startswith("    "):
                                report_stream.write("<br>"+line+"\n")
                            elif line.startswith("  "):
                                report_stream.write("- "+line+"\n")
                            else:
                                report_stream.write("### "+line+"\n")
          inputBinding:
            position: 5
        kallisto_report:
          type: File
          inputBinding:
            position: 6
        bustools_report:
          type: File
          inputBinding:
            position: 7
      outputs:
        collected_statistics:
          type: File
          outputBinding:
            # Match the exact file the script writes. A bare "*" would make
            # this single-File output ambiguous as soon as anything else
            # appears in the output directory.
            glob: "collected_statistics.md"
      baseCommand: ["python3", "-c"]
    in:
      kallisto_report: generate_counts_matrix/kallisto_bus_report
      bustools_report: generate_counts_matrix/bustools_inspect_report
    out:
      - collected_statistics

# schema.org metadata describing the workflow and its authors.
$namespaces:
  s: http://schema.org/

$schemas:
  - http://schema.org/version/9.0/schemaorg-current-http.rdf

s:name: "Single-Cell Preprocessing Pipeline"
label: "Single-Cell Preprocessing Pipeline"
s:alternateName: "Single-Cell Preprocessing Pipeline"

s:downloadUrl: https://raw.githubusercontent.com/datirium/workflows/master/workflows/single-cell-preprocess.cwl
s:codeRepository: https://github.com/datirium/workflows
s:license: http://www.apache.org/licenses/LICENSE-2.0

s:isPartOf:
  class: s:CreativeWork
  s:name: Common Workflow Language
  s:url: http://commonwl.org/

s:creator:
  - class: s:Organization
    s:legalName: "Cincinnati Children's Hospital Medical Center"
    s:location:
      - class: s:PostalAddress
        s:addressCountry: "USA"
        s:addressLocality: "Cincinnati"
        s:addressRegion: "OH"
        s:postalCode: "45229"
        s:streetAddress: "3333 Burnet Ave"
        s:telephone: "+1(513)636-4200"
    s:logo: "https://www.cincinnatichildrens.org/-/media/cincinnati%20childrens/global%20shared/childrens-logo-new.png"
    s:department:
      - class: s:Organization
        s:legalName: "Allergy and Immunology"
        s:department:
          - class: s:Organization
            s:legalName: "Barski Research Lab"
            s:member:
              - class: s:Person
                s:name: Michael Kotliar
                s:email: mailto:misha.kotliar@gmail.com
                s:sameAs:
                  - id: http://orcid.org/0000-0002-6486-3898

doc: |
  Devel version of Single-Cell Preprocessing Pipeline
  ===================================================