# Single-cell preprocessing workflow (CWL v1.0).
#
# Data flow, as wired in the `steps` section below:
#   1. extract_fastq_r1 / extract_fastq_r2 pass each input FASTQ through
#      ../tools/extract-fastq.cwl.
#   2. run_fastqc_for_fastq_r1 / _r2 produce an HTML report per extracted file.
#   3. generate_counts_matrix runs ../tools/cellranger-count.cwl on both
#      extracted FASTQ files against the upstream genome indices folder.
#   4. compress_* steps pack three of the result folders with
#      ../tools/tar-compress.cwl.
#   5. estimate_contamination feeds the three compressed folders to
#      ../tools/soupx-subworkflow.cwl.
#   6. collect_statistics converts the metrics summary CSV into a Markdown list.
#
# Keys prefixed with `sd:` are extension fields that are not part of the CWL
# standard; presumably they are consumed by the hosting platform's UI
# (TODO confirm against the platform documentation).

cwlVersion: v1.0
class: Workflow


requirements:
  - class: SubworkflowFeatureRequirement
  - class: StepInputExpressionRequirement
  - class: InlineJavascriptRequirement
  - class: MultipleInputFeatureRequirement


# Upstream workflow whose outputs can be referenced through 'sd:upstreamSource'.
'sd:upstream':
  genome_indices:
    - "cellranger-mkref.cwl"


inputs:

  alias:
    type: string
    label: "Experiment short name/Alias"
    'sd:preview':
      position: 1

  indices_folder:
    type: Directory
    label: "Genome Type"
    doc: "Cell Ranger generated genome indices folder"
    'sd:upstreamSource': "genome_indices/indices_folder"
    'sd:localLabel': true

  fastq_file_r1:
    type: File
    format: "http://edamontology.org/format_1930"
    label: "FASTQ file R1 (optionally compressed)"
    doc: "FASTQ file R1 (optionally compressed)"

  fastq_file_r2:
    type: File
    format: "http://edamontology.org/format_1930"
    label: "FASTQ file R2 (optionally compressed)"
    doc: "FASTQ file R2 (optionally compressed)"

  threads:
    type: int?
    default: 4
    label: "Number of threads"
    doc: "Number of threads for those steps that support multithreading"
    'sd:layout':
      advanced: true

  # NOTE(review): the label is the same "Genome Type" used by indices_folder.
  # Both inputs read from the same upstream (genome_indices), so this looks
  # intentional rather than a copy-paste slip - confirm before renaming.
  memory_limit:
    type: int?
    default: 30
    label: "Genome Type"
    doc: |
      Maximum memory used (GB).
      The same as was used for generating indices.
      The same will be applied to virtual memory
    'sd:upstreamSource': "genome_indices/memory_limit"
    'sd:localLabel': true


outputs:

  fastqc_report_fastq_r1:
    type: File
    outputSource: run_fastqc_for_fastq_r1/html_file
    label: "FastqQC report for FASTQ file R1"
    doc: |
      FastqQC report for FASTQ file R1

  fastqc_report_fastq_r2:
    type: File
    outputSource: run_fastqc_for_fastq_r2/html_file
    label: "FastqQC report for FASTQ file R2"
    doc: |
      FastqQC report for FASTQ file R2

  web_summary_report:
    type: File
    outputSource: generate_counts_matrix/web_summary_report
    label: "Run summary metrics and charts in HTML format"
    doc: |
      Run summary metrics and charts in HTML format

  metrics_summary_report:
    type: File
    outputSource: generate_counts_matrix/metrics_summary_report
    label: "Run summary metrics in CSV format"
    doc: |
      Run summary metrics in CSV format

  possorted_genome_bam_bai:
    type: File
    outputSource: generate_counts_matrix/possorted_genome_bam_bai
    label: "Aligned to the genome indexed reads BAM+BAI files"
    doc: |
      Indexed reads aligned to the genome and transcriptome annotated
      with barcode information
    'sd:visualPlugins':
      - igvbrowser:
          tab: 'IGV Genome Browser'
          id: 'igvbrowser'
          optional: true
          type: 'alignment'
          format: 'bam'
          name: "BAM Track"
          displayMode: "SQUISHED"

  filtered_feature_bc_matrix_folder:
    type: File
    outputSource: compress_filtered_feature_bc_matrix_folder/compressed_folder
    label: "Compressed folder with filtered feature-barcode matrices"
    doc: |
      Compressed folder with filtered feature-barcode matrices containing only cellular barcodes in MEX format.
      When implemented, in Targeted Gene Expression samples, the non-targeted genes won't be present.

  filtered_feature_bc_matrix_h5:
    type: File
    outputSource: generate_counts_matrix/filtered_feature_bc_matrix_h5
    label: "Filtered feature-barcode matrices in HDF5 format"
    doc: |
      Filtered feature-barcode matrices containing only cellular barcodes in HDF5 format.
      When implemented, in Targeted Gene Expression samples, the non-targeted genes won't be present.

  raw_feature_bc_matrices_folder:
    type: File
    outputSource: compress_raw_feature_bc_matrices_folder/compressed_folder
    label: "Compressed folder with unfiltered feature-barcode matrices"
    doc: |
      Compressed folder with unfiltered feature-barcode matrices containing all barcodes in MEX format

  raw_feature_bc_matrices_h5:
    type: File
    outputSource: generate_counts_matrix/raw_feature_bc_matrices_h5
    label: "Unfiltered feature-barcode matrices in HDF5 format"
    doc: |
      Unfiltered feature-barcode matrices containing all barcodes in HDF5 format

  adjusted_feature_bc_matrices_folder:
    type: File
    outputSource: estimate_contamination/adjusted_feature_bc_matrices_folder
    label: "Compressed folder with SoupX adjusted feature-barcode matrices"
    doc: |
      Compressed folder with SoupX adjusted feature-barcode matrices in MEX format

  adjusted_feature_bc_matrices_h5:
    type: File
    outputSource: estimate_contamination/adjusted_feature_bc_matrices_h5
    label: "SoupX adjusted feature-barcode matrices in HDF5 format"
    doc: |
      SoupX adjusted feature-barcode matrices in HDF5 format

  contamination_estimation_plot:
    type: File
    outputSource: estimate_contamination/contamination_estimation_plot
    label: "SoupX contamination estimation plot"
    doc: |
      SoupX contamination estimation plot

  secondary_analysis_report_folder:
    type: File
    outputSource: compress_secondary_analysis_report_folder/compressed_folder
    label: "Compressed folder with secondary analysis results"
    doc: |
      Compressed folder with secondary analysis results including dimensionality reduction,
      cell clustering, and differential expression

  molecule_info_h5:
    type: File
    outputSource: generate_counts_matrix/molecule_info_h5
    label: "Molecule-level information for aggregating samples into larger datasets"
    doc: |
      Molecule-level information used by cellranger aggr to aggregate samples into larger datasets

  loupe_browser_track:
    type: File
    outputSource: generate_counts_matrix/loupe_browser_track
    label: "Loupe Browser visualization and analysis file"
    doc: |
      Loupe Browser visualization and analysis file

  collected_statistics:
    type: File
    outputSource: collect_statistics/collected_statistics
    label: "Collected statistics in Markdown format"
    doc: "Collected statistics in Markdown format"
    'sd:visualPlugins':
      - markdownView:
          tab: 'Overview'

  generate_counts_matrix_stdout_log:
    type: File
    outputSource: generate_counts_matrix/stdout_log
    label: "stdout log generated by cellranger count"
    doc: |
      stdout log generated by cellranger count

  generate_counts_matrix_stderr_log:
    type: File
    outputSource: generate_counts_matrix/stderr_log
    label: "stderr log generated by cellranger count"
    doc: |
      stderr log generated by cellranger count


steps:

  extract_fastq_r1:
    run: ../tools/extract-fastq.cwl
    in:
      compressed_file: fastq_file_r1
    out:
      - fastq_file

  extract_fastq_r2:
    run: ../tools/extract-fastq.cwl
    in:
      compressed_file: fastq_file_r2
    out:
      - fastq_file

  run_fastqc_for_fastq_r1:
    run: ../tools/fastqc.cwl
    in:
      reads_file: extract_fastq_r1/fastq_file
    out:
      - html_file

  run_fastqc_for_fastq_r2:
    run: ../tools/fastqc.cwl
    in:
      reads_file: extract_fastq_r2/fastq_file
    out:
      - html_file

  generate_counts_matrix:
    run: ../tools/cellranger-count.cwl
    in:
      fastq_file_r1: extract_fastq_r1/fastq_file
      fastq_file_r2: extract_fastq_r2/fastq_file
      indices_folder: indices_folder
      threads: threads
      # The single memory_limit input drives both limits, matching the
      # "The same will be applied to virtual memory" note in its doc.
      memory_limit: memory_limit
      virt_memory_limit: memory_limit
    out:
      - web_summary_report
      - metrics_summary_report
      - possorted_genome_bam_bai
      - filtered_feature_bc_matrix_folder
      - filtered_feature_bc_matrix_h5
      - raw_feature_bc_matrices_folder
      - raw_feature_bc_matrices_h5
      - secondary_analysis_report_folder
      - molecule_info_h5
      - loupe_browser_track
      - stdout_log
      - stderr_log

  compress_filtered_feature_bc_matrix_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: generate_counts_matrix/filtered_feature_bc_matrix_folder
    out:
      - compressed_folder

  compress_raw_feature_bc_matrices_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: generate_counts_matrix/raw_feature_bc_matrices_folder
    out:
      - compressed_folder

  compress_secondary_analysis_report_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: generate_counts_matrix/secondary_analysis_report_folder
    out:
      - compressed_folder

  # The SoupX subworkflow receives the compressed folders, not the raw
  # directories emitted by generate_counts_matrix.
  estimate_contamination:
    run: ../tools/soupx-subworkflow.cwl
    in:
      raw_feature_bc_matrices_folder: compress_raw_feature_bc_matrices_folder/compressed_folder
      filtered_feature_bc_matrix_folder: compress_filtered_feature_bc_matrix_folder/compressed_folder
      secondary_analysis_report_folder: compress_secondary_analysis_report_folder/compressed_folder
    out:
      - adjusted_feature_bc_matrices_folder
      - adjusted_feature_bc_matrices_h5
      - contamination_estimation_plot

  # Inline tool: runs `python3 -c <script> <metrics_summary_report>`.
  # The script text is bound at position 5 and the CSV path at position 6, so
  # inside the script sys.argv[1] is the CSV path.
  collect_statistics:
    run:
      cwlVersion: v1.0
      class: CommandLineTool
      hints:
        - class: DockerRequirement
          dockerPull: rackspacedot/python37
      inputs:
        script:
          type: string?
          # The first CSV row is used as keys and the last subsequent row as
          # values. `or []` keeps an empty or header-only CSV from raising
          # TypeError in zip(); the heading is still written in that case.
          default: |
            #!/usr/bin/env python3
            import sys, csv
            with open(sys.argv[1], "r") as input_stream:
              with open("collected_statistics.md", "w") as output_stream:
                output_stream.write("### Cell Ranger Statistics\n")
                keys, values = None, None
                for i, row in enumerate(csv.reader(input_stream)):
                  if i==0:
                    keys = row
                  else:
                    values = row
                for k,v in zip(keys or [], values or []):
                  output_stream.write("- "+k+": "+v+"\n")
          inputBinding:
            position: 5
        metrics_summary_report:
          type: File
          inputBinding:
            position: 6
      outputs:
        collected_statistics:
          type: File
          outputBinding:
            # Match only the file the script writes; a bare "*" would fail
            # output collection for this single-File output as soon as any
            # other file appeared in the output directory.
            glob: "collected_statistics.md"
      baseCommand: ["python3", "-c"]
    in:
      metrics_summary_report: generate_counts_matrix/metrics_summary_report
    out:
      - collected_statistics


$namespaces:
  s: http://schema.org/

$schemas:
  - https://github.com/schemaorg/schemaorg/raw/main/data/releases/11.01/schemaorg-current-http.rdf

s:name: "Single-Cell Preprocessing Cell Ranger Pipeline"
label: "Single-Cell Preprocessing Cell Ranger Pipeline"
s:alternateName: "Single-Cell Preprocessing Cell Ranger Pipeline"

s:downloadUrl: https://raw.githubusercontent.com/datirium/workflows/master/workflows/single-cell-preprocess-cellranger.cwl
s:codeRepository: https://github.com/datirium/workflows
s:license: http://www.apache.org/licenses/LICENSE-2.0

s:isPartOf:
  class: s:CreativeWork
  s:name: Common Workflow Language
  s:url: http://commonwl.org/

s:creator:
  - class: s:Organization
    s:legalName: "Cincinnati Children's Hospital Medical Center"
    s:location:
      - class: s:PostalAddress
        s:addressCountry: "USA"
        s:addressLocality: "Cincinnati"
        s:addressRegion: "OH"
        s:postalCode: "45229"
        s:streetAddress: "3333 Burnet Ave"
        s:telephone: "+1(513)636-4200"
    s:logo: "https://www.cincinnatichildrens.org/-/media/cincinnati%20childrens/global%20shared/childrens-logo-new.png"
    s:department:
      - class: s:Organization
        s:legalName: "Allergy and Immunology"
        s:department:
          - class: s:Organization
            s:legalName: "Barski Research Lab"
            s:member:
              - class: s:Person
                s:name: Michael Kotliar
                s:email: mailto:misha.kotliar@gmail.com
                s:sameAs:
                  - id: http://orcid.org/0000-0002-6486-3898


doc: |
  Devel version of Single-Cell Preprocessing Cell Ranger Pipeline
  ===============================================================