# Common Workflow Language (v1.0) workflow definition.
cwlVersion: v1.0
class: Workflow


# Optional runner features requested by this workflow.
# NOTE(review): no step in this file uses valueFrom, JavaScript expressions or
# multi-source inputs, and every step runs a file from ../tools. These
# requirements may still be relied on by the referenced tool definitions, so
# confirm against ../tools/*.cwl before pruning any of them.
requirements:
- class: SubworkflowFeatureRequirement
- class: StepInputExpressionRequirement
- class: InlineJavascriptRequirement
- class: MultipleInputFeatureRequirement


# Workflow inputs. Every input below is forwarded unchanged to the
# aggregate_counts step; no other step reads workflow inputs directly.
inputs:

  # NOTE(review): the four array inputs below presumably hold one element per
  # GEM well, in the same order - confirm against ../tools/cellranger-arc-aggr.cwl
  gem_well_labels:
    type: string[]
    doc: |
      Array of GEM well identifiers to be used for labeling purposes only

  gex_molecule_info_h5:
    type: File[]
    doc: |
      Array of GEX molecule-level information files in HDF5 format.
      Outputs from "cellranger-arc count" command.

  # Each fragments file must be accompanied by its .tbi secondary file.
  atac_fragments_file_from_count:
    type: File[]
    secondaryFiles:
    - .tbi
    doc: |
      Array of files containing count and barcode information for every ATAC
      fragment observed in the experiment in TSV format. Outputs from
      "cellranger-arc count" command.

  barcode_metrics_report:
    type: File[]
    doc: |
      Array of files with the ATAC and GEX read count summaries generated
      for every barcode observed in the experiment.
      Outputs from "cellranger-arc count" command.

  indices_folder:
    type: Directory
    doc: |
      Compatible with Cell Ranger ARC reference folder that includes
      STAR and BWA indices. Should be generated by "cellranger-arc mkref"
      command

  # Optional enum ("none" or "depth"); "none" is used when nothing is provided.
  normalization_mode:
    type:
    - "null"
    - type: enum
      symbols: ["none", "depth"]
    default: "none"
    doc: |
      Library depth normalization mode

  # Optional; defaults to 4.
  threads:
    type: int?
    default: 4
    doc: |
      Number of threads for those steps that support multithreading

  # Optional; defaults to 20. The aggregate_counts step receives this value as
  # both its memory_limit and its virt_memory_limit input.
  memory_limit:
    type: int?
    default: 20
    doc: |
      Maximum memory used (GB). The same will be applied to virtual memory

# Workflow outputs. Plain file results are taken straight from the
# aggregate_counts step; folder results are published as the archives
# produced by the compress_* (tar-compress) steps.
outputs:

  web_summary_report:
    type: File
    outputSource: aggregate_counts/web_summary_report
    doc: |
      Aggregated run summary metrics and charts in HTML format

  metrics_summary_report:
    type: File
    outputSource: aggregate_counts/metrics_summary_report
    doc: |
      Aggregated run summary metrics in CSV format

  atac_fragments_file:
    type: File
    outputSource: aggregate_counts/atac_fragments_file
    doc: |
      Count and barcode information for every ATAC fragment observed in the
      aggregated experiment in TSV format

  atac_peaks_bed_file:
    type: File
    outputSource: aggregate_counts/atac_peaks_bed_file
    doc: |
      Locations of open-chromatin regions identified in the aggregated
      experiment (these regions are referred to as "peaks") in BED format

  atac_peak_annotation_file:
    type: File
    outputSource: aggregate_counts/atac_peak_annotation_file
    doc: |
      Annotations of peaks based on genomic proximity alone (for aggregated
      experiment). Note that these are not functional annotations and they
      do not make use of linkage with GEX data.

  # Published as the archive produced by the
  # compress_secondary_analysis_report_folder step, hence type File.
  secondary_analysis_report_folder:
    type: File
    outputSource: compress_secondary_analysis_report_folder/compressed_folder
    doc: |
      Compressed folder with secondary analysis results including dimensionality
      reduction, cell clustering, and differential expression for aggregated
      results

  # Published as the archive produced by the
  # compress_filtered_feature_bc_matrix_folder step, hence type File.
  filtered_feature_bc_matrix_folder:
    type: File
    outputSource: compress_filtered_feature_bc_matrix_folder/compressed_folder
    doc: |
      Compressed folder with aggregated filtered feature-barcode matrices containing
      only cellular barcodes in MEX format

  filtered_feature_bc_matrix_h5:
    type: File
    outputSource: aggregate_counts/filtered_feature_bc_matrix_h5
    doc: |
      Aggregated filtered feature-barcode matrices containing only cellular barcodes
      in HDF5 format

  # Published as the archive produced by the
  # compress_raw_feature_bc_matrices_folder step, hence type File.
  raw_feature_bc_matrices_folder:
    type: File
    outputSource: compress_raw_feature_bc_matrices_folder/compressed_folder
    doc: |
      Compressed folder with aggregated unfiltered feature-barcode matrices containing
      all barcodes in MEX format

  raw_feature_bc_matrices_h5:
    type: File
    outputSource: aggregate_counts/raw_feature_bc_matrices_h5
    doc: |
      Aggregated unfiltered feature-barcode matrices containing all barcodes
      in HDF5 format

  loupe_browser_track:
    type: File
    outputSource: aggregate_counts/loupe_browser_track
    doc: |
      Loupe Browser visualization and analysis file for aggregated results

  aggregation_metadata:
    type: File
    outputSource: aggregate_counts/aggregation_metadata
    doc: |
      Aggregation metadata in CSV format

  # Archive of the html_data output of cellbrowser_build, produced by the
  # compress_html_data_folder step.
  compressed_html_data_folder:
    type: File
    outputSource: compress_html_data_folder/compressed_folder
    doc: |
      Compressed folder with CellBrowser formatted results

  # Captured stdout/stderr of the aggregate_counts step.
  aggregate_counts_stdout_log:
    type: File
    outputSource: aggregate_counts/stdout_log
    doc: |
      stdout log generated by cellranger-arc aggr

  aggregate_counts_stderr_log:
    type: File
    outputSource: aggregate_counts/stderr_log
    doc: |
      stderr log generated by cellranger-arc aggr


# Processing graph:
#   aggregate_counts -> cellbrowser_build -> compress_html_data_folder
#   aggregate_counts -> compress_filtered_feature_bc_matrix_folder
#   aggregate_counts -> compress_raw_feature_bc_matrices_folder
#   aggregate_counts -> compress_secondary_analysis_report_folder
steps:

  # Runs the cellranger-arc-aggr tool on all workflow inputs. It is the only
  # step that reads workflow inputs directly.
  aggregate_counts:
    run: ../tools/cellranger-arc-aggr.cwl
    in:
      atac_fragments_file_from_count: atac_fragments_file_from_count
      barcode_metrics_report: barcode_metrics_report
      gex_molecule_info_h5: gex_molecule_info_h5
      gem_well_labels: gem_well_labels
      indices_folder: indices_folder
      normalization_mode: normalization_mode
      threads: threads
      # The same workflow input feeds both memory limits of the tool.
      memory_limit: memory_limit
      virt_memory_limit: memory_limit
    out:
    - web_summary_report
    - metrics_summary_report
    - atac_fragments_file
    - atac_peaks_bed_file
    - atac_peak_annotation_file
    - secondary_analysis_report_folder
    - filtered_feature_bc_matrix_folder
    - filtered_feature_bc_matrix_h5
    - raw_feature_bc_matrices_folder
    - raw_feature_bc_matrices_h5
    - aggregation_metadata
    - loupe_browser_track
    - stdout_log
    - stderr_log

  # Builds the CellBrowser data (html_data) from three aggregate_counts
  # outputs: secondary analysis folder, filtered matrix folder, and
  # aggregation metadata.
  cellbrowser_build:
    run: ../tools/cellbrowser-build-cellranger-arc.cwl
    in:
      secondary_analysis_report_folder: aggregate_counts/secondary_analysis_report_folder
      filtered_feature_bc_matrix_folder: aggregate_counts/filtered_feature_bc_matrix_folder
      aggregation_metadata: aggregate_counts/aggregation_metadata
    out:
    - html_data

  # The four steps below all run the same tar-compress tool, each on a
  # different folder, and expose the result as compressed_folder.
  compress_filtered_feature_bc_matrix_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: aggregate_counts/filtered_feature_bc_matrix_folder
    out:
    - compressed_folder

  compress_raw_feature_bc_matrices_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: aggregate_counts/raw_feature_bc_matrices_folder
    out:
    - compressed_folder

  compress_secondary_analysis_report_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: aggregate_counts/secondary_analysis_report_folder
    out:
    - compressed_folder

  # Compresses the output of cellbrowser_build rather than of aggregate_counts.
  compress_html_data_folder:
    run: ../tools/tar-compress.cwl
    in:
      folder_to_compress: cellbrowser_build/html_data
    out:
    - compressed_folder


# Namespace prefix and RDF schema backing the "s:" (schema.org) annotations
# used in the rest of this file.
$namespaces:
  s: http://schema.org/

$schemas:
- https://github.com/schemaorg/schemaorg/raw/main/data/releases/11.01/schemaorg-current-http.rdf


# Descriptive metadata: display name/label and a short summary.
s:name: "Single-cell Multiome ATAC and RNA-Seq Aggregate"
label: "Single-cell Multiome ATAC and RNA-Seq Aggregate"
s:alternateName: |
  Aggregates data from multiple Single-cell Multiome ATAC and
  RNA-Seq Alignment experiments

# Where this workflow file and its repository live, and its license.
s:downloadUrl: https://raw.githubusercontent.com/Barski-lab/scRNA-Seq-Analysis/main/workflows/sc-multiome-aggregate-wf.cwl
s:codeRepository: https://github.com/Barski-lab/scRNA-Seq-Analysis
s:license: http://www.apache.org/licenses/LICENSE-2.0

s:isPartOf:
  class: s:CreativeWork
  s:name: Common Workflow Language
  s:url: http://commonwl.org/

# Authorship, expressed as nested organizations:
# institution -> department -> lab -> person.
s:creator:
- class: s:Organization
  s:legalName: "Cincinnati Children's Hospital Medical Center"
  s:location:
  - class: s:PostalAddress
    s:addressCountry: "USA"
    s:addressLocality: "Cincinnati"
    s:addressRegion: "OH"
    s:postalCode: "45229"
    s:streetAddress: "3333 Burnet Ave"
    s:telephone: "+1(513)636-4200"
  s:logo: "https://www.cincinnatichildrens.org/-/media/cincinnati%20childrens/global%20shared/childrens-logo-new.png"
  s:department:
  - class: s:Organization
    s:legalName: "Allergy and Immunology"
    s:department:
    - class: s:Organization
      s:legalName: "Barski Research Lab"
      s:member:
      - class: s:Person
        s:name: Michael Kotliar
        s:email: mailto:misha.kotliar@gmail.com
        s:sameAs:
        - id: http://orcid.org/0000-0002-6486-3898


# Long-form description of the workflow: a title underlined with "=" followed
# by a summary paragraph.
doc: |
  Single-cell Multiome ATAC and RNA-Seq Aggregate
  ===========================================================

  Aggregates data from multiple Single-cell Multiome ATAC and
  RNA-Seq Alignment experiments