#!/usr/bin/env cwl-runner
# Workflow header, engine requirements and input declarations.
# Each key sits on its own line: a "#" starts a comment that runs to the end
# of the physical line, so keys must never share a line with the shebang or
# with a trailing comment.
label: "PGAP Pipeline"
cwlVersion: v1.2
class: Workflow
doc: PGAP pipeline for external usage, powered via containers

requirements:
  - class: SubworkflowFeatureRequirement
  # Needed because several step inputs below list more than one source.
  - class: MultipleInputFeatureRequirement
  - class: LoadListingRequirement
    loadListing: deep_listing
  - class: NetworkAccess
    networkAccess: true

inputs:
  #
  # User specific input
  #
  go:
    type: boolean[]
  entries: File?
  seq_submit: File?
  taxid: int
  gc_assm_name: string
  locus_tag_prefix: string?
  dbname: string?
  report_usage: boolean

  #
  # User independent, static input
  #
  scatter_gather_nchunks:
    # Declared as a string, so the default stays quoted.
    type: string
    default: '1'
  supplemental_data:
    type: Directory
  blast_hits_cache_data:
    type: Directory?
  submol_block_json: File
  ignore_all_errors: boolean?
  contact_as_author_possible: boolean?

  # XPath expressions handed to the xml_evaluate steps as `xpath_fail`.
  # The single-line defaults are quoted so the brackets, "@" and "*" can never
  # be read as YAML syntax.
  xpath_fail_initial_asndisc:
    type: string?
    doc: 'The default: setting is for standard call of pgap.cwl (for example, from pgap.py)'
    default: '//*[@severity="FATAL"]'
  xpath_fail_initial_asnvalidate:
    type: string?
    doc: 'The default: setting is for standard call of pgap.cwl (for example, from pgap.py)'
    # Folded scalar: every line is kept at the same indent so the lines fold
    # into one space-separated XPath expression.
    default: >
      //*[
      ( @severity="ERROR" or @severity="REJECT" )
      and not(contains(@code, "GENERIC_MissingPubRequirement"))
      and not(contains(@code, "SEQ_DESCR_ChromosomeLocation"))
      and not(contains(@code, "SEQ_DESCR_MissingLineage"))
      and not(contains(@code, "SEQ_DESCR_NoTaxonID"))
      and not(contains(@code, "SEQ_DESCR_OrganismIsUndefinedSpecies"))
      and not(contains(@code, "SEQ_DESCR_StrainWithEnvironSample"))
      and not(contains(@code, "SEQ_DESCR_BacteriaMissingSourceQualifier"))
      and not(contains(@code, "SEQ_DESCR_UnwantedCompleteFlag"))
      and not(contains(@code, "SEQ_FEAT_BadCharInAuthorLastName"))
      and not(contains(@code, "SEQ_FEAT_ShortIntron"))
      and not(contains(@code, "SEQ_INST_InternalNsInSeqRaw"))
      and not(contains(@code, "SEQ_INST_ProteinsHaveGeneralID"))
      and not(contains(@code, "SEQ_PKG_NucProtProblem"))
      and not(contains(@code, "SEQ_PKG_ComponentMissingTitle"))
      ]
  xpath_fail_final_asndisc:
    type: string?
    doc: 'The default: setting is for standard call of pgap.cwl (for example, from pgap.py)'
    default: '//*[@severity="FATAL"]'
  xpath_fail_final_asnvalidate:
    type: string?
    doc: 'The default: setting is for standard call of pgap.cwl (for example, from pgap.py)'
    # Folded scalar: every line is kept at the same indent so the lines fold
    # into one space-separated XPath expression.
    default: >
      //*[( @severity="ERROR" or @severity="REJECT" )
      and not(contains(@code, "GENERIC_MissingPubRequirement"))
      and not(contains(@code, "SEQ_DESCR_ChromosomeLocation"))
      and not(contains(@code, "SEQ_DESCR_MissingLineage"))
      and not(contains(@code, "SEQ_DESCR_NoTaxonID"))
      and not(contains(@code, "SEQ_DESCR_OrganismIsUndefinedSpecies"))
      and not(contains(@code, "SEQ_DESCR_StrainWithEnvironSample"))
      and not(contains(@code, "SEQ_DESCR_BacteriaMissingSourceQualifier"))
      and not(contains(@code, "SEQ_DESCR_UnwantedCompleteFlag"))
      and not(contains(@code, "SEQ_FEAT_BadCharInAuthorLastName"))
      and not(contains(@code, "SEQ_FEAT_ShortIntron"))
      and not(contains(@code, "SEQ_INST_InternalNsInSeqRaw"))
      and not(contains(@code, "SEQ_INST_ProteinsHaveGeneralID"))
      and not(contains(@code, "SEQ_PKG_ComponentMissingTitle"))
      and not(contains(@code, "SEQ_PKG_NucProtProblem"))
      ]
  no_internet:
    type: boolean?
  make_uuid:
    type: boolean?
    default: true
  uuid_in:
    type: File?
# Step graph and workflow outputs.
# Each step names the tool or sub-workflow it runs, wires its inputs either to
# a workflow input (bare name) or to another step's output (step/output), and
# lists the outputs it exposes. Step order in this file carries no meaning;
# the engine orders execution from the data dependencies.
steps:
  ping_start:
    run: progs/pinger.cwl
    in:
      report_usage: report_usage
      make_uuid: make_uuid
      uuid_in: uuid_in
      state:
        default: "start"
      workflow:
        default: "pgap"
      instring: gc_assm_name
    out: [stdout, outstring, uuid_out]

  # Splits the supplemental data directory into the individual resources
  # consumed by the steps below.
  passdata:
    in:
      data: supplemental_data
    run: expr/supplemental_data_split_dir.cwl
    out:
      - 5s_model_path
      - 16s_model_path
      - 23s_model_path
      - AntiFamLib
      - all_order_specific_blastdb_file
      - asn2pas_xsl
      - identification_db_dir
      - CDDdata2
      - CDDdata
      - checkm_data_path
      - defline_cleanup_rules
      - filter_for_raw_checkm
      - gc_cache
      - gene_master_ini
      - hmm_path
      - hmms_tab
      - naming_hmms_combined
      - naming_hmms_tab
      - naming_sqlite
      - package_versions
      - rfam_amendments
      - rfam_model_path
      - rfam_stockholm
      - selenoproteins
      - species_genome_size
      - taxon_db
      - thresholds
      - uniColl_cache
      - uniColl_nuc_cache
      - univ_prot_xml
      - val_res_den_xml
      - wp_hashes

  #
  # massage passdata output here
  #
  Get_Order_Specific_Strings:
    label: "Get List of Order Specific Databases in the form of string[]"
    run: progs/file2basenames.cwl
    in:
      input: passdata/all_order_specific_blastdb_file
    out: [values]
  log_package_versions:
    run: progs/catlog.cwl
    in:
      input:
        source:
          - passdata/package_versions
        linkMerge: merge_flattened
    out: []
  blast_hits_cache_data_split_dir:
    in:
      data: blast_hits_cache_data
    run: expr/blast_hits_cache_data_split_dir.cwl
    out:
      - blast_hits_cache
      - genus_list
  genus_list_file2ints:
    run: progs/file2ints.cwl
    in:
      input: blast_hits_cache_data_split_dir/genus_list
    out: [values]
  # end of massaging passdata output

  genomic_source:  # PLANE
    run: genomic_source/wf_genomic_source_asn.cwl
    in:
      entries: entries
      seq_submit: seq_submit
      # taxid: taxid
      gc_assm_name: ping_start/outstring
      taxon_db: passdata/taxon_db
    out:
      - gencoll_asn
      - seqid_list
      - stats_report
      - asncache
      - ids_out
      - submit_block_template
      - order

  #
  # Pseudo plane "default 1"
  #
  # ORIGINAL TASK NAME: Prepare Unannotated Sequences (default 1)
  Prepare_Unannotated_Sequences:
    label: "Prepare Unannotated Sequences"
    run: bacterial_prepare_unannotated.cwl
    in:
      asn_cache: genomic_source/asncache
      gc_assembly: genomic_source/gencoll_asn
      ids: genomic_source/seqid_list
      submit_block: genomic_source/submit_block_template
      taxon_db: passdata/taxon_db
      no_internet: no_internet
    out: [master_desc, sequences, plasmids]
  Prepare_Unannotated_Sequences_pgapx_input_check:
    run: progs/pgapx_input_check.cwl
    in:
      input: Prepare_Unannotated_Sequences/sequences
      max_size:
        default: 15000000
      min_size:
        default: 300
      species_genome_size: passdata/species_genome_size
      taxon_db: passdata/taxon_db
      ignore_all_errors: ignore_all_errors
    out: []
  Prepare_Unannotated_Sequences_text:
    run: progs/asn_translator.cwl
    in:
      input: Prepare_Unannotated_Sequences/sequences
      output_output:
        default: 'sequences.text.asn'
    out: [output]
  Prepare_Unannotated_Sequences_asndisc_cpp:
    run: progs/asndisc_cpp.cwl
    in:
      XML:
        default: true
      genbank:
        default: false
      P:
        default: 't'
      a:
        default: 'c'
      asn_cache: genomic_source/asncache
      o_output:
        default: 'sequences.disc.xml'
      i: Prepare_Unannotated_Sequences_text/output
      d:
        default:
          - AUTODEF_USER_OBJECT
          - FEATURE_LIST
          - BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS
          - PARTIAL_CDS_COMPLETE_SEQUENCE
          - MISSING_AFFIL
          - OVERLAPPING_CDS
          - BAD_BGPIPE_QUALS
          - FLATFILE_FIND
          - COMMENT_PRESENT
          - SHORT_PROT_SEQUENCES
          - OVERLAPPING_GENES
          - EXTRA_GENES
          - N_RUNS
          - TAX_LOOKUP_MISMATCH
          - TAX_LOOKUP_MISSING
    out: [o]
  Prepare_Unannotated_Sequences_asndisc_evaluate:
    run: progs/xml_evaluate.cwl
    in:
      input: Prepare_Unannotated_Sequences_asndisc_cpp/o
      xpath_fail: xpath_fail_initial_asndisc
      ignore_all_errors: ignore_all_errors
      stdout_redir:
        default: 'initial_asndisc_diag.xml'
    out: [success, xml_output]
  Prepare_Unannotated_Sequences_asnvalidate:
    run: progs/asnvalidate.cwl
    in:
      Q:
        default: 0
      R:
        default: 5
      i: Prepare_Unannotated_Sequences/sequences
      o_output:
        default: 'sequences.val'
      v:
        default: 4
      A:
        default: true
      U:
        default: true
      Z:
        default: true
      # Quoted so that no YAML 1.1 loader can read this key as a boolean.
      'y':
        default: true
    out: [o]
  Prepare_Unannotated_Sequences_asnvalidate_evaluate:
    run: progs/xml_evaluate.cwl
    in:
      input: Prepare_Unannotated_Sequences_asnvalidate/o
      xpath_fail: xpath_fail_initial_asnvalidate
      ignore_all_errors: ignore_all_errors
      stdout_redir:
        default: 'initial_asnval_diag.xml'
    out: [success, xml_output]

  # ORIGINAL TASK NAME: Cache Entrez Gene (default 1)
  # `go` is wired to the `success` outputs of both initial evaluate steps,
  # which makes this step depend on them; the same pair is reused by the
  # other `go` inputs further down.
  Cache_Entrez_Gene:
    label: "Cache Entrez Gene"
    run: cache_entrez_gene.cwl
    in:
      asn_cache: [genomic_source/asncache, passdata/uniColl_cache]
      egene_ini: passdata/gene_master_ini
      input: Prepare_Unannotated_Sequences/sequences
      go:
        - Prepare_Unannotated_Sequences_asndisc_evaluate/success
        - Prepare_Unannotated_Sequences_asnvalidate_evaluate/success
    out: [prok_entrez_gene_stuff]
  Create_Genomic_BLASTdb:  # default 1
    label: "Create Genomic BLASTdb"  # default 1
    run: progs/gp_makeblastdb.cwl
    in:
      ids: genomic_source/ids_out
      title:
        default: 'BLASTdb created by GPipe'
      asn_cache:
        source: [genomic_source/asncache]
        linkMerge: merge_flattened
      dbtype:
        default: 'nucl'
    out: [blastdb]
  #
  # end of pseudo plane "default 1"
  #

  Get_Proteins:
    label: "Get Proteins"
    run: wf_bacterial_prot_src.cwl
    in:
      uniColl_asn_cache: passdata/uniColl_cache
      naming_sqlite: passdata/naming_sqlite
      taxid: taxid
      tax_sql_file: passdata/taxon_db
      blastdb_dir: passdata/identification_db_dir
      all_order_specific_blastdb: Get_Order_Specific_Strings/values
    out: [universal_clusters, all_prots, selected_blastdb]

  bacterial_ncrna:  # PLANE
    run: bacterial_ncrna/wf_gcmsearch.cwl
    in:
      go:
        - Prepare_Unannotated_Sequences_asndisc_evaluate/success
        - Prepare_Unannotated_Sequences_asnvalidate_evaluate/success
      asn_cache: genomic_source/asncache
      seqids: genomic_source/seqid_list
      model_path: passdata/rfam_model_path
      rfam_amendments: passdata/rfam_amendments
      rfam_stockholm: passdata/rfam_stockholm
      taxon_db: passdata/taxon_db
    out: [annots]

  bacterial_mobile_elem:  # PLANE
    run: bacterial_mobile_elem/wf_bacterial_mobile_elem.cwl
    in:
      go:
        - Prepare_Unannotated_Sequences_asndisc_evaluate/success
        - Prepare_Unannotated_Sequences_asnvalidate_evaluate/success
      asn_cache: genomic_source/asncache
      seqids: genomic_source/seqid_list
    out: [annots]

  bacterial_noncoding:  # PLANE
    run: bacterial_noncoding/wf_bacterial_noncoding.cwl
    in:
      go:
        - Prepare_Unannotated_Sequences_asndisc_evaluate/success
        - Prepare_Unannotated_Sequences_asnvalidate_evaluate/success
      asn_cache: genomic_source/asncache
      seqids: genomic_source/seqid_list
      model_path_5s: passdata/5s_model_path
      model_path_16s: passdata/16s_model_path
      model_path_23s: passdata/23s_model_path
      rfam_amendments: passdata/rfam_amendments
      rfam_stockholm: passdata/rfam_stockholm
      taxon_db: passdata/taxon_db
    out: [annotations_5s, annotations_16s, annotations_23s]

  bacterial_trna:  # PLANE
    run: bacterial_trna/wf_trnascan.cwl
    in:
      go:
        - Prepare_Unannotated_Sequences_asndisc_evaluate/success
        - Prepare_Unannotated_Sequences_asnvalidate_evaluate/success
      asn_cache: genomic_source/asncache
      seqids: genomic_source/seqid_list
      taxid: taxid
      taxon_db: passdata/taxon_db
      scatter_gather_nchunks: scatter_gather_nchunks
    out: [annots]

  bacterial_annot:  # PLANE
    run: bacterial_annot/wf_bacterial_annot_pass1.cwl
    in:
      go:
        - Prepare_Unannotated_Sequences_asndisc_evaluate/success
        - Prepare_Unannotated_Sequences_asnvalidate_evaluate/success
      asn_cache: genomic_source/asncache
      inseq: Prepare_Unannotated_Sequences/sequences
      hmm_path: passdata/hmm_path
      hmms_tab: passdata/hmms_tab
      selenoproteins: passdata/selenoproteins
      scatter_gather_nchunks: scatter_gather_nchunks
      uniColl_cache: passdata/uniColl_cache
      naming_sqlite: passdata/naming_sqlite
      trna_annots: bacterial_trna/annots
      ncrna_annots: bacterial_ncrna/annots
      nogenbank:
        default: true
      Execute_CRISPRs_annots: bacterial_mobile_elem/annots
      Generate_16S_rRNA_Annotation_annotation: bacterial_noncoding/annotations_16s
      Generate_23S_rRNA_Annotation_annotation: bacterial_noncoding/annotations_23s
      Post_process_CMsearch_annotations_annots_5S: bacterial_noncoding/annotations_5s
      # NOTE(review): hard-coded absolute path; the same default is repeated
      # in bacterial_annot_misc.
      genemark_path:
        default: /netmnt/vast01/gp/ThirdParty/GeneMark/
      thresholds: passdata/thresholds
    out:
      - lds2
      - seqids
      - proteins
      - aligns
      - annotation
      - out_hmm_params
      - outseqs
      - prot_ids
      - models1

  spurious_annot_prelim:  # PLANE
    run: spurious_annot/wf_spurious_annot_pass1.cwl
    in:
      Extract_ORF_Proteins_proteins: bacterial_annot/proteins
      Extract_ORF_Proteins_seqids: bacterial_annot/seqids
      Extract_ORF_Proteins_lds2: bacterial_annot/lds2
      AntiFamLib: passdata/AntiFamLib
      sequence_cache: genomic_source/asncache
      scatter_gather_nchunks: scatter_gather_nchunks
    out: [AntiFam_tainted_proteins_I___oseqids]

  bacterial_annot_1st_pass:  # PLANE
    run: bacterial_annot/wf_bacterial_annot_pass2.cwl
    in:
      lds2: bacterial_annot/lds2
      proteins: bacterial_annot/proteins
      prot_ids_A: bacterial_annot/seqids
      prot_ids_B1: bacterial_annot/prot_ids
      prot_ids_B2: spurious_annot_prelim/AntiFam_tainted_proteins_I___oseqids
      identification_db_dir: passdata/identification_db_dir
      blastdb: Get_Proteins/selected_blastdb
      annotation: bacterial_annot/outseqs
      sequence_cache: genomic_source/asncache
      unicoll_cache: passdata/uniColl_cache
      raw_seqs: Prepare_Unannotated_Sequences/sequences
      plasmids: Prepare_Unannotated_Sequences/plasmids
      scatter_gather_nchunks: scatter_gather_nchunks
      taxid: taxid
      blast_hits_cache: blast_hits_cache_data_split_dir/blast_hits_cache
      taxon_db: passdata/taxon_db
      genus_list: genus_list_file2ints/values
    # label: "goes to protein_alignment/Seed Search Compartments/compartments"
    out: [aligns]

  protein_alignment:  # PLANE
    run: protein_alignment/wf_protein_alignment.cwl
    in:
      go:
        - Prepare_Unannotated_Sequences_asndisc_evaluate/success
        - Prepare_Unannotated_Sequences_asnvalidate_evaluate/success
      asn_cache: genomic_source/asncache
      uniColl_asn_cache: passdata/uniColl_cache
      blastdb_dir: Create_Genomic_BLASTdb/blastdb
      taxid: taxid
      tax_sql_file: passdata/taxon_db
      gc_assembly: genomic_source/gencoll_asn
      compartments: bacterial_annot_1st_pass/aligns
      all_prots: Get_Proteins/all_prots
    out: [align, align_non_match]

  bacterial_annot_misc:
    run: bacterial_annot/wf_bacterial_annot_pass3.cwl
    in:
      AntiFamLib: passdata/AntiFamLib
      uniColl_cache: passdata/uniColl_cache
      sequence_cache: genomic_source/asncache
      hmm_aligns: bacterial_annot/aligns
      scatter_gather_nchunks: scatter_gather_nchunks
      # label: "Filter Protein Alignments/align"
      prot_aligns: protein_alignment/align
      annotation: bacterial_annot/annotation
      models1: bacterial_annot/models1
      raw_seqs: Prepare_Unannotated_Sequences/sequences
      thresholds: passdata/thresholds
      naming_sqlite: passdata/naming_sqlite
      # Run GeneMark Training/hmm_params (EXTERNAL, put to input/)
      hmm_params: bacterial_annot/out_hmm_params
      selenoproteins: passdata/selenoproteins
      naming_hmms_combined: passdata/naming_hmms_combined
      hmms_tab: passdata/naming_hmms_tab
      wp_hashes: passdata/wp_hashes
      taxon_db: passdata/taxon_db
      genemark_path:
        default: /netmnt/vast01/gp/ThirdParty/GeneMark/
    out:
      - id: Find_Best_Evidence_Alignments_aligns
      - id: Run_GeneMark_Post_models
      - id: Extract_Model_Proteins_seqids
      - id: Extract_Model_Proteins_lds2
      - id: Extract_Model_Proteins_proteins
      - id: Search_Naming_HMMs_hmm_hits
      - id: Assign_Naming_HMM_to_Proteins_assignments
      - id: Name_by_WPs_names
      - id: PGAP_plus_ab_initio_annotation

  spurious_annot_final:
    run: spurious_annot/wf_spurious_annot_pass2.cwl
    in:
      Extract_Model_Proteins_proteins: bacterial_annot_misc/Extract_Model_Proteins_proteins
      Extract_Model_Proteins_seqids: bacterial_annot_misc/Extract_Model_Proteins_seqids
      Extract_Model_Proteins_lds2: bacterial_annot_misc/Extract_Model_Proteins_lds2
      AntiFamLib: passdata/AntiFamLib
      sequence_cache: genomic_source/asncache
      scatter_gather_nchunks: scatter_gather_nchunks
      input_models: bacterial_annot_misc/PGAP_plus_ab_initio_annotation
    out:
      - AntiFam_tainted_proteins___oseqids
      - Good_AntiFam_filtered_annotations_out
      - Good_AntiFam_filtered_proteins_output

  bacterial_annot_2nd_pass:
    run: bacterial_annot/wf_bacterial_annot_pass4.cwl
    in:
      lds2: bacterial_annot_misc/Extract_Model_Proteins_lds2
      proteins: bacterial_annot_misc/Extract_Model_Proteins_proteins
      annotation: spurious_annot_final/Good_AntiFam_filtered_annotations_out
      Good_AntiFam_filtered_proteins_gilist: spurious_annot_final/Good_AntiFam_filtered_proteins_output
      sequence_cache: genomic_source/asncache
      uniColl_cache: passdata/uniColl_cache
      identification_db_dir: passdata/identification_db_dir
      naming_sqlite: passdata/naming_sqlite
      hmm_assignments: bacterial_annot_misc/Assign_Naming_HMM_to_Proteins_assignments
      wp_assignments: bacterial_annot_misc/Name_by_WPs_names
      Extract_Model_Proteins_prot_ids: bacterial_annot_misc/Extract_Model_Proteins_seqids
      CDDdata: passdata/CDDdata
      CDDdata2: passdata/CDDdata2
      thresholds: passdata/thresholds
      defline_cleanup_rules: passdata/defline_cleanup_rules
      blastdb: Get_Proteins/selected_blastdb
      scatter_gather_nchunks: scatter_gather_nchunks
      taxid: taxid
      blast_hits_cache: blast_hits_cache_data_split_dir/blast_hits_cache
      taxon_db: passdata/taxon_db
      genus_list: genus_list_file2ints/values
    out:
      - id: out_annotation

  #
  # Pseudo plane default 2, we do not need that for new submissions in
  # off-NCBI environment
  #
  # Preserve_Annotations: # Pseudo plane default 2
  #   run: task_types/tt_preserve_annot.cwl
  #   in:
  #     asn_cache:
  #       source: [genomic_source/asncache]
  #       linkMerge: merge_flattened
  #     input_annotation: bacterial_annot/annotation
  #     rfam_amendments: rfam_amendments
  #     no_ncRNA:
  #       default: true
  #   out: [annotations]
  # preserve_annot_markup: # Pseudo plane default 2
  #   # uncharted territory!!!
  #   run: preserve_annot_markup.cwl # Preserve Product Accessions
  #   in:
  #     #seq_cache: genobacterial_prepare_unannotated/asncache
  #     #unicoll_cache: uniColl_cache
  #     input_annotation: Preserve_Annotations/annotations
  #     asn_cache: [genomic_source/asncache, uniColl_cache]
  #     egene_ini: gene_master_ini
  #     gc_assembly: genomic_source/gencoll_asn
  #     input: Preserve_Annotations/annotations
  #     prok_entrez_gene_stuff: Cache_Entrez_Gene/prok_entrez_gene_stuff
  #   out: [annotations]
  #
  # End of Pseudo plane default 2
  #
  # This step takes input from bacterial_annot 4/Bacterial Annot Filter,
  # see GP-23942
  # Status:
  #   tasktype coded, input/output matches
  #   application not coded

  ###############################################
  # AMR plane is for later stages skipping
  ###############################################

  # Consumes Add_Locus_Tags/output; Add_Locus_Tags is declared just below.
  bacterial_orthology_conditional:
    run: bacterial_orthology/wf_bacterial_orthology_conditional.cwl
    in:
      input: Add_Locus_Tags/output
      taxid: taxid
      naming_sqlite: passdata/naming_sqlite
      taxon_db: passdata/taxon_db
      gc_cache: passdata/gc_cache
      asn_cache:
        source: [passdata/uniColl_nuc_cache, genomic_source/asncache]
        linkMerge: merge_flattened
      genus_list: genus_list_file2ints/values
      blastdb:
        default: [blastdb]
      scatter_gather_nchunks: scatter_gather_nchunks
      gencoll_asn: genomic_source/gencoll_asn
    out: [output]
  Add_Locus_Tags:
    run: progs/add_locus_tags.cwl
    in:
      input: bacterial_annot_2nd_pass/out_annotation
      locus_tag_prefix: locus_tag_prefix
      dbname: dbname
    out: [output]

  #
  # Pseudo plane default 3
  #
  #
  # Final_Bacterial_Package task
  #
  Final_Bacterial_Package_asn_cleanup:
    run: progs/asn_cleanup.cwl
    in:
      inp_annotation: bacterial_orthology_conditional/output  # production
    out: [annotation]
  Final_Bacterial_Package_final_bact_asn:
    run: progs/final_bact_asn.cwl
    in:
      annotation:
        source: [Final_Bacterial_Package_asn_cleanup/annotation]
        linkMerge: merge_flattened
      asn_cache: genomic_source/asncache
      gc_assembly: genomic_source/gencoll_asn  # gc_create_from_sequences
      master_desc: Prepare_Unannotated_Sequences/master_desc
      it:
        default: true
      submission_mode_genbank:
        default: true
      nogenbank:
        default: true
      order: genomic_source/order
    out: [outfull]
  Final_Bacterial_Package_dumb_down_as_required:
    run: progs/dumb_down_as_required.cwl
    in:
      annotation: Final_Bacterial_Package_final_bact_asn/outfull
      asn_cache:
        source: [genomic_source/asncache]
        linkMerge: merge_flattened
      max_x_ratio:
        default: 0.1
      max_x_run:
        default: 3
      partial_cov_threshold:
        default: 65
      partial_len_threshold:
        default: 30
      drop_partial_in_the_middle:
        default: true
      submission_mode_genbank:
        default: true
      submol_block_json: submol_block_json
      nogenbank:
        default: true
      it:
        default: true
    out: [outent]
  Final_Bacterial_Package_ent2sqn:
    run: progs/ent2sqn.cwl
    in:
      annotation: Final_Bacterial_Package_dumb_down_as_required/outent
      asn_cache:
        source: [genomic_source/asncache]
        linkMerge: merge_flattened
      gc_assembly: genomic_source/gencoll_asn  # gc_create_from_sequences
      submit_block_template:
        source: [genomic_source/submit_block_template]
        linkMerge: merge_flattened
      it:
        default: true
      contact_as_author_possible: contact_as_author_possible
      output_impl:
        default: annot-wo-checksum.sqn
    out: [output]
  add_checksum_sqn:
    label: Add Checksum to SQN
    run: progs/annot_checksum.cwl
    in:
      input: Final_Bacterial_Package_ent2sqn/output
      output_name:
        default: 'annot.sqn'
      t:
        default: true
      ifmt:
        default: seq-submit
      mode:
        default: add
    out: [output]
  Final_Bacterial_Package_sqn2gbent:
    run: progs/sqn2gbent.cwl
    doc: We are not taking here sqn with added annot checksum.
    in:
      input: Final_Bacterial_Package_ent2sqn/output
      it:
        default: true
      out_name:
        default: annot-gb-wo-checksum.ent
    out: [output]
  checkm:
    label: 'Run CheckM in PGAP graph'
    doc: 'Identify completeness of genome based on core HMM models in CheckM'
    run: checkm/wf_checkm.cwl
    in:
      models: Final_Bacterial_Package_sqn2gbent/output
      checkm_data_path: passdata/checkm_data_path
      filter_for_raw_checkm: passdata/filter_for_raw_checkm
      taxid: taxid
      taxon_db: passdata/taxon_db
    out: [checkm_raw, checkm_results]
  add_checksum_gbent:
    label: Add Checksum to Genbank class ENT
    run: progs/annot_checksum.cwl
    in:
      input: Final_Bacterial_Package_sqn2gbent/output
      output_name:
        default: 'annot-gb.ent'
      t:
        default: true
      ifmt:
        default: seq-entry
      mode:
        default: add
    out: [output]
  Generate_Annotation_Reports_gff:
    run: progs/gp_annot_format.cwl
    in:
      input: Final_Bacterial_Package_dumb_down_as_required/outent
      ifmt:
        default: seq-entry
      t:
        default: true
      ofmt:
        default: gff3
      exclude_external:
        default: true
    out: [output]
  Generate_Annotation_Reports_gbk:
    run: progs/asn2flat.cwl
    in:
      input: Final_Bacterial_Package_sqn2gbent/output
      no_external:
        default: true
      type:
        default: seq-entry
      mode:
        default: entrez
      style:
        default: master
      gbload:
        default: false
    out: [output]
  Generate_Annotation_Reports_nuc_fasta:
    run: progs/asn2fasta.cwl
    in:
      i: Final_Bacterial_Package_sqn2gbent/output
      type:
        default: seq-entry
      nuc_fasta_name:
        default: annot.fna
    out: [nuc_fasta]
  Generate_Annotation_Reports_prot_fasta:
    run: progs/asn2fasta.cwl
    in:
      i: Final_Bacterial_Package_sqn2gbent/output
      type:
        default: seq-entry
      prot_fasta_name:
        default: annot.faa
    out: [prot_fasta]
  Generate_Annotation_Reports_cds_nuc_fasta:
    run: progs/asn2fasta.cwl
    in:
      i: Final_Bacterial_Package_sqn2gbent/output
      type:
        default: seq-entry
      feats:
        default: fasta_cds_na
      fasta_name:
        default: annot_cds_from_genomic.fna
    out: [fasta]
  Generate_Annotation_Reports_cds_prot_fasta:
    run: progs/asn2fasta.cwl
    in:
      i: Final_Bacterial_Package_sqn2gbent/output
      type:
        default: seq-entry
      feats:
        default: fasta_cds_aa
      fasta_name:
        default: annot_translated_cds.faa
    out: [fasta]
  Final_Bacterial_Package_std_validation:
    run: progs/std_validation.cwl
    in:
      annotation: Final_Bacterial_Package_dumb_down_as_required/outent
      asn_cache:
        source: [genomic_source/asncache]
      # Must stay under `default:`; a bare list here would be read as a list
      # of step-input sources.
      exclude_asndisc_codes:
        default:
          - AUTODEF_USER_OBJECT
          - FEATURE_LIST
          - BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS
          - PARTIAL_CDS_COMPLETE_SEQUENCE
          - MISSING_AFFIL
          - OVERLAPPING_CDS
          - BAD_BGPIPE_QUALS
          - FLATFILE_FIND
          - COMMENT_PRESENT
          - SHORT_PROT_SEQUENCES
          - OVERLAPPING_GENES
          - EXTRA_GENES
          - N_RUNS
          - BAD_LOCUS_TAG_FORMAT
          - TAX_LOOKUP_MISMATCH
          - TAX_LOOKUP_MISSING
      inent: Final_Bacterial_Package_dumb_down_as_required/outent
      ingb: Final_Bacterial_Package_sqn2gbent/output
      insqn: Final_Bacterial_Package_ent2sqn/output
      master_desc:
        source: [Prepare_Unannotated_Sequences/master_desc]
        linkMerge: merge_flattened
      submit_block_template:
        source: [genomic_source/submit_block_template]
        linkMerge: merge_flattened
      it:
        default: true
      submission_mode_genbank:
        default: true
      nogenbank:
        default: true
    out:
      - id: outdisc
      - id: outdiscxml
      - id: outmetamaster
      - id: outval
  Final_Bacterial_Package_asndisc_evaluate:
    run: progs/xml_evaluate.cwl
    in:
      # NOTE(review): this evaluates `outdisc`, while the xsltproc step below
      # reads `outdiscxml` - confirm which std_validation output is intended.
      input: Final_Bacterial_Package_std_validation/outdisc
      xpath_fail: xpath_fail_final_asndisc
      ignore_all_errors: ignore_all_errors
      stdout_redir:
        default: 'final_asndisc_diag.xml'
    out: [xml_output]
  Final_Bacterial_Package_asnvalidate_evaluate:
    run: progs/xml_evaluate.cwl
    in:
      input: Final_Bacterial_Package_std_validation/outval
      xpath_fail: xpath_fail_final_asnvalidate
      ignore_all_errors: ignore_all_errors
      stdout_redir:
        default: 'final_asnval_diag.xml'
    out: [success, xml_output]
  Final_Bacterial_Package_val_stats:  # TESTED (unit test)
    run: progs/val_stats.cwl
    in:
      annot_val: Final_Bacterial_Package_std_validation/outval
      c_toolkit:
        default: true
    out: [output, xml]
  #
  # end of Final_Bacterial_Package task
  #

  #### we do not need this
  # Prepare_Init_Refseq_Molecules:
  #   run: progs/

  #
  # Validate_Annotation task
  #
  Validate_Annotation_bact_univ_prot_stats:
    run: progs/bact_univ_prot_stats.cwl
    in:
      annot_request_id:
        default: -1  # this is dummy annot_request_id
      hmm_search: bacterial_annot_misc/Search_Naming_HMMs_hmm_hits
      hmm_search_proteins: bacterial_annot_misc/PGAP_plus_ab_initio_annotation
      input: Final_Bacterial_Package_final_bact_asn/outfull
      univ_prot_xml: passdata/univ_prot_xml
      val_res_den_xml: passdata/val_res_den_xml
      it:
        default: true
    out:
      - bact_univ_prot_stats_old_xml
      - var_bact_univ_prot_details_xml
      - var_bact_univ_prot_stats_xml
  Validate_Annotation_proc_annot_stats:
    run: progs/proc_annot_stats.cwl
    in:
      input: Final_Bacterial_Package_dumb_down_as_required/outent
      max_unannotated_region:
        default: 5000
      univ_prot_xml: passdata/univ_prot_xml
      val_res_den_xml: passdata/val_res_den_xml
      it:
        default: true
    out:
      - id: var_proc_annot_stats_xml
      - id: var_proc_annot_details_xml
  Validate_Annotation_xsltproc_asnvalidate:
    run: progs/xsltproc.cwl
    in:
      xml: Final_Bacterial_Package_val_stats/xml
      xslt: passdata/asn2pas_xsl
      output_name:
        default: 'var_proc_annot_stats.val.xml'
    out: [output]
  Validate_Annotation_xsltproc_asndisc:
    run: progs/xsltproc.cwl
    in:
      xml: Final_Bacterial_Package_std_validation/outdiscxml
      xslt: passdata/asn2pas_xsl
      output_name:
        default: 'var_proc_annot_stats.disc.xml'
    out: [output]
  Validate_Annotation_collect_annot_stats:  # TESTED (unit test)
    run: progs/collect_annot_stats.cwl
    in:
      input:
        source:
          - Validate_Annotation_bact_univ_prot_stats/var_bact_univ_prot_stats_xml
          - Validate_Annotation_proc_annot_stats/var_proc_annot_stats_xml
          - Validate_Annotation_xsltproc_asndisc/output
          - Validate_Annotation_xsltproc_asnvalidate/output
        linkMerge: merge_flattened
      output_name:
        default: proc_annot_stats.xml
    out: [output]
  Validate_Annotation_collect_annot_details:
    run: progs/collect_annot_stats.cwl
    in:
      input:
        source:
          - Validate_Annotation_bact_univ_prot_stats/var_bact_univ_prot_details_xml
          - Validate_Annotation_proc_annot_stats/var_proc_annot_details_xml
        linkMerge: merge_flattened
      output_name:
        default: proc_annot_details.xml
    out: [output]
  ping_stop:
    run: progs/pinger.cwl
    in:
      report_usage: report_usage
      uuid_in: ping_start/uuid_out
      state:
        default: "stop"
      workflow:
        default: "pgap"
      # Note: the input on the following line should be the same as all of
      # the outputs for this workflow, so we ensure this is the final step.
      # NOTE(review): only five sources are listed here, while the `outputs`
      # section draws on more steps (checksum, CDS fasta, stats, diagnostics,
      # checkm) - confirm whether the list should be extended.
      infile:
        - Final_Bacterial_Package_sqn2gbent/output
        - Generate_Annotation_Reports_gff/output
        - Generate_Annotation_Reports_gbk/output
        - Generate_Annotation_Reports_nuc_fasta/nuc_fasta
        - Generate_Annotation_Reports_prot_fasta/prot_fasta
    out: [stdout]
  #
  # end of Validate_Annotation task
  #
  #
  # End of Pseudo plane default 3
  #

  ###############################################
  # taxonomy plane is for later stages skipping
  ###############################################

  #
  # Pseudo plane default 4
  #
  # task: Generate Annotation Reports
  #
  # Generate_Annotation_Reports_pgaap_prepare_review:
  #   run: progs/pgaap_prepare_review.cwl
  # Generate_Annotation_Reports_lds2_indexer:
  #   run: progs/lds2_indexer.cwl
  #
  # # comparisons only for pre-existing annotation, one of the next phases
  # #
  # # Generate_Annotation_Reports_comparison_format_curr_comparison:
  # #   run: progs/comparison_format.cwl
  # # Generate_Annotation_Reports_comparison_format_prev_comparison:
  # #   run: progs/comparison_format.cwl
  # # Generate_Annotation_Reports_comparison_format_prev_assm_comparison:
  # #   run: progs/comparison_format.cwl
  # # Generate_Annotation_Reports_comparison_format_ref_comparison:
  # #   run: progs/comparison_format.cwl
  # Generate_Annotation_Reports_bact_asn_stats:
  #   run: progs/bact_asn_stats.cwl
  #   in:
  #     input_annotation: Final_Bacterial_Package_dumb_down_as_required/outent
  #     it:
  #       default: true
  #   out: [output, xml_output]
  # Generate_Annotation_Reports_val_format:
  #   run: progs/val_format.cwl
  # Generate_Annotation_Reports_gbproject:
  #   run: progs/gbproject.cwl
  # Generate_Annotation_Reports_asn2nucleotide_fasta:
  #   run: progs/asn2fasta.cwl
  # Generate_Annotation_Reports_asn2all_protein_fasta:
  #   run: progs/asn2fasta.cwl
  # Generate_Annotation_Reports_asn2protein_fasta:
  #   run: progs/asn2fasta.cwl
  # Generate_Annotation_Reports_asn2flat:
  #   run: progs/asn2flat.cwl
  # Generate_Annotation_Reports_format_rrnas:
  #   run: progs/format_rrnas.cwl
  # Generate_Annotation_Reports_asn2rrna_fa:
  #   run: progs/asn2fasta.cwl
  # Generate_Annotation_Reports_gp_annot_format:
  #   run: progs/gp_annot_format.cwl
  # end of task: Generate Annotation Reports
  #
  #
  # End of Pseudo plane default 4
  #

# Workflow outputs: each entry exposes one step output under a public name.
# Entries typed `File?` are optional.
outputs:
  gbent:
    type: File
    outputSource: add_checksum_gbent/output
  gff:
    type: File
    outputSource: Generate_Annotation_Reports_gff/output
  gbk:
    type: File
    outputSource: Generate_Annotation_Reports_gbk/output
  nucleotide_fasta:
    type: File?
    outputSource: Generate_Annotation_Reports_nuc_fasta/nuc_fasta
  protein_fasta:
    type: File?
    outputSource: Generate_Annotation_Reports_prot_fasta/prot_fasta
  cds_nucleotide_fasta:
    type: File?
    outputSource: Generate_Annotation_Reports_cds_nuc_fasta/fasta
  cds_protein_fasta:
    type: File?
    outputSource: Generate_Annotation_Reports_cds_prot_fasta/fasta
  sqn:
    type: File
    outputSource: add_checksum_sqn/output
  proc_annot_stats:
    type: File
    outputSource: Validate_Annotation_proc_annot_stats/var_proc_annot_stats_xml
  all_proc_annot_stats:
    type: File
    outputSource: Validate_Annotation_collect_annot_stats/output
  initial_asndisc_error_diag:
    type: File?
    outputSource: Prepare_Unannotated_Sequences_asndisc_evaluate/xml_output
  initial_asnval_error_diag:
    type: File?
    outputSource: Prepare_Unannotated_Sequences_asnvalidate_evaluate/xml_output
  final_asndisc_error_diag:
    type: File?
    outputSource: Final_Bacterial_Package_asndisc_evaluate/xml_output
  final_asnval_error_diag:
    type: File?
    outputSource: Final_Bacterial_Package_asnvalidate_evaluate/xml_output
  checkm_raw:
    type: File
    outputSource: checkm/checkm_raw
  checkm_results:
    type: File
    outputSource: checkm/checkm_results