CMPB Configuration File

The configuration file, including its default values, is shown below.

# Configuration file for the SPECIMEN CMPB workflow

# Meaning of the default parameters:
#    The value __USER__ indicates parameters required to be specified by the user
#    The value USER indicates parameters required only in specific cases
#    To avoid warnings, set parameters you do not use to null or NULL

# Meta info:
#    model:     USER
#    organism:  USER
#    date:      USER
#    author:    USER

# Input for the pipeline
# ----------------------
input:
    modelpath: NULL            # Optional, path to a model.
                               # If not given, runs CarveMe
    mediapath: __USER__        # Path to a media config to test growth with

# General options
# ---------------
general:
    dir: './'                  # Path/name of a directory to save output to
    colours: 'YlGn'            # Set the colour scheme for the plots;
                               # should be a valid matplotlib continuous colour palette
    namespace: BiGG            # Namespace to use for the model
                               # Possible identifiers, currently: BiGG
    save_all_models: True      # Save a model per step
    memote_always_on: False    # Run MEMOTE after every step
    stats_always_on: False     # Calculate the model statistics after every step

    # Options used by multiple steps of the workflow
    refseq_gff: USER           # Path to RefSeq GFF file:
                               # Can be optionally provided for cm-polish.
    kegg_organism_id: USER     # KEGG ID of the organism: Required for gap analysis with 'KEGG'.
                               # Can be optionally provided for cm-polish.
    protein_fasta: USER        # Required, if used for CarveMe or GeneGapFiller. Optional
                               # for cm-polish except for 'is_lab_strain: True'.
                               # The path to the protein FASTA used to create the CarveMe model.
                               # For more information, please refer to the documentation.

tech-resources:
    email: USER   # User e-mail address to use for Entrez (accessing NCBI).
    threads: 2    # Number of threads available for tools like DIAMOND.

# Part-specific options
# =====================

# Build a model using CarveMe
# ---------------------------
carveme:
    # CarveMe requires protein_fasta under general to be set instead of modelpath.
    # If CarveMe should be run,
    # fill out the parameters below.
    gram: USER      # Choose either grampos or gramneg, depending on the Gram-test
                    # results of your organism

# Polish a CarveMe model
#    Only necessary, if the model will be or has been built with CarveMe
#    Will only be used, if the model is indeed a CarveMe model
cm-polish:
    is_lab_strain: False    # Whether the user's strain originates from a lab
                            # Needs to be set to ensure that protein IDs get the 'bqbiol:isHomologTo' qualifier
                            # & to set the locus_tag to the ones obtained by the annotation
                            # (Warning: Might cause issues if annotation was not performed with NCBI PGAP!)

# Filling gaps, optional
# ----------------------
gapfilling:

    ########### general options ###########
    # parameters that apply to all the gap filling algorithms
    idprefix: 'CMPB'            # prefix to use for fantasy IDs, if IDs for
                                # the namespace do not exist.
    formula-check: 'existence'  # When checking, if a metabolite can be added to the model
                                # also check the formula. For more information about
                                # available options, please refer to the docs of
                                # the function isreaction_complete().
    exclude-dna: True           # Exclude reactions containing 'DNA' in their name
                                # from being added to the model.
    exclude-rna: True           # Exclude reactions containing 'RNA' in their name
                                # from being added to the model.

    ########## enable algorithms ##########
    # via KEGG ...................
    # requires KEGG organism ID to be set
    KEGGapFiller: False           # Activate gap filling via KEGG.
    # via BioCyc .................
    BioCycGapFiller: False        # Activate gap filling via BioCyc.
    BioCycGapFiller parameters:
        gene-table: USER            # Path to a gene smart table file from BioCyc.
        reacs-table: USER           # Path to a reactions smart table from BioCyc.
        gff: USER                   # Path to a GFF file of the genome of the model.
    # via GFF ....................
    GeneGapFiller: False               # Activate gap filling via GFF
    GeneGapFiller parameters:
        gff: USER                      # Path to a gff file (does not have to be the RefSeq).
                                       # Needs to be from the same genome the model was built on.
        swissprot-dmnd: USER           # Path to the SwissProt DIAMOND database file.
        swissprot-mapping: USER        # Path to the SwissProt mapping file (against EC / BRENDA)
        check-NCBI: False              # Enable checking NCBI accession numbers for EC numbers - time costly.
        sensitivity: 'more-sensitive'  # Sensitivity option for the DIAMOND run
                                       # (must name a valid DIAMOND sensitivity mode).
        coverage: 90.0                 # Coverage (parameter for DIAMOND).
        percentage identity: 90.0      # Percentage identity threshold value for accepting
                                       # matches found by DIAMOND as homologous.


# Add KEGG pathways as groups, optional
# -------------------------------------
kegg_pathway_groups: True   # Whether to add KEGG pathways as groups

# Resolve duplicates
# ------------------
duplicates:
    # Three possible options for resolving duplicates of the following model entities:
    # - check:  Check for duplicates and simply report them
    # - remove: Check for and remove duplicates from the model (if possible)
    # - skip:   Skip the resolution step
    reactions: remove
    metabolites: remove
    # Additionally, remove unused metabolites (possibly reduces knowledge-base)
    remove_unused_metabs: False

# Finding and solving Energy Generating Cycles (EGCs)
# ---------------------------------------------------
EGCs:
    solver: NULL    # Algorithm to use for solving EGCs.
                    # If NULL, only searches for EGCs without trying to solve them.
                    # Options include: greedy

# BOFdat / Biomass objective function
# -----------------------------------
BOF:
    run_bofdat: False
    # If BOFdat should be run,
    # fill out the parameters below.
    bofdat_params:
        full_genome_sequence: USER  # Whole genome sequence
        dna_weight_fraction: USER   # DNA weight fraction for the organism
        weight_fraction: USER       # Enzyme/ion weight fractions for the organism