Source code for specimen.util.set_up
"""Collection of functions for setting up the environment for the pipelines."""
__author__ = "Carolin Brune"
################################################################################
# requirements
################################################################################
import click
import logging
import os
import yaml
from importlib.resources import files
from pathlib import Path
from typing import Literal, Union
from refinegems.utility.set_up import download_config as rg_config
################################################################################
# variables
################################################################################
# config keys
# -----------
# config keys for pipeline files
HQTB_CONFIG_PATH_OPTIONAL = [
"media_gap",
"ncbi_map",
"biocyc",
"universal",
"pan-core",
"fasta",
"gff",
"dmnd-database",
"database-mapping",
] #: :meta:
HQTB_CONFIG_PATH_REQUIRED = [
"annotated_genome",
"full_sequence",
"model",
"diamond",
"media_analysis",
] #: :meta:
CMPB_CONFIG_PATHS_REQUIRED = ["mediapath"] #: :meta:
CMPB_CONFIG_PATHS_OPTIONAL = [
"modelpath",
"full_genome_sequence",
"gff",
"protein_fasta",
"gene-table",
"reacs-table",
"gff",
"dmnd-database",
"database-mapping",
"reaction_direction",
] # :meta:
PIPELINE_PATHS_OPTIONAL = {
"hqtb": HQTB_CONFIG_PATH_OPTIONAL,
"cmpb": CMPB_CONFIG_PATHS_OPTIONAL,
} #: :meta:
PIPELINE_PATHS_REQUIRED = {
"hqtb": HQTB_CONFIG_PATH_REQUIRED,
"cmpb": CMPB_CONFIG_PATHS_REQUIRED,
} #: :meta:
# config keys for pipelines directories
PIPELINE_DIR_PATHS = ["dir"]
################################################################################
# functions
################################################################################
# ----------------------
# setup data (structure)
# ----------------------
[docs]
def build_data_directories(
pipeline: Literal[
"hqtb", "high-quality template based", "cmpb", "carveme modelpolisher based"
],
parent_dir: str,
):
"""Set up the necessary directory structure and download files if possible
for the given pipeline.
Args:
- pipeline (Literal['hqtb','high'):
For which pipeline the structure should be.
- parent_dir (str):
Parent directory/ Path to write the structure to.
Raises:
- ValueError: Unknown input for parameter pipeline
"""
match pipeline:
# HQTB setup
case "hqtb" | "high-quality template based":
# create the data directory structure
print("Creating directory structure...")
DATA_DIRECTORIES = [
"annotated_genomes",
"BioCyc",
"RefSeqs",
"medium",
"pan-core-models",
"template-models",
"universal-models",
]
for sub_dir in DATA_DIRECTORIES:
new_dir = Path(parent_dir, sub_dir)
try:
Path(new_dir).mkdir(parents=True, exist_ok=False)
print(f"Creating new directory {new_dir}")
except FileExistsError:
print(f"Directory {new_dir} already exists.")
# CMPB output
case "cmpb" | "carveme modelpolisher based":
# create the data directory structure
print("Creating directory structure...")
DATA_DIRECTORIES = {
"logs": [],
"misc": [
'auxotrophy',
'growth',
'kegg_pathways',
'mcc',
'memote',
'stats',
],
"models": [],
} # Subfolders & subsubfolders of parent_dir
# Create parent_dir for cmpb
parent_dir = Path(parent_dir, "cmpb_out")
# Generate subdirectories & subsubdirectories
for sub_dir in DATA_DIRECTORIES.keys():
new_dir = Path(parent_dir, sub_dir)
# Try to create sub directories: if exist, overwrite & warn user
try:
Path(new_dir).mkdir(parents=True, exist_ok=False)
print(f"Creating new directory {new_dir}")
except FileExistsError:
logging.warning(
f"Given directory {new_dir} already exists. High possibility of files being overwritten."
)
# Generate sub sub directories
if DATA_DIRECTORIES[sub_dir]:
for ssdir in DATA_DIRECTORIES[sub_dir]:
new_sub_dir = Path(new_dir, ssdir)
# Try to create sub sub directories: if exist, overwrite & warn user
try:
Path(new_sub_dir).mkdir(parents=True, exist_ok=False)
print(f"Creating new sub directory {new_sub_dir}")
except FileExistsError:
logging.warning(
f"Given sub directory {new_sub_dir} already exists. High possibility of files being overwritten."
)
# default case
case _:
message = f"Unknown input for parameter pipeline: {pipeline}"
raise ValueError(message)
# ---------------------
# handling config files
# ---------------------
[docs]
def download_config(
filename: str = "my_basic_config.yaml",
type: Literal[
"hqtb-basic", "hqtb-advanced", "hqtb-defaults", "media", "cmpb"
] = "hqtb basic",
):
"""Load a configuration file from the package and save
a copy for the user to edit.
The media config and the config for the cmpb / CarveMe + Modelpolisher based pipeline
can be downloaded using 'media' and 'cmpb' respectively
For the hqtb / high-quality template based pipeline:
Depending on the knowledge of the user, either a 'hqtb-basic' or an 'hqtb-advanced' type
of configuration file can be downloaded (or 'hqtb-defaults' for developers).
Args:
- filename (str, optional):
Filename/filepath to save the downloaded config file under.
Defaults to 'my_basic_config.yaml'.
- type (Literal['hqtb-basic','hqtb-advanced','hqtb-defaults','media','cmpb'], optional):
The type of file to download.
Can be 'hqtb-basic', 'hqtb-advanced' or 'hqtb-defaults' or 'media' or 'cmpb'.
Defaults to 'hqtb basic'.
Raises:
- ValueError: Unknown type of config file detected.
"""
# copy an examplary version of the config file for the user to edit it
match type:
# the 'beginner' version
case "hqtb-basic":
config_file = files("specimen.data.config").joinpath(
"hqtb_basic_config_expl.yaml"
)
with open(config_file, "r") as cfg_file, open(filename, "w") as cfg_out:
for line in cfg_file:
cfg_out.write(line)
# for advanced users
case "hqtb-advanced":
config_file = files("specimen.data.config").joinpath(
"hqtb_advanced_config_expl.yaml"
)
with open(config_file, "r") as cfg_file, open(filename, "w") as cfg_out:
for line in cfg_file:
cfg_out.write(line)
# for developer: the config with all internal defaults
case "hqtb-defaults":
config_file = files("specimen.data.config").joinpath(
"hqtb_config_default.yaml"
)
with open(config_file, "r") as cfg_file, open(filename, "w") as cfg_out:
for line in cfg_file:
cfg_out.write(line)
# media config from refinegems
case "media":
rg_config(filename, type="media")
# for the cmpb pipeline
case "cmpb":
config_file = files("specimen.data.config").joinpath("cmpb_config.yaml")
with open(config_file, "r") as cfg_file, open(filename, "w") as cfg_out:
for line in cfg_file:
cfg_out.write(line)
# type not found
case _:
raise ValueError(f"Unknown type of config file detected: {type}")
# hqtb
# ----
[docs]
def validate_config(userc: str, pipeline: Literal["hqtb", "cmpb"] = "hqtb") -> dict:
"""Validate a user hqtb config file for use in the pipeline.
.. note::
Currently not everything is checked, mainly the needed files are.
Args:
- userc (str):
Path to the user configuration file.
Raises:
- FileNotFoundError: Directory set for config:data:data:direc does not exist.
Returns:
dict:
The validated, read-in configuration file, nested (read-in yaml file).
"""
def dict_recursive_combine(dictA: dict, dictB: dict) -> dict:
"""Helper-function for :py:func:`~specimen.util.set_up.validate_config` to combine two configuration file.
Args:
- dictA (dict):
Information from one config file in dict format.
- dictB (dict):
Information from the other config file in dict format.
Returns:
dict:
The combined information.
"""
if not isinstance(dictB, dict):
return dictB
for key in dictA.keys():
if key in dictB.keys():
dictA[key] = dict_recursive_combine(dictA[key], dictB[key])
return dictA
def dict_recursive_overwrite(dictA: dict, key: str = None) -> dict:
"""Helper-function for :py:func:`~specimen.util.set_up.validate_config` to combine two configuration file.
Args:
- dictA (dict):
The dictionary to validate
Raises:
- TypeError: Missing file/path
Returns:
dict:
the Dictionary with USER overwritten as None
"""
if not isinstance(dictA, dict):
# check for missing input
if dictA == "__USER__":
raise TypeError(
f"Missing a required argument in the config file ({key})."
)
elif dictA == "USER":
mes = f"Keyword USER detected in config ({key}). Either due to skipped options or missing required information.\nReminder: This may lead to downstream problems."
logging.warning(mes)
return None
else:
return dictA
for key in dictA.keys():
dictA[key] = dict_recursive_overwrite(dictA[key], key)
return dictA
def dict_recursive_check(
dictA: dict, key: str = None, pipeline: Literal["hqtb", "cmpb"] = "hqtb"
):
"""Helper-function for :py:func:`~specimen.util.set_up.validate_config`
to check if a configuration is valid to run the high-quality template based pipeline.
Args:
- dictA (dict):
Current dictionary or value to be validated.
- key (str, optional):
key of dictA, if it was an entry of a dictionary.
Defaults to None.
Raises:
- TypeError: Missing a required argument in the config file.
- FileNotFoundError: Path does not exist: {dictA}
- FileNotFoundError: Path does not exist: {dictA}
"""
if not isinstance(dictA, dict):
# required file paths
if key in PIPELINE_PATHS_REQUIRED[pipeline]:
if isinstance(dictA, list):
for entry in dictA:
if os.path.isfile(entry):
continue
else:
raise FileNotFoundError(f"Path does not exist: {dictA}")
elif dictA and os.path.isfile(dictA):
return
else:
raise FileNotFoundError(f"Path does not exist: {dictA}")
# optional file paths
elif key in PIPELINE_PATHS_OPTIONAL[pipeline]:
if isinstance(dictA, str):
if os.path.isfile(dictA):
return
elif not os.path.isfile(dictA):
mes = f"Path does not exist: {dictA}. \nReminder: It is optional, but it may lead to downstream problems."
logging.warning(mes)
pass
else:
raise FileNotFoundError(f"Path does not exist: {dictA}")
if isinstance(dictA, list):
for entry in dictA:
if entry and os.path.isfile(entry):
return
elif not os.path.isfile(entry):
mes = f"Path does not exist: {entry}. \nReminder: It is optional, but it may lead to downstream problems."
logging.warning(mes)
pass
else:
raise FileNotFoundError(f"Path does not exist: {entry}")
elif key in PIPELINE_DIR_PATHS:
if dictA and os.path.exists(dictA):
return
else:
raise FileNotFoundError(f"Directory does not exist: {dictA}")
# not found or missing
else:
pass
return
else:
for key in dictA.keys():
dict_recursive_check(dictA[key], key, pipeline)
return
# validate a user config file by checking for missing input
# by combining it with a default config
# load both files
match pipeline:
case "hqtb":
defaultc_path = files("specimen.data.config").joinpath(
"hqtb_config_default.yaml"
)
case "cmpb":
defaultc_path = files("specimen.data.config").joinpath("cmpb_config.yaml")
case _:
raise ValueError(f"Unknown input for pipeline: {pipeline}")
with open(defaultc_path, "r") as cfg_def, open(userc, "r") as cfg_usr:
config_d = yaml.load(cfg_def, Loader=yaml.loader.FullLoader)
config_u = yaml.load(cfg_usr, Loader=yaml.loader.FullLoader)
# combine
combined_config = dict_recursive_combine(config_d, config_u)
# overwrite __USER__ and USER
combined_config = dict_recursive_overwrite(combined_config)
# check for missing or problematic values
# special case for HQTB pipeline with relative paths
if (
"data" in combined_config.keys()
and "data_direc" in combined_config["data"].keys()
and combined_config["data"]["data_direc"]
):
if os.path.isdir(combined_config["data"]["data_direc"]):
for key in combined_config["data"]:
if combined_config["data"][key] and key != "data_direc":
combined_config["data"][key] = (
combined_config["data"]["data_direc"]
+ combined_config["data"][key]
)
dict_recursive_check(combined_config, key=None, pipeline=pipeline)
else:
raise FileNotFoundError(
"Directory set for config:data:data_direc does not exist."
)
# normal recursion for validation
else:
dict_recursive_check(combined_config, key=None, pipeline=pipeline)
if combined_config["general"]["modelname"] is None and (
combined_config["general"]["authorinitials"] is None
or combined_config["general"]["organism"] is None
or combined_config["general"]["strainid"] is None
):
raise ValueError(
f"Either the model name or all of the following parameters must be stated: authorinitials, organism and strainID"
)
return combined_config
# cmpb
# ----
[docs]
def save_cmpb_user_input(configpath: Union[str, None] = None) -> dict:
"""Guide the user step by step through the creation of the configuration for a cmpb pipeline run
(via commandline).
Args:
- configpath (Union[str,None], optional):
Path to a file to save the config under. Defaults to None.
Returns:
dict:
The configuration in dictionary format.
"""
print("No config or no valid config given, you will be asked for input")
config_file = files("specimen.data.config").joinpath("cmpb_config.yaml")
with open(config_file, "r") as cfg:
config = yaml.load(cfg, Loader=yaml.loader.FullLoader)
# if model, get path
has_model = click.prompt(
"Do you already have a draft model e.g. created with CarveMe?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match has_model:
case "y":
modelpath = click.prompt(
"Enter the path to your model", type=click.Path(exists=True)
)
config["input"]["modelpath"] = modelpath
case "n":
pass
# required input
# --------------
print("------------")
print("The following information is REQUIRED for the pipeline")
print("------------")
config["input"]["mediapath"] = click.prompt(
"Enter the path to a media configuration file for growth simulation",
type=click.Path(exists=True),
)
# general options
# ---------------
print("------------")
print("General options")
print("------------")
# output directory
config["general"]["dir"] = click.prompt(
"Enter your desired output directory path", type=click.Path()
)
# name for the model
modelname = click.prompt(
"Do you have a specific name for your model?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match modelname:
case "y":
config["carveme"]["modelname"] = click.prompt(
"Please enter your desired name for the model", type=str
)
case "n":
config["general"]["authorinitials"] = click.prompt(
"An automated name based on the pattern iOrganismStrainAuthorYear will be created. \n Please enter your intials.",
type=str,
)
config["general"]["organism"] = click.prompt(
"Please enter an abbreviation for your organism.", type=str
)
config["general"]["strainid"] = click.prompt(
"Please enter the ID for your strain.", type=str
)
# colour
set_col = click.prompt(
"Do you want to use the default colour map YlGn for the visualisation?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match set_col:
case "n":
colours = click.prompt("Enter your chosen colour scheme", type=str)
config["general"]["colours"] = colours
case "y":
pass
# save all models or not
save_models = click.prompt(
"Do you want to save the model separatly after each step?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match save_models:
case "y":
config["general"]["save_all_models"] = True
case "n":
config["general"]["save_all_models"] = False
# run memote always y/n
run_memote = click.prompt(
"Do you want to run memote after each step?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match run_memote:
case "y":
config["general"]["memote_always_on"] = True
case "n":
config["general"]["memote_always_on"] = False
# run stats always y/n
models_stats = click.prompt(
"Do you want to run stats after each step?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match models_stats:
case "y":
config["general"]["stats_always_on"] = True
case "n":
config["general"]["stats_always_on"] = False
# some additional, sometimes required, sometimes optional files
refseq = click.prompt(
"If you want to run a gap analysis with KEGG or have a CarveMe model, please enter the path to your refseq gff file",
type=click.Path(),
)
config["general"]["gff"] = refseq
kegg_org_id = click.prompt(
"If you want to run a gap analysis with KEGG, please enter the KEGG organism ID"
)
config["general"]["kegg_organism_id"] = kegg_org_id
protein_fasta = click.prompt(
"If you want to use CarveMe or GeneGapFiller, please enter the path to your protein fasta file",
type=click.Path(),
)
config["general"]["protein_fasta"] = protein_fasta
# tech resources
# --------------
email = click.prompt("Enter the e-mail that will be used for Entrez")
config["tech-resources"]["email"] = email
set_threads = click.prompt(
"The default number of threads available for tools like DIAMOND is 2. Do you want to change that?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match set_threads:
case "y":
threads = click.prompt(
"Enter the number of threads available for tools like DIAMOND", type=int
)
config["tech-resources"]["threads"] = threads
# part-specific
# -------------
print("------------")
print("Part-specific options")
print("------------")
# CarveMe
carve = click.prompt(
"Do you want to build a model using CarveMe?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match carve:
case "y":
if config["general"]["protein_fasta"] is None:
protein_fasta = click.prompt(
"Enter the path to your protein fasta file",
type=click.Path(exists=True),
)
config["general"]["protein_fasta"] = protein_fasta
gram = click.prompt(
"Do you want to use a template specialized for gram-positive or gram-negative bacteria?",
type=click.Choice(["grampos", "gramneg", "None"]),
show_choices=True,
)
config["carveme"]["gram"] = gram
case "n":
if config["input"]["modelpath"] is None:
model = click.prompt(
"Please choose between an existing model or building a model with CarveMe. To run the CMPB workflow, you need a model.",
type=click.Choice(["modelpath", "CarveMe"]),
show_choices=True,
)
match model:
case "modelpath":
modelpath = click.prompt(
"Enter the path to an existing model",
type=click.Path(exists=True),
)
config["input"]["modelpath"] = modelpath
case "CarveMe":
carveme = click.prompt(
"Enter the path to a protein fasta file",
type=click.Path(exists=True),
)
config["general"]["protein_fasta"] = carveme
# model polish
carveme = click.prompt(
"Is your draft model CarveMe-based?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
if carveme == "y":
labs = click.prompt(
"Do you have a strain without any database information?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
labs = True if labs == "y" else False
config["cm-polish"]["is_lab_strain"] = labs
# gapfilling
gap_analysis = click.prompt(
"Do you want to run a gap analysis?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
if gap_analysis == "y":
idprefix = click.prompt(
"Enter a prefix to be used of IDs for the namespace do not exist"
)
config["gapfilling"]["idprefix"] = idprefix
formula_check = click.prompt(
"Enter the parameter for checking the metabolite formula before adding them to the model",
type=click.Choice(["none", "strict", "existence", "wildcard"]),
show_choices=True,
)
config["gapfilling"]["formula-check"] = formula_check
exclude_dna = click.prompt(
"Do you want to exlude reactions containing 'DNA' in their name?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
config["gapfilling"]["exclude-dna"] = exclude_dna
exclude_rna = click.prompt(
"Do you want to exlude reactions containing 'RNA' in their name?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
config["gapfilling"]["exclude-rna"] = exclude_rna
config["gapfilling"]["threshold_add_reacs"] = click.prompt(
"Enter the threshold for adding reactions (max. allowed matches of an EC number).",
type=int,
default=5,
)
algorithm = click.prompt(
"Which algorithm do you want to use for gapfilling?",
type=click.Choice(["KEGGapFiller", "BioCycGapFiller", "GeneGapFiller"]),
show_choices=True,
)
another_gapfiller = True
while another_gapfiller:
match algorithm:
case "KEGGapFiller":
config["gapfilling"]["KEGGapFiller"] = True
if config["general"]["kegg_organism_id"] is None:
kegg_org_id = click.prompt("Enter the KEGG organism id")
config["general"]["kegg_organism_id"] = kegg_org_id
case "BioCycGapFiller":
config["gapfilling"]["BioCycGapFiller"] = True
gene_table = click.prompt(
"Enter the path to a gene smart table from BioCyc",
type=click.Path(exists=True),
)
config["gapfilling"]["BioCycGapFiller parameters"][
"gene-table"
] = gene_table
reacs_table = click.prompt(
"Enter the path to a reactions smart table from BioCyc",
type=click.Path(exists=True),
)
config["gapfilling"]["BioCycGapFiller parameters"][
"reacs-table"
] = reacs_table
gff = click.prompt(
"Enter the path to a GFF file of the genome of the model",
type=click.Path(exists=True),
)
config["gapfilling"]["BioCycGapFiller parameters"]["gff"] = gff
case "GeneGapFiller":
config["gapfilling"]["GeneGapFiller"] = True
gff = click.prompt(
"Enter the path to a GFF file of the genome of the model",
type=click.Path(exists=True),
)
config["gapfilling"]["GeneGapFiller parameters"]["gff"] = gff
swissprot_dmnd = click.prompt(
"Enter the path to the SwissProt DIAMOND database file",
type=click.Path(exists=True),
)
config["gapfilling"]["GeneGapFiller parameters"][
"swissprot-dmnd"
] = swissprot_dmnd
swissprot_mapping = click.prompt(
"Enter the path to the SwissProt mapping file",
type=click.Path(exists=True),
)
config["gapfilling"]["GeneGapFiller parameters"][
"swissprot-mapping"
] = swissprot_mapping
check_NCBI = click.prompt(
"Do you want to enable checking NCBI accession numbers for EC numbers?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
check_NCBI = True if check_NCBI == "y" else False
config["gapfilling"]["GeneGapFiller parameters"][
"check-NCBI"
] = check_NCBI
sensitivity = click.prompt(
"Enter the sensitivity option for the DIAMOND run",
type=click.Choice(
[
"fast",
"mid-sensitive",
"sensitive",
"more-sensitive",
"very-sensitive",
"ultra-sensitive",
]
),
show_choices=True,
)
config["gapfilling"]["GeneGapFiller parameters"][
"sensitivity"
] = sensitivity
coverage = click.prompt(
"Enter the coverage for DIAMOND", type=float
)
config["gapfilling"]["GeneGapFiller parameters"][
"coverage"
] = coverage
percentage_identity = click.prompt(
"Enter the percentage identity threshold value for accepting matches",
type=float,
)
config["gapfilling"]["GeneGapFiller parameters"][
"percentage identity"
] = percentage_identity
another_gapfiller = click.prompt(
"Do you want to use another algorithm for gapfilling?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
another_gapfiller = True if another_gapfiller == "y" else False
if another_gapfiller:
algorithm = click.prompt(
"Which algorithm do you want to use for gapfilling?",
type=click.Choice(
["KEGGapFiller", "BioCycGapFiller", "GeneGapFiller"]
),
show_choices=True,
)
# ModelPolisher
modelpolisher = click.prompt(
"Do you want to run ModelPolisher?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match modelpolisher:
case "y":
config["modelpolisher"] = True
allow_model_to_be_saved_on_server = click.prompt(
"Do you want to allow the model to be saved on the server?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
allow_model_to_be_saved_on_server = (
True if allow_model_to_be_saved_on_server == "y" else False
)
config["mp"][
"allow-model-to-be-saved-on-server"
] = allow_model_to_be_saved_on_server
dont_fix = click.prompt(
"Do you want to fix the model? Unset default values will be set, if they are mandatory.",
type=click.Choice(["y", "n"]),
show_choices=True,
)
dont_fix = False if dont_fix == "y" else True
config["mp"]["fixing"]["dont-fix"] = dont_fix
annotate_with_bigg = click.prompt(
"Do you want to annotate with BiGG?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
annotate_with_bigg = True if annotate_with_bigg == "y" else False
config["mp"]["annotation"]["bigg"][
"annotate-with-bigg"
] = annotate_with_bigg
include_any_uri = click.prompt(
"Do you want to include annotation that are not MIRIAM-compliant?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
include_any_uri = True if include_any_uri == "y" else False
config["mp"]["annotation"]["bigg"]["include-any-uri"] = include_any_uri
case "n":
config["modelpolisher"] = False
# kegg pathways as groups
kegg_pw_groups = click.prompt(
"Do you want to add KEGG pathways as groups to the model?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match kegg_pw_groups:
case "y":
config["kegg_pathway_groups"] = True
case "n":
config["kegg_pathway_groups"] = False
# resolve duplicates
reac_dups = click.prompt(
"Do you want to check for and/or remove duplicate reactions?",
type=click.Choice(["skip", "check", "remove"]),
show_choices=True,
)
config["duplicates"]["reactions"] = reac_dups
meta_dups = click.prompt(
"Do you want to check for and/or remove duplicate metabolites?",
type=click.Choice(["skip", "check", "remove"]),
show_choices=True,
)
config["duplicates"]["metabolites"] = meta_dups
unused_meta = click.prompt(
"Do you want to remove unused metabolites?",
type=click.Choice(["y", "n"]),
show_choices=True,
)
match unused_meta:
case "y":
config["duplicates"]["remove_unused_metabs"] = True
case "n":
config["duplicates"]["remove_unused_metabs"] = False
# handling EGCs
egc_solver = click.prompt(
"Choose a solver (or none) for handling energy generating cycles.",
type=click.Choice(["none", "greedy"]),
show_choices=True,
)
if egc_solver == "none":
egc_solver = None
# BOF
do_bofdat = click.prompt(
"Do you want do run BOFdat?", type=click.Choice(["y", "n"]), show_choices=True
)
match do_bofdat:
case "y":
config["BOF"]["run_bofdat"] = True
full_genome_path = click.prompt(
"Please enter the path to the full genome sequence",
type=click.Path(exists=True),
)
config["BOF"]["full_genome_sequence"] = full_genome_path
dna_wf = click.prompt(
"Enter the DNA weight fraction of your organism", type=float
)
config["BOF"]["dna_weight_fraction"] = dna_wf
wf = click.prompt(
"Enter the weight fraction of your organism (enzyme/ion)", type=float
)
config["BOF"]["weight_fraction"] = wf
case "n":
config["BOF"]["run_bofdat"] = False
# save config
if configpath:
pass
else:
configpath = Path(config["general"]["dir"], "config.yaml")
with open(configpath, "w") as outf:
yaml.dump(config, outf, default_flow_style=False)
return config