# Source code for specimen.hqtb.core.analysis

"""Analyse a model (step 5 of the workflow)."""

__author__ = "Carolin Brune"

################################################################################
# requirements
################################################################################

import logging
import time

from pathlib import Path
from typing import Literal

from refinegems.classes.medium import load_media
from refinegems.analysis import growth
from refinegems.utility.io import load_model
from refinegems.analysis.core_pan import compare_to_core_pan
from refinegems.curation.pathways import kegg_pathway_analysis
from refinegems.utility.util import test_biomass_presence
from refinegems.developement.decorators import suppress_warning
from refinegems.utility.connections import run_memote

from ...classes.reports import SpecimenModelInfoReport

################################################################################
# setup logging
################################################################################
# Module-level logger for general workflow messages.
genlogger = logging.getLogger(__name__)
# Internal logger: `run` attaches a file handler to it. It records at DEBUG
# detail and does not pass its records on to ancestor loggers.
logger = logging.getLogger(f"{__name__}-intern")
logger.propagate = False
logger.setLevel(logging.DEBUG)

################################################################################
# functions
################################################################################

# run this part
# -------------

@suppress_warning("invalid character '*' found in formula")
def run(
    model_path: str,
    dir: str,
    media_path: str = None,
    namespace: Literal["BiGG"] = "BiGG",
    pc_model_path: str = None,
    pc_based_on: Literal["id"] = "id",
    test_aa_auxotrophies: bool = True,
    pathway: bool = True,
):
    """SPECIMEN Step 5: Analyse the generated model.

    All results and the log file ``analysis.log`` are written to the
    subdirectory ``05_analysis`` of ``dir``. While the analysis runs, the
    loggers ``cobra``, ``matplotlib`` and ``refinegems`` are redirected into
    that log file; the redirection is undone and the log file is closed when
    the function returns or raises.

    Args:
        - model_path (str):
            Path to the model.
        - dir (str):
            Path to the output directory.
        - media_path (str, optional):
            Path to a media config file.
            Using this enables growth simulation.
            Defaults to None.
        - namespace (Literal['BiGG'], optional):
            Namespace to work on.
            Defaults to 'BiGG'.
        - pc_model_path (str, optional):
            Path to a core-pan model. If given, the model is compared to it.
            Defaults to None.
        - pc_based_on (Literal['id'], optional):
            How to compare the model to the core-pan model.
            Defaults to 'id'.
        - test_aa_auxotrophies (bool, optional):
            Option to enable the amino acid auxotrophy simulation.
            Only has an effect if ``media_path`` is given.
            Defaults to True.
        - pathway (bool, optional):
            Option to enable KEGG pathway analysis.
            Defaults to True.
    """
    # Imported here because a plain ``import logging`` does not guarantee that
    # the ``logging.handlers`` submodule has been loaded.
    from logging.handlers import RotatingFileHandler

    total_time_s = time.time()
    out_dir = Path(dir, "05_analysis")

    genlogger.info(
        "\nanalysis\n################################################################################\n"
    )

    # -----------------------
    # create output directory
    # -----------------------

    try:
        out_dir.mkdir(parents=True, exist_ok=False)
    except FileExistsError:
        genlogger.info("Given directory already has required structure.")
    else:
        genlogger.info(f"Creating new directory {out_dir}")

    # -------------
    # setup logging
    # -------------

    # start every run with a fresh log file
    log_file = out_dir / "analysis.log"
    log_file.unlink(missing_ok=True)
    handler = RotatingFileHandler(
        str(log_file),
        mode="w",
        backupCount=10,
        encoding="utf-8",
        delay=False,
    )
    handler.setFormatter(
        logging.Formatter(
            "{levelname} \t {name} \t {message}",
            style="{",
        )
    )
    logger.addHandler(handler)

    # redirect cobrapy, matplotlib and refinegems logging into the log file;
    # remember the previous propagate flags to restore them afterwards
    redirected = [
        logging.getLogger(name) for name in ("cobra", "matplotlib", "refinegems")
    ]
    previous_propagate = [ext_logger.propagate for ext_logger in redirected]
    for ext_logger in redirected:
        ext_logger.addHandler(handler)
        ext_logger.propagate = False

    try:
        # load model
        model = load_model(str(model_path), "cobra")

        # ------------------
        # general statistics
        # ------------------

        logger.info("\n# ------------------\n# general statistics\n# ------------------")

        statistics_report = SpecimenModelInfoReport(model)
        statistics_report.save(out_dir)

        # ------
        # memote
        # ------

        logger.info("\n# ------\n# memote\n# ------")

        run_memote(
            model,
            "html",
            save_res=out_dir / "final_memote.html",
        )

        # -----------------
        # pan-core analysis
        # -----------------

        if pc_model_path:
            logger.info("\n# ------------------\n# pan-core analysis\n# ------------------")
            pc_model = load_model(pc_model_path, "cobra")
            pan_core_report = compare_to_core_pan(model, pc_model, pc_based_on)
            pan_core_report.save(out_dir)

        # ----------------
        # pathway analysis
        # ----------------

        if pathway:
            logger.info("\n# -----------------\n# pathway analysis\n# -----------------")
            pathway_report = kegg_pathway_analysis(model)
            pathway_report.save(out_dir)

        # ---------------
        # growth analysis
        # ---------------

        if media_path:
            logger.info("\n# ---------------\n# growth analysis\n# ---------------")

            # try to set objective to growth
            growth_func_list = test_biomass_presence(model)
            if growth_func_list:
                # independently of how many growth functions are found,
                # the first one will be used
                model.objective = growth_func_list[0]
                # simulate growth on different media
                growth_report = growth.growth_analysis(
                    model, media_path, namespace=namespace, retrieve="report"
                )
                growth_report.save(out_dir)
            else:
                logger.warning(
                    "No growth/biomass function detected, growth simulation will be skipped."
                )

            # test auxotrophies
            # NOTE(review): placed inside the media branch because it needs
            # media_path, but outside the biomass check - confirm that the
            # auxotrophy test should also run when no biomass function is found.
            if test_aa_auxotrophies:
                media_list = load_media(media_path)
                auxo_report = growth.test_auxotrophies(
                    model, media_list[0], media_list[1], namespace
                )
                auxo_report.save(out_dir)

        total_time_e = time.time()
        logger.info(f"total runtime: {total_time_e-total_time_s}")
    finally:
        # Detach and close the file handler so that repeated calls neither
        # leak open log files nor write into the log of an earlier run.
        logger.removeHandler(handler)
        for ext_logger, propagate in zip(redirected, previous_propagate):
            ext_logger.removeHandler(handler)
            ext_logger.propagate = propagate
        handler.close()