Source code for specimen.hqtb.core.validation

"""Validate a model (step 4 of the workflow).

Implemented tests include:
- ModelPolisher (semantic control and BiGG annotation fixing)
- cobra/sbml check using cobrapy
"""

__author__ = "Carolin Brune"

################################################################################
# requirements
################################################################################

import logging
import logging.handlers
import pprint
import time
import warnings
from pathlib import Path
from typing import Union

import cobra
import pandas as pd

from refinegems.utility.connections import run_ModelPolisher
from refinegems.utility.io import write_model_to_file

################################################################################
# setup logging
################################################################################
# general logging
genlogger = logging.getLogger(__name__)
# internal logger with logging file

logger = logging.getLogger(__name__ + "-intern")
logger.setLevel(logging.DEBUG)
logger.propagate = False

################################################################################
# functions
################################################################################


[docs] def run( dir: str, model_path: str, tests: Union[None, str, list] = None, run_all: bool = True, ): """SPECIMEN Step 4: Validate the model. Included tests (name : description): - modelpolisher: Semantic control and BiGG annotation fixing with ModelPolisher - cobra: SBML validation using COBRApy Args: - dir (str): Path to the output directory. - model_path (str): Path to the model to be validated - tests (Union[None, str, list], optional): Tests to perform. If the test name is either in a string or an element in a list, the corresponding test will be run. Defaults to None. - run_all (bool, optional): Run al available tests. If True, overwrites the previous parameter. Defaults to True. """ total_time_s = time.time() # -----------------------^ # create output directory # ----------------------- try: Path(dir, "04_validation").mkdir(parents=True, exist_ok=False) genlogger.info(f'Creating new directory {str(Path(dir,"04_validation"))}') except FileExistsError: genlogger.info("Given directory already has required structure.") # ----------------- # fine tune logging # ----------------- # interal logging Path(dir, "04_validation", "validation.log").unlink(missing_ok=True) handler = logging.handlers.RotatingFileHandler( str(Path(dir, "04_validation", "validation.log")), mode="w", backupCount=10, encoding="utf-8", delay=0, ) handler.setFormatter( logging.Formatter( "{levelname} \t {name} \t {message}", style="{", ) ) logger.addHandler(handler) # redirect cobrapy logging cobralogger = logging.getLogger("cobra") cobralogger.addHandler(handler) cobralogger.propagate = False # -------------- # validate model # -------------- logger.info( "\nvalidation\n################################################################################\n" ) # generalise input match tests: case None: pass case str(): tests = tests.lower() case list(): tests = [t.lower() for t in tests] case _: warnings.warn(f"Tests parameter must be of type str or list, got {type(tests)}. 
Setting to None.") tests = None # ModelPolisher # ------------- if run_all or (tests and "modelpolisher" in tests): logger.info( "\n" "# -------------\n" "# ModelPolisher\n" "# -------------" ) logger.warning('ModelPolisher is currently not maintained and might not work as expected. Use at your own risk.') start = time.time() # generate specific directory for ModelPolisher output try: Path(dir, "04_validation", "modelpolisher").mkdir(parents=True, exist_ok=False) logger.info(f'Creating new directory {str(Path(dir,"04_validation", "modelpolisher"))}') except FileExistsError: logger.info("Given directory already has required structure.") # setting ModelPolisher params config_mp = { "allow-model-to-be-saved-on-server": False, "fixing": {"dont-fix": False}, "annotation": { "bigg": { "annotate-with-bigg": True, "include-any-uri": False, } }, } # running ModelPolisher result = run_ModelPolisher(str(model_path), config_mp) # @DEBUG Should the run-id be saved somewhere for debugging purposes? result['run_id'] # @WARNING ModelPolisher is currently not maintained and might not work as expected if result: if len(result['diff']) > 1: pd.DataFrame(result["diff"]).to_csv( Path(dir, "04_validation", "modelpolisher", "diff_mp.csv"), sep=";", header=False, ) else: logger.warning(f"{result['diff']}") pd.DataFrame(result["pre_validation"]).to_csv( Path(dir, "04_validation", "modelpolisher", "pre_validation.csv"), sep=";", header=True, ) pd.DataFrame(result["post_validation"]).to_csv( Path(dir, "04_validation", "modelpolisher", "post_validation.csv"), sep=";", header=True, ) # save model model_polisher_model_path = Path(dir, "04_validation", f"{Path(model_path).stem}_after_mp.xml") current_libmodel = result["polished_document"].getModel() write_model_to_file(current_libmodel, model_polisher_model_path) else: logger.warning('No result was produced with ModelPolisher. 
This step will be skipped.') end = time.time() logger.info(f"\ttime: {end - start}s") # COBRApy # ------- if run_all or (tests and "cobra" in tests): logger.info( "\n" "# ------------------\n" "# COBRApy validation\n" "# ------------------" ) start = time.time() # validate using cobra cobra_report = cobra.io.validate_sbml_model(model_path) with open( Path(dir, "04_validation", "cobrapy-validation.txt"), "w" ) as cpyval_file: pprint.pprint(cobra_report, stream=cpyval_file) end = time.time() logger.info(f"\ttime: {end - start}s") total_time_e = time.time() logger.info(f"total runtime: {total_time_e-total_time_s}") # restore cobrapy logging behaviour cobralogger.handlers.clear() cobralogger.propagate = False