# Source code for gsmodutils.utils.scrumpy

from __future__ import print_function

import click
import json
import os

import cobra
import re

from gsmodutils.utils import StringIO


class ParseError(Exception):
    """Raised when a ScrumPy model description cannot be parsed (e.g. a cyclic ``Include``)."""


def load_scrumpy_model(filepath_or_string, name=None, model_id=None, media=None, objective_reactions=None,
                       obj_dir='min', fixed_fluxes=None):
    """
    Build a cobra model from a ScrumPy structural model file or model string.
    This hasn't been thoroughly tested so expect there to be bugs.

    To get a solution from the returned object you need to specify things like the atpase reaction and media.

    :param filepath_or_string: path to an existing ScrumPy file; anything that is not an existing file is
        parsed as a ScrumPy model string
    :param name: value assigned to ``model.name``
    :param model_id: value assigned to ``model.id``
    :param media: optional dict of exchange reaction id -> lower bound. When given, every reaction in
        ``model.exchanges`` that is not listed gets a lower bound of 0
    :param objective_reactions: list of reaction ids given an objective coefficient of 1.0
        (defaults to ``['Biomass']``); unknown ids are reported and skipped
    :param obj_dir: objective direction assigned to ``model.objective.direction``
    :param fixed_fluxes: optional dict of reaction id -> flux; lower and upper bound are both set to the flux.
        Unknown ids are reported and skipped
    :return: the populated ``cobra.Model``
    :raises TypeError: if ``fixed_fluxes`` is given but is not a dict
    """

    if objective_reactions is None:
        objective_reactions = ['Biomass']

    # Explicit check instead of ``assert`` so that validation survives ``python -O``
    if fixed_fluxes is not None and not isinstance(fixed_fluxes, dict):
        raise TypeError('fixed_fluxes must be a dict of reaction id -> flux, got {}'.format(
            type(fixed_fluxes).__name__))

    if os.path.isfile(filepath_or_string):
        # os.path.split copes with the platform separator and with files in the filesystem root,
        # unlike splitting the absolute path on a literal '/'
        rel_path, fp = os.path.split(os.path.abspath(filepath_or_string))
        reactions, metabolites, externals = parse_file(fp, rel_path=rel_path)
    else:
        rel_path = '.'
        reactions, metabolites, externals = parse_string(filepath_or_string, rel_path=rel_path)

    model = cobra.Model()
    seen_metabolites = set()
    for mid in metabolites:
        # The parser lists a metabolite once per occurrence; only create it the first time
        if mid in seen_metabolites:
            continue
        seen_metabolites.add(mid)
        # ScrumPy does not use compartments, so everything is placed in 'e'.
        # NOTE(review): the previous code special-cased "x_"-prefixed / External metabolites but assigned
        # the same 'e' compartment in both branches - confirm whether internal metabolites were meant
        # to live in a different compartment.
        model.add_metabolites([cobra.Metabolite(id=mid, compartment='e')])

    added_reactions = set()
    for reaction in reactions:
        rid = reaction['id']
        # First definition of a reaction id wins; later duplicates are ignored
        if rid in added_reactions:
            continue
        r = cobra.Reaction(rid)
        model.add_reactions([r])
        r.lower_bound = reaction['bounds'][0]
        r.upper_bound = reaction['bounds'][1]
        r.add_metabolites(reaction['metabolites'])
        added_reactions.add(rid)

    # We need to add transporters for external metabolites not defined with the "External" directive
    for metabolite in model.metabolites:
        if metabolite.id[:2] == "x_":
            r = cobra.Reaction("EX_{}".format(metabolite.id[2:]))
            model.add_reactions([r])
            r.lower_bound = -1000.0
            r.upper_bound = 1000.0
            r.add_metabolites({
                metabolite.id: -1.0
            })

    if media is not None:
        for ex_reaction in model.exchanges:
            ex_reaction.lower_bound = media.get(ex_reaction.id, 0)

    if fixed_fluxes is not None:
        for rid, flux in fixed_fluxes.items():
            try:
                reaction = model.reactions.get_by_id(rid)
            except KeyError:
                click.echo('Error setting fixed flux for reaction id {}, not found'.format(rid))
            else:
                reaction.lower_bound = flux
                reaction.upper_bound = flux

    for oreact in objective_reactions:
        try:
            objreac = model.reactions.get_by_id(oreact)
        except KeyError:
            # Reported the same way as the fixed-flux lookup failure above
            click.echo('Error setting objective, reaction name {} not found'.format(oreact))
        else:
            objreac.objective_coefficient = 1.0

    model.objective.direction = obj_dir

    model.id = model_id
    model.name = name
    return model


def get_tokens(line):
    """
    Split one line of a ScrumPy file into tokens.

    Everything from the first ``#`` onwards is discarded as a comment (this happens before quotes are
    examined, so a ``#`` inside a quoted name also starts a comment).

    Outside of quotes:

    * whitespace separates tokens and is dropped,
    * ``(``, ``)``, ``:`` and ``,`` are tokens of their own,
    * the direction markers ``->``, ``<-`` and ``<>`` are tokens of their own, whether or not they are
      surrounded by whitespace (``A->2 B`` gives ``['A', '->', '2', 'B']``).

    A quoted section (single or double quotes) is returned as one token *including* its quotes and is
    copied verbatim. An unterminated quote runs to the end of the line.

    :param line: a single line of ScrumPy source
    :return: list of token strings
    """
    arrows = ("->", "<-", "<>")
    punctuation = ("(", ")", ":", ",")

    text = line.strip().split('#')[0]

    tokens = []
    current = ""
    open_quote = None  # the quote character that started the current quoted token, if any
    pos = 0
    length = len(text)

    while pos < length:
        ch = text[pos]

        if open_quote is not None:
            # Inside quotes everything is literal until the matching quote character
            current += ch
            if ch == open_quote:
                tokens.append(current)
                current = ""
                open_quote = None
            pos += 1
            continue

        if ch in ('"', "'"):
            if current:
                tokens.append(current)
            current = ch
            open_quote = ch
            pos += 1
            continue

        if text[pos:pos + 2] in arrows:
            # Direction markers end the pending token even without surrounding spaces
            if current:
                tokens.append(current)
                current = ""
            tokens.append(text[pos:pos + 2])
            pos += 2
            continue

        if ch.isspace() or ch in punctuation:
            if current:
                tokens.append(current)
                current = ""
            if not ch.isspace():
                tokens.append(ch)
            pos += 1
            continue

        current += ch
        pos += 1

    if current:
        tokens.append(current)

    return tokens


def parse_file(filepath, fp_stack=None, rel_path=''):
    """
    Recursive function - takes in a scrumpy spy file and parses it, returning a set of reactions

    Note this code is not fully tested. Expect some bugs.

    :param filepath: file name, resolved relative to ``rel_path``
    :param fp_stack: list of the file names currently being parsed (the chain of ``Include`` directives
        leading to this file). It is used to detect cyclic includes. ``filepath`` is pushed for the
        duration of the parse and removed again afterwards, so a file included from two sibling
        branches is not mistaken for a cycle.
    :param rel_path: directory that ``filepath`` (and any included file) is relative to
    :return: tuple ``(reactions, metabolites, externals)`` as produced by ``parse_fobj``
    """
    if fp_stack is None:
        fp_stack = []
    fp_stack.append(filepath)

    try:
        with open(os.path.join(rel_path, filepath)) as infile:
            reactions, metabolites, externals = parse_fobj(infile, fp_stack, rel_path, filepath)
    finally:
        # Only files that are still being parsed belong on the stack
        fp_stack.pop()
    return reactions, metabolites, externals


def parse_string(spy_string, rel_path='.'):
    """
    Parse a ScrumPy model held in a string.

    The text is copied into an in-memory file object and handed to ``parse_fobj`` with an empty include
    stack and the source name ``"scrumpy_string"``.

    :param spy_string: ScrumPy model text
    :param rel_path: directory against which ``Include`` directives are resolved
    :return: tuple ``(reactions, metabolites, externals)`` as produced by ``parse_fobj``
    """
    with StringIO() as buffer:
        buffer.write(spy_string)
        buffer.seek(0)  # rewind so the parser reads from the first line
        reactions, metabolites, externals = parse_fobj(buffer, [], rel_path, "scrumpy_string")
    return reactions, metabolites, externals


def parse_fobj(infile, fp_stack, rel_path, source_name):
    """
    Parse an open ScrumPy source (any iterable of lines) into reactions, metabolites and externals.

    The parser is a small state machine driven by the tokens of ``get_tokens``; state carries over
    from one line to the next:

    * ``name :`` starts a reaction whose id is the token preceding the colon *on the same line*.
      Inside a reaction, ``->``, ``<-`` and ``<>`` set the bounds and switch from substrates
      (coefficient -1) to products (coefficient +1), ``+`` is ignored, numbers and ``n/m`` fractions
      set the stoichiometry of the next metabolite, and ``~`` ends the reaction.
    * ``External( a, b )`` registers each name as a metabolite and as an external and adds a reversible
      ``<name>_tx`` reaction consuming it.
    * ``Include( f1, f2 )`` parses each file with ``parse_file`` and merges its results.

    :param infile: iterable yielding the lines of the ScrumPy source
    :param fp_stack: list of file names currently being parsed, used to detect cyclic includes
    :param rel_path: directory that included files are relative to
    :param source_name: label stored in the ``source`` field of every reaction defined here
    :return: tuple ``(reactions, metabolites, externals)``. ``reactions`` is a list of dicts with the keys
        ``id``, ``metabolites`` (name -> coefficient), ``source`` and, once a direction token has been
        seen, ``bounds``; reactions written in the file also carry ``line`` (0-based line index).
        ``metabolites`` lists a name once per occurrence; ``externals`` lists the ``External`` names.
    :raises ParseError: on a cyclic ``Include`` or a fractional stoichiometry with a zero denominator
    """
    # A fraction must be the whole token; a bare prefix match would let things like "1/2x" through
    fraction_match = re.compile(r"^([0-9]+)/([0-9]+)$")
    reactions = []
    metabolites = []
    externals = []

    in_include = False
    in_external = False
    in_reaction = False
    reaction = None
    s_coef = -1
    si = 1.0

    for linecount, line in enumerate(infile):
        # Ignore anything after comments
        tokens = get_tokens(line)
        prev_token = ''
        for token in tokens:
            if in_reaction:

                if token == '~':
                    in_reaction = False
                    s_coef = -1
                    reactions.append(reaction)
                elif token in ("<-", "<>", "->"):
                    s_coef = 1
                    if token == "<-":
                        reaction['bounds'] = [-1000.0, 0.0]
                    elif token == "->":
                        reaction['bounds'] = [0.0, 1000.0]
                    else:
                        reaction['bounds'] = [-1000.0, 1000.0]

                elif token == "+":
                    pass
                else:
                    try:
                        si = float(token)
                    except ValueError:
                        fraction = fraction_match.match(token)
                        if fraction:
                            # Computed explicitly rather than with eval(): no code execution on file
                            # content and true division on every Python version
                            numerator = float(fraction.group(1))
                            denominator = float(fraction.group(2))
                            if denominator == 0:
                                raise ParseError('Zero denominator in stoichiometry {} ({}, line {})'.format(
                                    token, source_name, linecount + 1))
                            si = numerator / denominator
                        elif len(token.strip()):
                            metab = token.replace('"', '').replace("'", '')

                            metabolites.append(metab)
                            # not a stoichiometric value
                            reaction['metabolites'][metab] = s_coef * si
                            si = 1.0

                prev_token = token
                continue

            if in_external:
                if token in (',', '('):
                    continue
                elif token == ')':
                    in_external = False
                else:
                    # Strip both quote styles, matching how metabolite names are unquoted in reactions
                    token = token.replace('"', '').replace("'", '')
                    metabolites.append(token)
                    externals.append(token)
                    rs = dict(
                        id='{}_tx'.format(token),
                        metabolites={token: -1.0},
                        source=source_name,
                        bounds=[-1000.0, 1000.0]
                    )

                    reactions.append(rs)

                continue

            if in_include:

                if token in (',', '('):
                    continue

                elif token == ')':
                    in_include = False
                else:
                    # Accept quoted file names as well as bare ones
                    include_name = token.replace('"', '').replace("'", '')
                    if include_name in fp_stack:
                        raise ParseError('Cyclic dependency for file {}'.format(include_name))
                    rset, mset, exset = parse_file(include_name, fp_stack, rel_path)
                    reactions += rset
                    metabolites += mset
                    externals += exset
                continue

            if token == 'External':
                in_external = True

            elif token == 'Include':
                in_include = True
            elif token == ":":
                in_reaction = True
                s_coef = -1
                # Do not let a dangling coefficient from a previous reaction leak into this one
                si = 1.0
                reaction = dict(
                    source=source_name,
                    metabolites={},
                    id=prev_token.replace('"', '').replace("'", ""),
                    line=linecount,
                )

            prev_token = token

    return reactions, metabolites, externals


def _load_json_option(path, option_name):
    """Load the JSON file given for a command line option; warn and return None if it does not exist."""
    if path is None:
        return None
    if not os.path.exists(path):
        # Keep going without the option, but tell the user instead of silently dropping it
        click.echo('Warning: {} file {} not found, ignoring'.format(option_name, path), err=True)
        return None
    with open(path) as handle:
        return json.load(handle)


@click.command()
@click.argument('model')
@click.argument('model_id')
@click.option('--name', default=None, help='Specify a name for this model')
@click.option('--output', default='omodel.json', help='output location for json file')
@click.option('--media', default=None, type=str, help='A growth media constraints file')
@click.option('--fixed_fluxes', default=None, help='Path to a json dictionary containing biomass composition')
@click.option('--objective', default='Biomass', help='Objective reaction id')
@click.option('--objective_direction', default='min', help='objective direction (min or max)')
def scrumpy_to_cobra(model, model_id, name, output, media, fixed_fluxes, objective,
                     objective_direction):
    """
    Command line utility for parsing scrumpy files and creating cobrapy models

    By default, models use the minimisation of flux objective function approach, though if a lumped biomass reaction
    is present, this can be specified as a maximisation objective.

    For the minimisation of fluxes approach a biomass composition should be specified.
    This should be a json file of fixed biomass transporter reaction identifiers and their associated flux value.

    If the lumped biomass reaction is used the media composition will be required for growth.
    These values are the lower bounds for the fluxes on uptake reactions.
    """
    # Both options are paths to JSON files; a missing file is reported and then treated as "not given"
    fixed_fluxes = _load_json_option(fixed_fluxes, '--fixed_fluxes')
    media = _load_json_option(media, '--media')

    cobra_model = load_scrumpy_model(
        model,
        media=media,
        objective_reactions=[objective],
        obj_dir=objective_direction,
        fixed_fluxes=fixed_fluxes,
        name=name,
        model_id=model_id,
    )

    cobra.io.save_json_model(cobra_model, output)