Source code for schrodinger.application.desmond.packages.viparr1.viparr.viparr_residue

from . import elements
from . import isomorph


[docs]def dfs(G, marked, start_vert, ordering):
    # store vertices in dfs order
    ordering.append(start_vert)
    marked[start_vert] = 1

    # loop over neighbors of start_vert that have not been marked
    for i in G[start_vert]:
        if marked[i] != 0:
            continue
        dfs(G, marked, i, ordering)


[docs]def templates_reorder_atoms(templates):
    '''Reorder the atoms in the template to be more suitable for the isomorphism test.
     Input is a raw template (not internal format).
     This function must be called before converting the template to internal format.
  '''
    for template in templates.values():

        G, Gd = residue_structure_template(template)
        Ga = [elements.n2e[(a[1])] for a in template['atoms']]
        for i, d in enumerate(Gd):
            Ga[i] = Ga[i] + str(Gd[i])

        # compute frequency for each type
        di = dict()
        for ga in Ga:
            di[ga] = di.setdefault(ga, 0) + 1
        # traverse dictionary and find key with lowest frequency
        lowkey, lowval = di.popitem()
        for key, val in di.items():
            if val < lowval:
                lowkey, lowval = key, val
        # now pick an atom with this key
        first = Ga.index(lowkey)

        # construct depth-first search ordering rooted at first atom
        marked = [0] * len(G)
        ordering = []
        dfs(G, marked, first, ordering)

        # remap the atom order
        old = template['atoms']
        atoms2 = []
        for i in range(len(ordering)):
            atoms2.append(old[ordering[i]])
        template['atoms'] = atoms2


[docs]def get_hill_order(elm):
    '''Given a list of elements, return the list in hill order
     ( C 1st, H 2nd, others in alpha order. If no C, use alpha order).
  '''
    h = elm[:]
    h.sort()
    if ('C' in h):
        h.remove('C')
        h.insert(0, 'C')
        if ('H' in h):
            h.remove('H')
            h.insert(1, 'H')
    return h


[docs]def residue_formula(nlist):
    '''Given a list of atomic numbers, return a string which is its chemical formula
     (elements given in hill order).
  '''
    d = dict()
    # count frequency of each element
    for an in nlist:
        name = elements.n2e[an]
        d[name] = d.setdefault(name, 0) + 1

    # loop over keys in residue in order
    keys = get_hill_order(list(d))
    formula = ''
    for name in keys:
        count = d[name]
        formula = formula + name + '(%d)' % count
    return formula


[docs]def get_formula_dict(templates):
    '''Dictionary for mapping formulas to a list of template residue names.
  '''
    formula_dict = dict()
    for resname in list(templates):
        nlist = [a[1] for a in templates[resname]['atoms']]
        formula = residue_formula(nlist)
        formula_dict.setdefault(formula, []).append(resname)
    return formula_dict


[docs]def residue_structure_mae(neighbors, res):
    '''Return structure of mae residue.
  '''
    anumbers = res.nlist  # list of residue atom numbers (0-based)
    # create map of atom numbers to index
    d = dict()
    for i, a in enumerate(anumbers):
        d[a] = i
    # create and fill graph structure
    G = [[] for i in range(len(anumbers))]
    deg = [0] * len(anumbers)
    # loop over all bonds
    for i, a in enumerate(anumbers):
        deg[i] = len(neighbors[a])
        for j in neighbors[a]:
            ii = d.get(a)
            jj = d.get(j - 1)
            if jj == None:
                # Must be a bond in-between residues
                #print 'In residue <%s>, ignoring mae bond <%d,%d>' % (res.name, a+1, j)
                pass
            else:
                G[ii].append(jj)
                # don't need reverse, since all both directions will be traversed in loop
    return G, deg


[docs]def residue_structure_template(template):
    '''Return structure of template residue.
  '''
    # create map of atom names to index
    d = dict()
    for i, a in enumerate(template['atoms']):
        d[a[0]] = i
    # create and fill graph structure
    G = [[] for i in range(len(template['atoms']))]
    deg = [0] * len(template['atoms'])
    if 'bonds' in template:
        for i, j in template['bonds']:
            if d.get(i) != None and d.get(j) != None:
                G[d[i]].append(d[j])
                G[d[j]].append(d[i])
            if d.get(i) != None:
                deg[d[i]] = deg[d[i]] + 1
            if d.get(j) != None:
                deg[d[j]] = deg[d[j]] + 1
    return G, deg


[docs]def res_add_externals(res, template, neighbors):
    '''Add external atoms to res.atoms dict
     (Does not depend on $dollar sign.)
  '''
    if 'bonds' not in template:
        return

    # loop over bonds in template
    for ai, aj in template['bonds']:
        i = res.atoms.get(ai)
        j = res.atoms.get(aj)
        if i != None and j != None:
            continue  # normal case of internal bond
        if i == None and j == None:
            raise AssertionError('Residue <"%s",%d> (%s) does not contain bond <%s,%s>' % \
                (res.chain, res.resnum_orig, res.name_orig, ai, aj))
        if j == None:
            neigh = set([k - 1 for k in neighbors[i]])
            neigh.difference_update(set(res.nlist))
            if len(neigh) != 1:
                print(neigh)
                raise AssertionError(
                    'Could not identify atom of external bond <%s,%s>' %
                    (ai, aj))
            res.atoms[aj] = neigh.pop()  # 0-based
        if i == None:
            neigh = set([k - 1 for k in neighbors[j]])
            neigh.difference_update(set(res.nlist))
            if len(neigh) != 1:
                print(neigh)
                raise AssertionError(
                    'Could not identify atom of external bond <%s,%s>' %
                    (ai, aj))
            res.atoms[ai] = neigh.pop()  # 0-based


[docs]def compare_lists(gl, hl):
    '''Given two lists, check that they contain the same elements (which may be repeated).
  '''
    gd = dict()
    hd = dict()
    for g in gl:
        gd[g] = gd.setdefault(g, 0) + 1
    for h in hl:
        hd[h] = hd.setdefault(h, 0) + 1
    return (gd == hd)


[docs]def get_residue_list(atoms, neighbors, templates):
    '''Top-level function for constructing the residue list.
     In this new version, each residue knows about the immediate atoms that connect to it
     from outside the residue.
     Note that in this new version, it is not important that the residues be ordered
     linearly in a chain; i.e., the list data structure can be replaced by a dict.
  '''

    class Res(object):

        def __init__(self):
            self.alist = list()  # list of atom names
            self.nlist = list()  # list of atom indices in same order (0-based)
            self.chain = None  # needed in error messages
            self.resnum_orig = None  # needed in error messages
            self.name_orig = None
            self.name = None
            self.atoms = None  # mapping of atom names to indices
            self.match = None

    # determine number of residues (atom.resnum is 1-based)
    # which is simply the maximum residue number
    num_residues = 0
    for atom in atoms:
        if atom.resnum > num_residues:
            num_residues = atom.resnum

    # traverse all atoms to create the raw residue list
    # and also check that mae residue names are consistent
    residue_list = [Res() for i in range(num_residues)]
    for i, atom in enumerate(atoms):
        res = residue_list[atom.resnum - 1]
        res.alist.append(atom.pdbname)
        res.nlist.append(i)  # 0-based atom index in atoms data structure
        res.chain = atom.chain
        res.resnum_orig = atom.resnum_orig
        # assign residue name and check that residue names are consistent
        if res.name_orig == None:
            res.name_orig = atom.pdbres
            res.name = atom.pdbres  # will be overwritten with new name
        else:
            if res.name_orig != atom.pdbres:
                raise AssertionError('In mae file, residue <"%s",%d> uses multiple names: %s %s' % \
                    (res.chain, res.resnum_orig, res.name_orig, atom.pdbres))

    # construct formula_dict
    formula_dict = get_formula_dict(templates)

    # map each residue to a template
    for res in residue_list:
        # compute structure of mae residue, call it H
        H, Hd = residue_structure_mae(neighbors, res)
        Ha = [elements.n2e[atoms[i].atomicnum] for i in res.nlist]
        for i, d in enumerate(Hd):
            Ha[i] = Ha[i] + str(Hd[i])

        # formula for this residue
        formula = residue_formula([atoms[i].atomicnum for i in res.nlist])

        # list of candidate template residues to be checked for isomorphism
        candidates = formula_dict.get(formula, [])

        # check isomorphism for each candidate, but if more than one matches,
        # then it is an error in the force field
        resmatches = []
        for candidate in candidates:
            G, Gd = residue_structure_template(templates[candidate])
            Ga = [elements.n2e[(a[1])] for a in templates[candidate]['atoms']]
            for i, d in enumerate(Gd):
                Ga[i] = Ga[i] + str(Gd[i])

            # check that Ga and Ha are compatible
            if not compare_lists(Ga, Ha):
                continue

            f = []
            ret = isomorph.isomorph(G, H, Ga, Ha, f)
            if ret:
                resmatches.append(candidate)
                if len(resmatches) > 1:
                    print("Searching through candidates: ", candidates)
                    print("Matched at least: ", resmatches)
                    raise AssertionError(
                        'Error in force field: multiple templates match residue <"%s",%d> (%s)'
                        % (res.chain, res.resnum_orig, res.name_orig))

                # map residue name
                res.name = candidate  # set the new name of this residue

                # remap atom names if the set of template names and mae names are different
                atom_names_in_res_template = [
                    a[0] for a in templates[candidate]['atoms']
                ]
                if set(atom_names_in_res_template) != set(res.alist):
                    for i in range(len(res.alist)):
                        # f is a mapping of G to H
                        res.alist[f[i]] = atom_names_in_res_template[i]
                # otherwise the names are not mapped

                # create dictionary using the (new) names
                res.atoms = dict()
                for i, a in enumerate(res.alist):
                    res.atoms[a] = res.nlist[i]

                res_add_externals(res, templates[candidate], neighbors)

        if len(resmatches) == 0:
            res.match = False
        else:
            res.match = True

    return residue_list