Source code for schrodinger.application.desmond.packages.viparr1.viparr.viparr_residue

from . import elements
from . import isomorph


[docs]def dfs(G, marked, start_vert, ordering): # store vertices in dfs order ordering.append(start_vert) marked[start_vert] = 1 # loop over neighbors of start_vert that have not been marked for i in G[start_vert]: if marked[i] != 0: continue dfs(G, marked, i, ordering)
[docs]def templates_reorder_atoms(templates): '''Reorder the atoms in the template to be more suitable for the isomorphism test. Input is a raw template (not internal format). This function must be called before converting the template to internal format. ''' for template in templates.values(): G, Gd = residue_structure_template(template) Ga = [elements.n2e[(a[1])] for a in template['atoms']] for i, d in enumerate(Gd): Ga[i] = Ga[i] + str(Gd[i]) # compute frequency for each type di = dict() for ga in Ga: di[ga] = di.setdefault(ga, 0) + 1 # traverse dictionary and find key with lowest frequency lowkey, lowval = di.popitem() for key, val in di.items(): if val < lowval: lowkey, lowval = key, val # now pick an atom with this key first = Ga.index(lowkey) # construct depth-first search ordering rooted at first atom marked = [0] * len(G) ordering = [] dfs(G, marked, first, ordering) # remap the atom order old = template['atoms'] atoms2 = [] for i in range(len(ordering)): atoms2.append(old[ordering[i]]) template['atoms'] = atoms2
[docs]def get_hill_order(elm): '''Given a list of elements, return the list in hill order ( C 1st, H 2nd, others in alpha order. If no C, use alpha order). ''' h = elm[:] h.sort() if ('C' in h): h.remove('C') h.insert(0, 'C') if ('H' in h): h.remove('H') h.insert(1, 'H') return h
[docs]def residue_formula(nlist): '''Given a list of atomic numbers, return a string which is its chemical formula (elements given in hill order). ''' d = dict() # count frequency of each element for an in nlist: name = elements.n2e[an] d[name] = d.setdefault(name, 0) + 1 # loop over keys in residue in order keys = get_hill_order(list(d)) formula = '' for name in keys: count = d[name] formula = formula + name + '(%d)' % count return formula
[docs]def get_formula_dict(templates): '''Dictionary for mapping formulas to a list of template residue names. ''' formula_dict = dict() for resname in list(templates): nlist = [a[1] for a in templates[resname]['atoms']] formula = residue_formula(nlist) formula_dict.setdefault(formula, []).append(resname) return formula_dict
[docs]def residue_structure_mae(neighbors, res): '''Return structure of mae residue. ''' anumbers = res.nlist # list of residue atom numbers (0-based) # create map of atom numbers to index d = dict() for i, a in enumerate(anumbers): d[a] = i # create and fill graph structure G = [[] for i in range(len(anumbers))] deg = [0] * len(anumbers) # loop over all bonds for i, a in enumerate(anumbers): deg[i] = len(neighbors[a]) for j in neighbors[a]: ii = d.get(a) jj = d.get(j - 1) if jj == None: # Must be a bond in-between residues #print 'In residue <%s>, ignoring mae bond <%d,%d>' % (res.name, a+1, j) pass else: G[ii].append(jj) # don't need reverse, since all both directions will be traversed in loop return G, deg
[docs]def residue_structure_template(template): '''Return structure of template residue. ''' # create map of atom names to index d = dict() for i, a in enumerate(template['atoms']): d[a[0]] = i # create and fill graph structure G = [[] for i in range(len(template['atoms']))] deg = [0] * len(template['atoms']) if 'bonds' in template: for i, j in template['bonds']: if d.get(i) != None and d.get(j) != None: G[d[i]].append(d[j]) G[d[j]].append(d[i]) if d.get(i) != None: deg[d[i]] = deg[d[i]] + 1 if d.get(j) != None: deg[d[j]] = deg[d[j]] + 1 return G, deg
[docs]def res_add_externals(res, template, neighbors): '''Add external atoms to res.atoms dict (Does not depend on $dollar sign.) ''' if 'bonds' not in template: return # loop over bonds in template for ai, aj in template['bonds']: i = res.atoms.get(ai) j = res.atoms.get(aj) if i != None and j != None: continue # normal case of internal bond if i == None and j == None: raise AssertionError('Residue <"%s",%d> (%s) does not contain bond <%s,%s>' % \ (res.chain, res.resnum_orig, res.name_orig, ai, aj)) if j == None: neigh = set([k - 1 for k in neighbors[i]]) neigh.difference_update(set(res.nlist)) if len(neigh) != 1: print(neigh) raise AssertionError( 'Could not identify atom of external bond <%s,%s>' % (ai, aj)) res.atoms[aj] = neigh.pop() # 0-based if i == None: neigh = set([k - 1 for k in neighbors[j]]) neigh.difference_update(set(res.nlist)) if len(neigh) != 1: print(neigh) raise AssertionError( 'Could not identify atom of external bond <%s,%s>' % (ai, aj)) res.atoms[ai] = neigh.pop() # 0-based
[docs]def compare_lists(gl, hl): '''Given two lists, check that they contain the same elements (which may be repeated). ''' gd = dict() hd = dict() for g in gl: gd[g] = gd.setdefault(g, 0) + 1 for h in hl: hd[h] = hd.setdefault(h, 0) + 1 return (gd == hd)
[docs]def get_residue_list(atoms, neighbors, templates): '''Top-level function for constructing the residue list. In this new version, each residue knows about the immediate atoms that connect to it from outside the residue. Note that in this new version, it is not important that the residues be ordered linearly in a chain; i.e., the list data structure can be replaced by a dict. ''' class Res(object): def __init__(self): self.alist = list() # list of atom names self.nlist = list() # list of atom indices in same order (0-based) self.chain = None # needed in error messages self.resnum_orig = None # needed in error messages self.name_orig = None self.name = None self.atoms = None # mapping of atom names to indices self.match = None # determine number of residues (atom.resnum is 1-based) # which is simply the maximum residue number num_residues = 0 for atom in atoms: if atom.resnum > num_residues: num_residues = atom.resnum # traverse all atoms to create the raw residue list # and also check that mae residue names are consistent residue_list = [Res() for i in range(num_residues)] for i, atom in enumerate(atoms): res = residue_list[atom.resnum - 1] res.alist.append(atom.pdbname) res.nlist.append(i) # 0-based atom index in atoms data structure res.chain = atom.chain res.resnum_orig = atom.resnum_orig # assign residue name and check that residue names are consistent if res.name_orig == None: res.name_orig = atom.pdbres res.name = atom.pdbres # will be overwritten with new name else: if res.name_orig != atom.pdbres: raise AssertionError('In mae file, residue <"%s",%d> uses multiple names: %s %s' % \ (res.chain, res.resnum_orig, res.name_orig, atom.pdbres)) # construct formula_dict formula_dict = get_formula_dict(templates) # map each residue to a template for res in residue_list: # compute structure of mae residue, call it H H, Hd = residue_structure_mae(neighbors, res) Ha = [elements.n2e[atoms[i].atomicnum] for i in res.nlist] for i, d in enumerate(Hd): Ha[i] = Ha[i] + str(Hd[i]) # formula for this residue formula = residue_formula([atoms[i].atomicnum for i in res.nlist]) # list of candidate template residues to be checked for isomorphism candidates = formula_dict.get(formula, []) # check isomorphism for each candidate, but if more than one matches, # then it is an error in the force field resmatches = [] for candidate in candidates: G, Gd = residue_structure_template(templates[candidate]) Ga = [elements.n2e[(a[1])] for a in templates[candidate]['atoms']] for i, d in enumerate(Gd): Ga[i] = Ga[i] + str(Gd[i]) # check that Ga and Ha are compatible if not compare_lists(Ga, Ha): continue f = [] ret = isomorph.isomorph(G, H, Ga, Ha, f) if ret: resmatches.append(candidate) if len(resmatches) > 1: print("Searching through candidates: ", candidates) print("Matched at least: ", resmatches) raise AssertionError( 'Error in force field: multiple templates match residue <"%s",%d> (%s)' % (res.chain, res.resnum_orig, res.name_orig)) # map residue name res.name = candidate # set the new name of this residue # remap atom names if the set of template names and mae names are different atom_names_in_res_template = [ a[0] for a in templates[candidate]['atoms'] ] if set(atom_names_in_res_template) != set(res.alist): for i in range(len(res.alist)): # f is a mapping of G to H res.alist[f[i]] = atom_names_in_res_template[i] # otherwise the names are not mapped # create dictionary using the (new) names res.atoms = dict() for i, a in enumerate(res.alist): res.atoms[a] = res.nlist[i] res_add_externals(res, templates[candidate], neighbors) if len(resmatches) == 0: res.match = False else: res.match = True return residue_list