from . import elements
from . import isomorph
[docs]def dfs(G, marked, start_vert, ordering):
# store vertices in dfs order
ordering.append(start_vert)
marked[start_vert] = 1
# loop over neighbors of start_vert that have not been marked
for i in G[start_vert]:
if marked[i] != 0:
continue
dfs(G, marked, i, ordering)
[docs]def templates_reorder_atoms(templates):
'''Reorder the atoms in the template to be more suitable for the isomorphism test.
Input is a raw template (not internal format).
This function must be called before converting the template to internal format.
'''
for template in templates.values():
G, Gd = residue_structure_template(template)
Ga = [elements.n2e[(a[1])] for a in template['atoms']]
for i, d in enumerate(Gd):
Ga[i] = Ga[i] + str(Gd[i])
# compute frequency for each type
di = dict()
for ga in Ga:
di[ga] = di.setdefault(ga, 0) + 1
# traverse dictionary and find key with lowest frequency
lowkey, lowval = di.popitem()
for key, val in di.items():
if val < lowval:
lowkey, lowval = key, val
# now pick an atom with this key
first = Ga.index(lowkey)
# construct depth-first search ordering rooted at first atom
marked = [0] * len(G)
ordering = []
dfs(G, marked, first, ordering)
# remap the atom order
old = template['atoms']
atoms2 = []
for i in range(len(ordering)):
atoms2.append(old[ordering[i]])
template['atoms'] = atoms2
[docs]def get_hill_order(elm):
'''Given a list of elements, return the list in hill order
( C 1st, H 2nd, others in alpha order. If no C, use alpha order).
'''
h = elm[:]
h.sort()
if ('C' in h):
h.remove('C')
h.insert(0, 'C')
if ('H' in h):
h.remove('H')
h.insert(1, 'H')
return h
[docs]def residue_structure_mae(neighbors, res):
'''Return structure of mae residue.
'''
anumbers = res.nlist # list of residue atom numbers (0-based)
# create map of atom numbers to index
d = dict()
for i, a in enumerate(anumbers):
d[a] = i
# create and fill graph structure
G = [[] for i in range(len(anumbers))]
deg = [0] * len(anumbers)
# loop over all bonds
for i, a in enumerate(anumbers):
deg[i] = len(neighbors[a])
for j in neighbors[a]:
ii = d.get(a)
jj = d.get(j - 1)
if jj == None:
# Must be a bond in-between residues
#print 'In residue <%s>, ignoring mae bond <%d,%d>' % (res.name, a+1, j)
pass
else:
G[ii].append(jj)
# don't need reverse, since all both directions will be traversed in loop
return G, deg
[docs]def residue_structure_template(template):
'''Return structure of template residue.
'''
# create map of atom names to index
d = dict()
for i, a in enumerate(template['atoms']):
d[a[0]] = i
# create and fill graph structure
G = [[] for i in range(len(template['atoms']))]
deg = [0] * len(template['atoms'])
if 'bonds' in template:
for i, j in template['bonds']:
if d.get(i) != None and d.get(j) != None:
G[d[i]].append(d[j])
G[d[j]].append(d[i])
if d.get(i) != None:
deg[d[i]] = deg[d[i]] + 1
if d.get(j) != None:
deg[d[j]] = deg[d[j]] + 1
return G, deg
[docs]def res_add_externals(res, template, neighbors):
'''Add external atoms to res.atoms dict
(Does not depend on $dollar sign.)
'''
if 'bonds' not in template:
return
# loop over bonds in template
for ai, aj in template['bonds']:
i = res.atoms.get(ai)
j = res.atoms.get(aj)
if i != None and j != None:
continue # normal case of internal bond
if i == None and j == None:
raise AssertionError('Residue <"%s",%d> (%s) does not contain bond <%s,%s>' % \
(res.chain, res.resnum_orig, res.name_orig, ai, aj))
if j == None:
neigh = set([k - 1 for k in neighbors[i]])
neigh.difference_update(set(res.nlist))
if len(neigh) != 1:
print(neigh)
raise AssertionError(
'Could not identify atom of external bond <%s,%s>' %
(ai, aj))
res.atoms[aj] = neigh.pop() # 0-based
if i == None:
neigh = set([k - 1 for k in neighbors[j]])
neigh.difference_update(set(res.nlist))
if len(neigh) != 1:
print(neigh)
raise AssertionError(
'Could not identify atom of external bond <%s,%s>' %
(ai, aj))
res.atoms[ai] = neigh.pop() # 0-based
[docs]def compare_lists(gl, hl):
'''Given two lists, check that they contain the same elements (which may be repeated).
'''
gd = dict()
hd = dict()
for g in gl:
gd[g] = gd.setdefault(g, 0) + 1
for h in hl:
hd[h] = hd.setdefault(h, 0) + 1
return (gd == hd)
[docs]def get_residue_list(atoms, neighbors, templates):
'''Top-level function for constructing the residue list.
In this new version, each residue knows about the immediate atoms that connect to it
from outside the residue.
Note that in this new version, it is not important that the residues be ordered
linearly in a chain; i.e., the list data structure can be replaced by a dict.
'''
class Res(object):
def __init__(self):
self.alist = list() # list of atom names
self.nlist = list() # list of atom indices in same order (0-based)
self.chain = None # needed in error messages
self.resnum_orig = None # needed in error messages
self.name_orig = None
self.name = None
self.atoms = None # mapping of atom names to indices
self.match = None
# determine number of residues (atom.resnum is 1-based)
# which is simply the maximum residue number
num_residues = 0
for atom in atoms:
if atom.resnum > num_residues:
num_residues = atom.resnum
# traverse all atoms to create the raw residue list
# and also check that mae residue names are consistent
residue_list = [Res() for i in range(num_residues)]
for i, atom in enumerate(atoms):
res = residue_list[atom.resnum - 1]
res.alist.append(atom.pdbname)
res.nlist.append(i) # 0-based atom index in atoms data structure
res.chain = atom.chain
res.resnum_orig = atom.resnum_orig
# assign residue name and check that residue names are consistent
if res.name_orig == None:
res.name_orig = atom.pdbres
res.name = atom.pdbres # will be overwritten with new name
else:
if res.name_orig != atom.pdbres:
raise AssertionError('In mae file, residue <"%s",%d> uses multiple names: %s %s' % \
(res.chain, res.resnum_orig, res.name_orig, atom.pdbres))
# construct formula_dict
formula_dict = get_formula_dict(templates)
# map each residue to a template
for res in residue_list:
# compute structure of mae residue, call it H
H, Hd = residue_structure_mae(neighbors, res)
Ha = [elements.n2e[atoms[i].atomicnum] for i in res.nlist]
for i, d in enumerate(Hd):
Ha[i] = Ha[i] + str(Hd[i])
# formula for this residue
formula = residue_formula([atoms[i].atomicnum for i in res.nlist])
# list of candidate template residues to be checked for isomorphism
candidates = formula_dict.get(formula, [])
# check isomorphism for each candidate, but if more than one matches,
# then it is an error in the force field
resmatches = []
for candidate in candidates:
G, Gd = residue_structure_template(templates[candidate])
Ga = [elements.n2e[(a[1])] for a in templates[candidate]['atoms']]
for i, d in enumerate(Gd):
Ga[i] = Ga[i] + str(Gd[i])
# check that Ga and Ha are compatible
if not compare_lists(Ga, Ha):
continue
f = []
ret = isomorph.isomorph(G, H, Ga, Ha, f)
if ret:
resmatches.append(candidate)
if len(resmatches) > 1:
print("Searching through candidates: ", candidates)
print("Matched at least: ", resmatches)
raise AssertionError(
'Error in force field: multiple templates match residue <"%s",%d> (%s)'
% (res.chain, res.resnum_orig, res.name_orig))
# map residue name
res.name = candidate # set the new name of this residue
# remap atom names if the set of template names and mae names are different
atom_names_in_res_template = [
a[0] for a in templates[candidate]['atoms']
]
if set(atom_names_in_res_template) != set(res.alist):
for i in range(len(res.alist)):
# f is a mapping of G to H
res.alist[f[i]] = atom_names_in_res_template[i]
# otherwise the names are not mapped
# create dictionary using the (new) names
res.atoms = dict()
for i, a in enumerate(res.alist):
res.atoms[a] = res.nlist[i]
res_add_externals(res, templates[candidate], neighbors)
if len(resmatches) == 0:
res.match = False
else:
res.match = True
return residue_list