Source code for schrodinger.structutils.assignbondorders

"""
A module to assign bond orders based on molecular geometry.

Assigns double and triple bonds to structures based on molecular geometry
(bond length, bond angles, and dihedral angles). Useful when importing
ligands from PDBs into Maestro.   Please check the output structure for
errors and compare it to the molecular formula. If this script assigns
bond orders incorrectly to a reasonable structure, please email the
maestro file of the structure to help@schrodinger.com.

There is a single public function::

    assignbondorders.assign_st(input_st, atoms=None, neutralize=False,
                               problem_only=False, skip_assigned_residues=True,
                               _logger=None)

Assigns bond orders to atom list [atoms] of structure input_st and returns a
list of bonds that were adjusted. Bond orders are assigned for all atoms if the
atoms list is empty or not specified.

Copyright (c) Schrodinger, LLC. All rights reserved.

"""

import io
import os
import warnings
import zipfile

import numpy
from rdkit import Chem

from schrodinger import structure
from schrodinger.comparison import atom_mapper
from schrodinger.infra import structure as infrastructure
from schrodinger.job import util
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.thirdparty import rdkit_adapter
from schrodinger.utils import log

# Check whether SCHRODINGER_PYTHON_DEBUG is set for debugging:
DEBUG = (log.get_environ_log_level() <= log.DEBUG)

logger = log.get_output_logger("schrodinger.structutils.assignbondorders")
if DEBUG:
    logger.setLevel(log.DEBUG)

DEBUG_AROMATIC = True
DEBUG_BOND_SCORE = True
DEBUG_GROUP = True

# WARNING: Tampering with this value may break something else!!!
DOUBLE_BOND_THRESHOLD = 5.0

ASSIGNED_AROMATIC, NOT_AROMATIC, NOT_ASSIGNED = list(range(3))

# Maximum number of bonds that can be made to an atom (by element):
MAX_VALENCE = [
    0, 1, 1, 1, 2, 6, 4, 4, 2, 1, 1, 1, 2, 8, 8, 8, 8, 1, 1, 1, 2, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 1, 1, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 8, 1, 1, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9
]

# FIXME Update these values with a dict for actual bond lengths
# generated by python/test/assignbondorders_test/calc_bond_lengths.py
ATOM_RADII_DICT = {
    1: 0.23,  # H
    5: 0.83,  # B
    6: 0.68,  # C
    7: 0.68,  # N
    8: 0.68,  # O
    9: 0.64,  # F
    14: 1.20,  # Si
    15: 1.05,  # P
    16: 1.02,  # S
    17: 0.99,  # Cl
    33: 1.21,  # As
    34: 1.22,  # Se
    35: 1.21,  # Br
    52: 1.47,  # Te
    53: 1.40,  # I
    # NOTE: Not all of these are used
}

CCD_FILE = os.path.join(util.hunt('mmshare', dir='data'), 'mmpdbx',
                        'cif_components.zip')
assert os.path.isfile(CCD_FILE)
CCD_CACHE = {}
CCD_ATOM_PROPERTY = 's_ppw_CCD_assignment_status'


[docs]def debug(txt): """ For general debug messages """ logger.debug(txt)
[docs]def debuggroup(msg): """ For debugging group bond assignments """ if DEBUG_GROUP: debug(msg)
[docs]def debugbonders(msg): """ For debugging aromatic ring assignments """ if DEBUG_AROMATIC: debug(msg)
[docs]def debugbond(msg): """ For debugging code that calculates bond "scores" """ if DEBUG_BOND_SCORE: debug(msg)
[docs]def warning(msg): # NOTE: For now, print in debug mode only: logger.debug(msg)
[docs]def get_single_bond_length(a1, a2): """ Returns ideal length of a single-order bond between specified atoms. If either of the atoms is non-common, returns 0.0. """ radius1 = ATOM_RADII_DICT.get(a1.atomic_number) radius2 = ATOM_RADII_DICT.get(a2.atomic_number) if not radius1 or not radius2: return 0.0 else: return radius1 + radius2 + 0.1
[docs]def get_neighbors(st, atom): """ Returns a list of atoms that <atom> is bound to.""" return [b.atom2.index for b in st.atom[int(atom)].bond]
[docs]def calculate_average_angle(st, atom_num): """ Calculates the average of all bond angles around the input atom. Returns 0 if the angle can not be calculated. Close to 109 degrees = tetrahedral Close to 120 degrees = planar Close to 180 degrees = linear """ neighbors = get_neighbors(st, atom_num) num_n = len(neighbors) if num_n <= 1: return 0 elif num_n >= 4: return 0 elif num_n == 2: return st.measure(neighbors[0], atom_num, neighbors[1]) elif num_n == 3: angle1 = st.measure(neighbors[0], atom_num, neighbors[1]) angle2 = st.measure(neighbors[0], atom_num, neighbors[2]) angle3 = st.measure(neighbors[1], atom_num, neighbors[2]) return (angle1 + angle2 + angle3) / 3
[docs]def calculate_dihedral(st, a1, a2): """ Calculates the average dihedral angle of the bond between the specified atoms. Close to 0 (zero) is likely to be double bond. """ a1 = int(a1) a2 = int(a2) num_angles = 0 dihedral_angle_sum = 0.0 substituents1 = get_neighbors(st, a1) substituents2 = get_neighbors(st, a2) if a2 not in substituents1 or a1 not in substituents2: return -1 substituents1.remove(a2) substituents2.remove(a1) if len(substituents1) < 1 or len(substituents2) < 1: return -1 for sub1 in substituents1: for sub2 in substituents2: num_angles += 1 angle = st.measure(sub1, a1, a2, sub2) if angle < 0: angle = -angle if angle > 90: angle = 180.0 - angle dihedral_angle_sum += angle return dihedral_angle_sum / num_angles
[docs]def order_ring_or_chain(st, atoms): """ This function orders the atoms in the ring or chain by bonding order. """ atoms_ordered = [] input_num = len(atoms) if len(atoms) <= 2: return atoms # if chain, look for terminal atom terminal = atoms[0] is_ring = 1 for atom_num in atoms: neighbors = get_neighbors(st, atom_num) bondable_neighbors = [] for n in neighbors: if n in atoms: bondable_neighbors.append(n) num_n = len(bondable_neighbors) if num_n == 1: terminal = atom_num is_ring = 0 break atoms_ordered.append(terminal) current_atom = st.atom[terminal] atoms.remove(terminal) if is_ring == 1: if st.areBound(atoms[0], terminal): atoms_ordered.append(atoms[0]) current_atom = st.atom[atoms[0]] atoms.remove(atoms[0]) current_atom_save = 0 while atoms != []: # goes through this code once for every atom next_atom = 0 for ibond in current_atom.bond: atom2 = ibond.atom2 if atom2.index in atoms and atom2.index not in atoms_ordered: atoms_ordered.append(atom2.index) current_atom = atom2 atoms.remove(current_atom.index) next_atom = 1 break if next_atom == 0: if current_atom == current_atom_save and len(atoms) > 0: # sorted the current chain. Move on to the next: atoms_ordered.append(atoms[0]) current_atom = st.atom[atoms[0]] atoms.remove(atoms[0]) current_atom_save = current_atom if input_num != len(atoms_ordered): print(" INPUT AND OUTPUT HAVE DIFFERENT ATOM #!") return atoms_ordered
[docs]def do_rings_share_atoms(r1, r2): for a1 in r1: for a2 in r2: if a1 == a2: return True return False
[docs]def get_rings_in_group(ring, rings_to_check): group = [] group.append(ring) for n1 in rings_to_check: if n1 not in group and do_rings_share_atoms(ring, n1): group.append(n1) for n2 in rings_to_check: if n2 not in group and do_rings_share_atoms(n1, n2): group.append(n2) for n3 in rings_to_check: if n3 not in group and do_rings_share_atoms(n2, n3): group.append(n3) for n4 in rings_to_check: if n4 not in group and do_rings_share_atoms( n3, n4): group.append(n4) return group
[docs]class CCDAtomMapper(atom_mapper.ConnectivityAtomMapper): """ Class for determining most ideal mapping of a CCD SMILES structure to a 3D ligand conformation. """
[docs] def score_mapping(self, ccd_st, st, atset): """ This method is called to "score" a given mapping. Here, both input structures have been re-ordered to have the same atom ordering, and <atset> is a set of atoms that are "shared" between the 2 structures. """ st = st.copy() # Copy bond orders and formal charges from CCD to ST: het_heavy_atoms = list(atset) _assign(ccd_st, het_heavy_atoms, st, het_heavy_atoms) # For now, we only consider valence violations and differences in # chiralities. In the future, bad geometries will be penalized as well, # and eventually perhaps bad interactions with the protein. num_issues = 0 for anum in atset: # Make sure to consider bonds to hydrogens (which are not in atset) # and the protein (covalent bonds), metals, etc. atom = st.atom[anum] total_orders = sum(bond.order for bond in atom.bond) if total_orders > MAX_VALENCE[atom.atomic_number]: num_issues += 1 ccd_ch = ccd_st.atom[anum].chirality st_ch = st.atom[anum].chirality if ccd_ch != st_ch and ccd_ch not in (None, 'undef'): num_issues += 1 return num_issues
[docs]class AssignBondOrders(object):
[docs] def __init__(self, st, atoms, neutralize): """ This is a private class; use assign_st() function instead. """ self.st = st self.neutralize_groups = neutralize # Optimize by making a SET of atoms. Also exclude any hydrogens: self.atoms = set() for a in atoms: if self.st.atom[a].atomic_number != 1: self.atoms.add(a) self.amide_nitrogens = [] debug("Assigning orders to atoms: %s" % self.atoms) self.all_rings = [] self.aromatic_rings = [] self.aromatic_ring_atoms = set() self.aromatic_ring_groups = [] self.bonds_in_6mem_rings = set() # Set of (atom1, atom2). Atom with lowest index appears first self.bonds_in_5mem_rings = set() self.changed = False self.assigned_bonds = []
[docs] def assign(self): debug("Number of atoms in the structure: %i" % self.st.atom_total) # Find atoms with zero-order bonds: # FIXME put this into a separate method eventually? self.zero_bonded_atoms = set() for bond in self.st.bond: if bond.order == 0: self.zero_bonded_atoms.add(bond.atom1.index) self.zero_bonded_atoms.add(bond.atom2.index) debug("assigning templated groups") self.assignTemplatedSubstructures() debug("getting rings") self.findRings() self.findAromaticRings() debug("identifying groups by connectivity") self.identifyGroups() self.assignTripleBonds() self.groupAromaticRings() self.assignAromaticRingOrders() self.assignChainOrders() self.fixKetones() self.fix_N4s() self.st.retype() self.st.retype() # Needs to be run twice to work properly debug("Assigning structure is complete") return self.assigned_bonds
[docs] def assignTemplatedSubstructures(self): """ Force assignment of known functional groups by SMARTS """ groups_list = [ ("Staurosporine core", "[O-0X1][C-0X3]1[N-0X2][C-0X2][C-0X3]2[C-0X3]1[C-0X3]1[C-0X3]3[C-0X2][C-0X2][C-0X2][C-0X2][C-0X3]3[N-0X3][C-0X3]1[C-0X3]1[N-0X3][C-0X3]3[C-0X2][C-0X2][C-0X2][C-0X2][C-0X3]3[C-0X3]12", { (0, 1): 2, (2, 3): 1 }, {}), ( "Heme core", # 1hho "[C-0X3]1[C-0X3][C-0X3]2[C-0X2][C-0X3]3[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]4[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]5[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]1[N-0X3]2)[N-0X3]5)[N-0X3]4)[N-0X3]3", { (0, 1): 2, (2, 3): 2, (4, 5): 2, (6, 7): 2, (8, 9): 2, (10, 11): 2, (12, 22): 2, (13, 14): 2, (15, 16): 2, (17, 18): 2, (19, 20): 2, }, { 21: -1, 23: -1 }, # FIXME Add the Fe to the SMARTS pattern! Otherwise non-templated # structures will not get a +2 charge on the iron. ), ( "Siroheme core (with ZOBs and Iron)", # 1aop "[C-0X4]1[C-0X3][C-0X3]2[C-0X2][C-0X3]3[C-0X4][C-0X3][C-0X3]4[C-0X2][C-0X3]5[C-0X3][C-0X3][C-0X3]6[C-0X2][C-0X3]7[C-0X3][C-0X3][C-0X3]8[C-0X2][C-0X3]1[N-0X3]2_[Fe-0X4](_[N-0X3]78)(_[N-0X3]34)_[N-0X3]56", { (2, 20): 2, (3, 4): 2, (7, 8): 2, (9, 23): 2, (10, 11): 2, (12, 13): 2, (14, 15): 2, (16, 17): 2, (18, 19): 2, }, { 22: -1, 23: -1, 21: +2 }, # FIXME Consider adding support for other metals. ), ( "Siroheme core (without ZOBs nor Iron)", # 1aop, during PPW workflow "[C-0X4]1[C-0X3][C-0X3]2[C-0X2][C-0X3]3[C-0X4][C-0X3][C-0X3]([C-0X2][C-0X3]4[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]5[C-0X3][C-0X3][C-0X3]([C-0X2][C-0X3]1[N-0X3]2)[N-0X3]5)[N-0X3]4)[N-0X3]3", { (2, 20): 2, (3, 4): 2, (7, 8): 2, (9, 22): 2, (10, 11): 2, (12, 13): 2, (14, 15): 2, (16, 17): 2, (18, 19): 2, }, { 21: -1, 23: -1 }, ), ( "Cobalamin core", # 7req "[C-0X3]1[C-0X4][C-0X3]2[C-0X3][C-0X3]3[C-0X3][C-0X4][C-0X4]([N-0X3]3)[C-0X3]3[C-0X3][C-0X4][C-0X3]([C-0X3][C-0X3]4[C-0X3][C-0X4][C-0X3]([C-0X2][C-0X3]1[N-0X3]2)[N-0X3]4)[N-0X3]3", { (2, 3): 2, (4, 8): 2, (12, 13): 2, (14, 21): 2, (17, 18): 2, (19, 20): 2, }, { 22: -1 }, ), ] regenerate_tmp_st = True for name, smarts, bonds, charges in groups_list: if regenerate_tmp_st: # Create a CT with only the atoms that need to be assigned: assigned_atoms = [] for anum in range(1, self.st.atom_total + 1): if anum not in self.atoms: assigned_atoms.append(anum) tmp_st = self.st.copy() renumber_map = tmp_st.deleteAtoms(assigned_atoms, renumber_map=True) # Invert the renumber map dict: reverted_renumber_map = {} for anum in range(1, self.st.atom_total + 1): new_atomnum = renumber_map[anum] if new_atomnum is not None: reverted_renumber_map[new_atomnum] = anum regenerate_tmp_st = False # NOTE: To use evaluate_smarts_canvas() here, we need to rewrite # some of the SMARTS patterns. with warnings.catch_warnings(): warnings.filterwarnings("ignore", message=".*evaluate_smarts()", category=DeprecationWarning) sets = analyze.evaluate_smarts(tmp_st, smarts, unique_sets=True) for atom_set in sets: debug("MATCHED %s: %s" % (name, atom_set)) # To make implementing new patterns easier: if DEBUG and False: for i, anum in enumerate(atom_set): self.st.atom[anum].pdbname = str(i) for index_atom, value in charges.items(): atom = reverted_renumber_map[atom_set[index_atom]] if atom not in self.atoms: # This atom already matched this pattern, and was assigned # (import for heme-like groups) continue self.setFormalCharge(atom, value) # Make sure this atom does not get re-assigned: self.atoms.remove(atom) regenerate_tmp_st = True for (index_atom1, index_atom2), value in bonds.items(): atom1 = reverted_renumber_map[atom_set[index_atom1]] atom2 = reverted_renumber_map[atom_set[index_atom2]] if atom1 not in self.atoms or atom2 not in self.atoms: # These atoms already matched this pattern, and were assigned # (import for heme-like groups) continue # if atom1 in self.atoms and atom2 in self.atoms: self.setBondOrder(atom1, atom2, value) # Make sure these atoms do not get re-assigned # (importnat especially if the bond was forced to be # single): self.atoms.remove(atom1) self.atoms.remove(atom2) regenerate_tmp_st = True
[docs] def isOnlySingleBonded(self, atom): """ Returns False if atom has double or triple bonds. """ for bond in self.st.atom[int(atom)].bond: if bond.order > 1: return False return True
[docs] def getNeighbors(self, atom): """ Returns a list of atoms that <atomnum> is bound to.""" if type(atom) == type(1): atom = self.st.atom[atom] return [n.index for n in atom.bonded_atoms]
[docs] def getNeighborsObj(self, atom): """ Returns a list of atoms that <atomnum> is bound to.""" if type(atom) == type(1): atom = self.st.atom[atom] return list(atom.bonded_atoms)
[docs] def getNumNeighbors(self, atom): """ Returns a the number of atoms that <atom> is bound to. EXCLUDING zero-order bonds """ # Ev:100421 if type(atom) == type(1): # Convert atom number to atom object atom = self.st.atom[atom] n = 0 for bond in atom.bond: if bond.order > 0: n += 1 return n
[docs] def getNumBondOrders(self, atom): if type(atom) == type(1): atom = self.st.atom[atom] n = 0 for bond in atom.bond: n += bond.order return n
[docs] def getElement(self, atomnum): return self.st.atom[atomnum].element
[docs] def fixFormalCharge(self, atom): """ Adjusts the formal charge so that it is valid for number of bonds and bond orders. """ # NOTE hydrogens may or may not be present!!!! if type(atom) == type(1): atom = self.st.atom[atom] # Number of bonds the atom is involved with: # (double bonds counted as 2, triple as 3) num_bonds = 0 for bond in atom.bond: num_bonds += bond.order element = atom.element if DEBUG: print(' fixFormalChrage() for atom', atom, 'num_bonds:', num_bonds, 'element:', element) # Ev:69974 For now, only special case for carbon monoxide is present: if element == 'O': if num_bonds == 3: atom.formal_charge = +1 if DEBUG: print(' fixFormalChrage() changed charge to +1') elif element == 'C': if num_bonds == 3: for n in atom.bonded_atoms: if n.formal_charge == +1: atom.formal_charge = -1 if DEBUG: print( ' fixFormalChrage() changed charge to -1')
[docs] def setFormalCharge(self, atom, charge): if type(atom) == type(1): atom = self.st.atom[atom] atom.formal_charge = charge if charge > 0: charge_str = "+%i" % charge else: charge_str = "%i" % charge debug(" Set formal charge of %i to %s" % (atom, charge_str))
[docs] def setBondOrder(self, atom1, atom2, bond_order): """ Sets the bond order between atom1 and atom2 to bond_order.""" atom1 = int(atom1) atom2 = int(atom2) bond = self.st.getBond(atom1, atom2) if not bond: raise RuntimeError("setBondOrder(): Invalid bond: %s %s" % (atom1, atom2)) bond.order = bond_order debug(" Set bond %i-%i order to %i" % (atom1, atom2, bond_order)) if atom1 < atom2: self.assigned_bonds.append((atom1, atom2, bond_order)) else: self.assigned_bonds.append((atom2, atom1, bond_order)) # Ev:69974 if bond_order == 3: # For triple bonds only for now # Charge of one may depend on charge of the other, so need 3: self.fixFormalCharge(atom1) self.fixFormalCharge(atom2) self.fixFormalCharge(atom1) # PPREP-1047 If any of these atoms is a zero-order bonded carbon with 3 # total orders, then assign a negative charge to it: for atom in (atom1, atom2): if atom in self.zero_bonded_atoms \ and self.st.atom[atom].element == 'C' \ and self.getNumBondOrders(atom) == 3: self.setFormalCharge(atom, -1)
[docs] def getAtomsRings(self, atom): """ Return a list of rings that this atom is part of. """ atom_rings = [] anum = int(atom) for ring in self.all_rings: if anum in ring: atom_rings.append(ring) return atom_rings
[docs] def isAtomInRing(self, atom): anum = atom.index for ring in self.all_rings: if anum in ring: return True return False
[docs] def isAtomInSmallRing(self, atom_num): """ Returns 1 if atom_num is a ring with 3, 4, or 5 members. """ debug_small_ring = False if debug_small_ring: print(" isAtomInSmallRing() atom =", atom_num) for n1 in self.getNeighbors(atom_num): if debug_small_ring: print(" n1 =", n1) for n2 in self.getNeighbors(n1): if n2 == atom_num: continue if debug_small_ring: print(" n2 =", n2) for n3 in self.getNeighbors(n2): if n3 == n1: continue if debug_small_ring: print(" n3 =", n3) if n3 == atom_num: return True for n4 in self.getNeighbors(n3): if n4 == n2: continue if debug_small_ring: print(" n4 =", n4) if n4 == atom_num: return True for n5 in self.getNeighbors(n4): if n5 == n3: continue if debug_small_ring: print(" n5 =", n5) if n5 == atom_num: return True return False
[docs] def getExtracyclicAtom(self, ring, a): """ Given an atom of a ring, return the extra-cyclic atom that is bound to this atom. """ other_atom = None for n in a.bonded_atoms: if n.index not in ring: if other_atom is not None: raise ValueError( "Given atom must have only 1 extra cyclic bond") other_atom = n return other_atom.index
[docs] def forEdgeInRing(self, ring): """ Iterates over (atom1, atom2) in the given ring """ # Because -1 is the last item in the list. for i, a1 in enumerate(ring, start=-1): yield a1, ring[i]
[docs] def calcBondScore(self, a1, a2, distance_weight=1.0, ring_angle_weight=0.0): """ Calculates the probability that the two atoms are double bonded to each other. """ st = self.st a1 = int(a1) a2 = int(a2) atom1 = st.atom[a1] atom2 = st.atom[a2] a1_neighbors = self.getNeighbors(atom1) a2_neighbors = self.getNeighbors(atom2) atom1_nn = len(a1_neighbors) atom2_nn = len(a2_neighbors) # Ev:88246 and Ev:122006 Decrease the distance weight for 5 membered rings: mem5ring = tuple(sorted((a1, a2))) in self.bonds_in_5mem_rings if mem5ring: distance_weight -= 0.4 # Decrease the distance weight for 6 membered rings slightly: mem6ring = tuple(sorted((a1, a2))) in self.bonds_in_6mem_rings if mem6ring: distance_weight -= 0.2 bond_score = 0.0 debugbond('\n CALCULATING BOND SCORE FOR BOND: %s %s' % (a1, a2)) if not st.areBound(a1, a2): debugbond( " ERROR! calcBondScore(): The input atoms %s & %s are not bonded!!!!!" % (a1, a2)) return -1 a1_angle = calculate_average_angle(st, a1) a2_angle = calculate_average_angle(st, a2) # If one of the atoms is already double bonded to something, then it # can not double bond to anything else unless it is sp hybridized, in # which case the bond angle wll be close to 180 degrees: if not self.isOnlySingleBonded(a1): if a1_angle < 155: # exception C=C=C and N=N+=N debugbond( ' ** bond score is 0, because average angle of atom %s is < 155' % a1) return 0 if not self.isOnlySingleBonded(a2): if a2_angle < 155: # exception C=C=C and N=N+=N debugbond( ' ** bond score is 0, because average angle of atom %s is < 155' % a2) return 0 # If Oxygen or Sulfur is already bound twice, then they can't bond again. if atom1.element in ['O', 'S']: if len(a1_neighbors) == 2: # bonded to the fullest if not mem5ring: # Ev:88246 debugbond( ' ** bond score is 0, because atom %s (%s) is already bonded twice' % (a1, atom1.element)) return 0 if atom2.element in ['O', 'S']: if len(a2_neighbors) == 2: # bonded to the fullest if not mem5ring: # Ev:88246 debugbond( ' ** bond score is 0, because atom %s (%s) is already bonded twice' % (a2, atom2.element)) return 0 # Punish Amides: amideN_present = False amideN_is_tertiary = False # Whether this amide is tertiary if atom1.element == 'N': for n in a1_neighbors: if n != a2 and st.atom[n].element == 'C': nns = self.getNeighbors(n) for nn in nns: if nn != a1 and st.atom[nn].element == 'O': if not self.isOnlySingleBonded(nn): amideN_present = True if self.getNumBondOrders(atom1) == 3: amideN_is_tertiary = True break if atom2.element == 'N' and not amideN_present: for n in a2_neighbors: if n != a1 and st.atom[n].element == 'C': nns = self.getNeighbors(n) for nn in nns: if nn != a2 and st.atom[nn].element == 'O': if not self.isOnlySingleBonded(nn): amideN_present = True if self.getNumBondOrders(atom2) == 3: amideN_is_tertiary = True break if amideN_present: delta = -4.0 if amideN_is_tertiary: delta -= 3.0 bond_score += delta debugbond(" Delta accounting for amideN presence: %.2f" % delta) # Account for the bond length: bond_dist = st.measure(a1, a2) # the sum of the radii of atom1 and atom2. # If the bond length is less than this value, the bond is likely double # Zero is invalid atom elements: dist_threshold = get_single_bond_length(atom1, atom2) if dist_threshold == 0.0: print("WARNING: get_single_bond_length() failed for atoms:", a1, a2) print(" Atom elements:", atom1.element, atom2.element) debugbond(" dist= %s " % bond_dist + " threshold= %s" % dist_threshold) # dist_threshold - bonds longer than this are likely to be single bonds; # bonds shorter are likely to be double bonds. dist_diff = bond_dist - dist_threshold if dist_diff < -0.2: # Do not over-reward very short bonds: extra_diff = -(dist_diff + 0.2) delta = (0.2 * 70.0 + extra_diff * 30.0) elif dist_diff < -0.1: # Reward short bonds: delta = -(dist_diff * 70.0) # FIXME penalize the first 0.1A less elif dist_diff < 0.0: # Smaller reward for slightly short bonds: delta = -(dist_diff * 60.0) else: # Penalize long bonds: delta = -(dist_diff * 80.0) delta *= distance_weight bond_score += delta debugbond(" Delta accounting for bond length: %.2f" % delta) # FIXME # Decrease the weight if abs_diff is small # Increase the weight if abs_diff is large # Account for the dihedral angle bond_dihedral = calculate_dihedral(st, a1, a2) delta = 0.0 if bond_dihedral == -1: debugbond( " Could not calculate the dihedral angle for bond %i-%i" % (a1, a2)) elif bond_dihedral < 10.0: delta += (10.0 - bond_dihedral) / 2 elif bond_dihedral > 10.0: delta -= (bond_dihedral - 10) / 3 if delta: bond_score += delta debugbond(" Delta accounting for dihedral: %f" % delta) tertiary_nitrogen_punishment = 7.0 # See PPREP-940 terminal_nitrogen_punishment = 6.5 # See PPREP-715 terminal_nitrogen_on_ring_punishment = 5.0 # See PPREP-715 secondary_nitrogen_punishment = 0.5 # Punishing bonds with nitrogens (especially tertiary, which would create # a nitrogen with a positive charge): delta = 0.0 if atom1.element == 'N' and atom2.element == 'N': if not self.isBondRingEdge(atom1, atom2) \ and not self.isAtomBoundToAromaticRing(atom1) \ and not self.isAtomBoundToAromaticRing(atom2): # Double bonds between nitrogens are not very stable (unless they are # in an aromatic ring, see PPREP-1004) or bound to aromatic rings, # see PPREP-1014. delta = -10.0 # NOTE: Do not increase this penalty (test will fail) else: if atom1.element == 'N': n = atom1 n_count = atom1_nn other = atom2 elif atom2.element == 'N': n = atom2 n_count = atom2_nn other = atom1 else: n = None if n: # One of the atoms in this bond is a nitrogen if n_count == 3: delta -= tertiary_nitrogen_punishment elif n_count == 1: # Fine-tuning for PPREP-715 if self.isAtomInRing(other): delta -= terminal_nitrogen_on_ring_punishment else: delta -= terminal_nitrogen_punishment else: delta -= secondary_nitrogen_punishment if delta: bond_score += delta debugbond( " Delta accounting for penalizing of special nitrogens %s " % delta) # The bond angle is more reliable by this factor if the atom has 3 neighbors rather than 2: angle_3n_weight = 1.7 angle_2n_weight = 0.1 # If the bond angle can be calculated, reward or penalize it: for anum, angle, nn in ((a1, a1_angle, atom1_nn), (a2, a2_angle, atom2_nn)): if not angle: continue diff = angle - 117.0 # Standard reward for positive <diff> and penalty for negative: if nn == 3: # Angle is more reliable if atom has 3 neighbors: if diff > 2.5: # This atom is pretty flat, very likely it's involved in a # double bond, so increase the weight of diff after the # first 2.5: extra_diff = diff - 2.5 diff += (extra_diff * 3.0) angle_reward = diff * angle_3n_weight elif self.isAtomInSmallRing(anum): angle_reward = diff * angle_2n_weight * ring_angle_weight else: angle_reward = diff * angle_2n_weight bond_score += angle_reward debugbond(" Delta accounting for atom %s angle (%f): %f" % (anum, angle, angle_reward)) debugbond(" bond_dist=" + str(bond_dist) + " dihedral=" + str(bond_dihedral) + " a1_nn=" + str(atom1_nn) + " a2_nn=" + str(atom2_nn)) debugbond(" a1_angle=" + str(a1_angle) + " a2_angle=" + str(a2_angle)) debugbond(" --- Bond_score for %d-%s = %s" % (a1, atom2.index, bond_score)) return bond_score
[docs] def getBestDoubleBond(self, a1, a2, a3): """ Returns a list of two atoms that are the best candidates. Returns [] if neither bond is potential double bond. """ st = self.st debug("\ngetBestDoubleBond(%i, %i, %i):" % (a1, a2, a3)) # If both bonds are under DOUBLE_BOND_THRESHOLD, [] is returned bond1_score = self.calcBondScore(a1, a2) bond2_score = self.calcBondScore(a2, a3) debug(" bond %s-%s score: %.2f" % (a1, a2, bond1_score)) debug(" bond %s-%s score: %.2f" % (a2, a3, bond2_score)) if bond1_score == -1: msg = " ABO ERROR! getBestDoubleBond(): Atoms are not bonded: %i, %i" % ( a2, a3) raise ValueError(msg) if bond2_score == -1: msg = " ABO ERROR! getBestDoubleBond(): Atoms are not bonded: %i, %i" % ( a2, a3) raise ValueError(msg) # Positively charged nitrogen REVERSE penalty: a1_posnit = st.atom[a1].element == 'N' and self.getNumNeighbors(a1) == 3 a3_posnit = st.atom[a3].element == 'N' and self.getNumNeighbors(a3) == 3 if a1_posnit and not a3_posnit: debug(" Penalizing bond (tertiary N present): %s-%s" % (a1, a2)) bond1_score -= 2.0 elif a3_posnit and not a1_posnit: debug(" Penalizing bond (tertiary N present): %s-%s" % (a2, a3)) bond2_score -= 2.0 if bond1_score >= bond2_score and bond1_score > DOUBLE_BOND_THRESHOLD: return [a1, a2] elif bond2_score > bond1_score and bond2_score > DOUBLE_BOND_THRESHOLD: return [a2, a3] else: debug(" getBestDoubleBond(): Neither is double bond") return []
[docs] def sortFirstThreeOfGroup(self, atom_group): """ This function sorts the atoms in the group so that the 3 (or 4) atoms that need to be worked on the first will be listed first.""" debuggroup("\nsortFirstThreeOfGroup(): %s" % atom_group) st = self.st terminals = [] linears = [] branches = [] sorted_group = [] neighbors_dict = {} input_len = len(atom_group) if len(atom_group) < 2: return [] # Remove any atoms that are disconneted from the rest of the atoms in # the group: _group_excluding_singles = [] for a in atom_group: neighbors = self.getNeighbors(a) bondable_neighbors = [] for n in neighbors: if n in atom_group: bondable_neighbors.append(n) if len(bondable_neighbors) != 0: _group_excluding_singles.append(a) atom_group = _group_excluding_singles if len(atom_group) <= 1: debuggroup( " ERROR in sortFirstThreeOfGroup()!!! small atom_group!") return [] for atom_num in atom_group: neighbors = self.getNeighbors(atom_num) bondable_neighbors = [] for n in neighbors: if n in atom_group: bondable_neighbors.append(n) neighbors_dict[atom_num] = bondable_neighbors num_n = len(bondable_neighbors) if num_n == 1: terminals.append(atom_num) elif num_n == 2: linears.append(atom_num) elif num_n == 3: branches.append(atom_num) elif num_n == 0: debug(" ERROR!! num_n =0 of atom :" + str(atom_num)) else: debug(" ERROR in sortFirstThreeOfGroup()!!!!!!!") return [] # Sort linears if len(atom_group) <= 2: return atom_group if terminals == [] and branches == []: debuggroup(" the atoms are in a ring!!!") ring = order_ring_or_chain(st, atom_group) return ring # Search for a terminal next to a linear. If exists, add them to the list first # finds only the first occurance. for terminal in terminals: if terminal in sorted_group: break for linear in neighbors_dict[terminal]: if linear in linears and linear not in sorted_group: sorted_group.append(terminal) sorted_group.append(linear) linears_neighbors = neighbors_dict[linear] for linear_n in linears_neighbors: if linear_n != terminal and linear_n not in sorted_group: sorted_group.append(linear_n) break break if not sorted_group: debuggroup("No linears next to terminal!") # search for branch bound to two terminals. Make branch go first for branch in branches: neighbors = neighbors_dict[branch] if neighbors[0] in terminals and neighbors[1] in terminals: sorted_group.append(branch) sorted_group.append(neighbors[0]) sorted_group.append(neighbors[1]) sorted_group.append(neighbors[2]) break elif neighbors[1] in terminals and neighbors[2] in terminals: sorted_group.append(branch) sorted_group.append(neighbors[1]) sorted_group.append(neighbors[2]) sorted_group.append(neighbors[0]) break elif neighbors[0] in terminals and neighbors[2] in terminals: sorted_group.append(branch) sorted_group.append(neighbors[0]) sorted_group.append(neighbors[2]) sorted_group.append(neighbors[1]) break if not sorted_group: # search for a branch surrounded by one terminal: terminal_n = None for branch in branches: neighbors = neighbors_dict[branch] if len(neighbors): for n in neighbors: if n in terminals: terminal_n = n break if terminal_n: break if terminal_n: sorted_group.append(branch) sorted_group.append(n) for other_n in neighbors: if other_n != n: sorted_group.append(other_n) debuggroup(" Added branch: %i (%i is terminal)" % (branch, n)) if not sorted_group and len(branches) > 0: debuggroup( "Only branches bonded to linears and other branches are left!!!!" ) # search for any other branch... branch = branches[0] neighbors = neighbors_dict[branch] sorted_group.append(branch) sorted_group.append(neighbors[0]) sorted_group.append(neighbors[1]) sorted_group.append(neighbors[2]) if not sorted_group and len(atom_group) > 4: debug( " ERROR !!!! len(sorted_group) == 0 and len(atom_group) > 4!!!" ) # Append other atoms to the group: for atom_num in atom_group: if atom_num not in sorted_group: sorted_group.append(atom_num) if input_len < len(sorted_group): debug( "ERROR in sortFirstThreeOfGroup()!!!! Input length < output length !!!" ) return sorted_group
[docs] def resolveTriangularGroup(self, atoms): """ Assign a double bond to "best" bond pair in the set. If any bond score is under the double bond threshold and is also "terminal", remove it from further consideration for double bonding. Input atom list must be sorted: the first atom in the list must be central, and the three next atoms are bonded to it in a star-like arrangement. If all three neighbors have bonds scores above the threshold, the one with the highest score is turned into a double bond. Else, the one with the lowest score is removed from the set to avoid infinite recursion. In case of symmetrical inputs, 2 or more bonds may have the same length, etc, and ties may happen. We need a non-geometrical criterion to resolve ties in a reproducible manner. For this reason, use atom indexes as a secondary criterion. """ DOUBLE_BONDING_SCORE_THRESHOLD = 4 central_atom = atoms[0] bond_scores = [] for neighbor in atoms[1:4]: bond_score = self.calcBondScore(central_atom, neighbor) bond_scores.append((bond_score, neighbor)) if bond_score < DOUBLE_BONDING_SCORE_THRESHOLD: # If this bond has low score, and is not bound to the rest of the # atoms in the group (it's "terminal") leave this bond a single bond. if set(self.getNeighbors(neighbor)).intersection(atoms) == { central_atom }: atoms.remove(neighbor) return atoms # If all 3 bonds are "good enough" to be considered for a double # bond, pick the one with the best score and remove both ends: bond_scores.sort() lowest_score, lowest_neighbor = bond_scores[0] if lowest_score >= DOUBLE_BONDING_SCORE_THRESHOLD: # All bonds in list are suited to be made double. # Choose the one with highest score: highest_score, highest_neighbor = bond_scores[-1] self.setBondOrder(central_atom, highest_neighbor, 2) atoms.remove(central_atom) atoms.remove(highest_neighbor) else: # None of the bonds are "good enough" to be made a double bond. # Remove neighbor that would form the lowest-scoring double bond # from consideration for the next iteration. atoms.remove(lowest_neighbor) return atoms
[docs] def assignGroupDoubleBonds(self, atom_set): """ Assigns double bonds where appropriate for the atoms in the supplied group. """ if len(atom_set) < 2: return atom_set = self.sortFirstThreeOfGroup(atom_set) debug( " self.assignGroupDoubleBonds() input after sorting first 3: %s" % atom_set) if len(atom_set) < 2: # One sp2 atom by itself. Don't bond. return elif len(atom_set) == 2: # 2 potential double-bonding atoms together bond_score = self.calcBondScore(atom_set[0], atom_set[1]) if bond_score > DOUBLE_BOND_THRESHOLD: debug(" Adding double bond between " + str(atom_set[0]) + " and " + str(atom_set[1])) self.setBondOrder(atom_set[0], atom_set[1], 2) return elif len(atom_set) == 3: # 3 potential double-bond atoms together. best_bond = self.getBestDoubleBond(atom_set[0], atom_set[1], atom_set[2]) if best_bond != []: self.setBondOrder(best_bond[0], best_bond[1], 2) return else: # 4 or more atoms if len(atom_set) < 4: debug("ERROR !!! len(atoms) < 4 !!!!") return st = self.st # Triangular arrangement if st.areBound(atom_set[0], atom_set[1]) and \ st.areBound(atom_set[0], atom_set[2]) and \ st.areBound(atom_set[0], atom_set[3]): atom_set = self.resolveTriangularGroup(atom_set) # The first four atoms in the set have been processed. Now, recurse # into this method again to move down the atom set # (THIS CODE MAY HAVE MAJOR PROBLEMS) self.assignGroupDoubleBonds(atom_set) return # NOT triangular arrangement: # Check to see if all of these atoms form a ring. in_ring = 1 for atom_num in atom_set: neighbors = self.getNeighbors(atom_num) bondable_neighbors = [] for n in neighbors: if n in atom_set: bondable_neighbors.append(n) num_n = len(bondable_neighbors) if num_n == 1 or num_n == 3: in_ring = 0 break elif num_n <= 0 or num_n > 3: debug(" ERROR!! num_n <=0 or > 3!: %s" % atom_num) if in_ring: debug(" THE ATOMS ARE IN A RING!!!") debug(" ring=%s" % atom_set) try: best_bond_of_1_2 = self.getBestDoubleBond( atom_set[0], atom_set[1], atom_set[2]) except: debug(" ERROR from getBestDoubleBond() ") return atoms = atom_set[:] # modify a copy if in_ring: try: best_bond_of_2_3 = self.getBestDoubleBond( atom_set[1], atom_set[2], atom_set[3]) if in_ring and best_bond_of_1_2 == best_bond_of_2_3: self.setBondOrder(best_bond_of_1_2[0], best_bond_of_1_2[1], 2) atoms.pop(1) atoms.pop(1) self.assignGroupDoubleBonds(atoms) return except: debug(" ERROR from getBestDoubleBond() ") if best_bond_of_1_2 == []: debuggroup(" getBestDoubleBond() returned []!!!") # none are double bonds. remove first two atoms or the second atom atoms.pop(1) self.assignGroupDoubleBonds(atoms) return first2atoms = [atoms[0], atoms[1]] if best_bond_of_1_2 == first2atoms: if in_ring: # reorder the atoms debuggroup( " first two atoms could double bond, but it's a ring, therefore reordering atoms" ) atom0 = atoms[0] atoms.pop(0) atoms.append(atom0) else: debuggroup( " first two atoms should double bond! adding bond") self.setBondOrder(best_bond_of_1_2[0], best_bond_of_1_2[1], 2) atoms.pop(0) atoms.pop(0) self.assignGroupDoubleBonds(atoms) return else: # the second bond should be a double bond if in_ring: atom0 = atoms[0] atom1 = atoms[1] atoms.pop(0) atoms.pop(0) atoms.append(atom0) atoms.append(atom1) debuggroup(" first two atoms should NOT double bond") atoms.pop(0) self.assignGroupDoubleBonds(atoms) return
[docs] def identifyGroups(self): """ Identifies groups and sets bond orders appropriately. """ # TODO create a function for each group here: def _handle_nitroso(r, n, o): if not self.getNumNeighbors(o) == 1: return if self.isAtomAromatic(r): # FIXME In the future we should expand into detecting # nitronos in cases where aromatic rings are not involved. self.setBondOrder(n, o, 2) flip = 0 # for debugging purposes debug("\nIdentifying functional groups...") for a in self.atoms: atom = self.st.atom[a] element = atom.element # Ignore atoms that are not first atoms in templates: if element not in ['C', 'N', 'P', 'S', 'As']: continue # Ignore the atom if it already has double bonds: if not self.isOnlySingleBonded(atom): continue neighbors = [] for natom in atom.bonded_atoms: neighbors.append(natom) num_bonds = len(neighbors) # FIRST ATOM IS NITROGEN: if element == 'N': # Nitrate groups: if num_bonds == 3: oxygens = [] for natom in neighbors: if natom.element in ['O', 'S']: if len(natom.bond) == 1: oxygens.append(natom.index) if len(oxygens) == 2: if flip: # for debugging purposes tmp = oxygens[0] oxygens[0] = oxygens[1] oxygens[1] = tmp ox0 = self.st.atom[oxygens[0]] ox1 = self.st.atom[oxygens[1]] debug("Group: OM oxygen") # Decide which Oxygen to double bond to... if ox0.formal_charge == -1 or ox0.atom_type == 18: self.setBondOrder(a, oxygens[1], 2) if 1: # not self.neutralize_groups: ox0.atom_type = 18 # OM ox0.formal_charge = -1 else: self.setBondOrder(a, oxygens[0], 2) if 1: # not self.neutralize_groups: ox1.atom_type = 18 # OM ox1.formal_charge = -1 atom.atom_type = 31 atom.formal_charge = 1 continue if num_bonds == 2: # R-N-R n1 = neighbors[0] n2 = neighbors[1] if n1.element == 'N' and n2.element == 'N': # Azide R--N=N=N debug("Group Azide R--N=N=N") angle = self.st.measure(n1, a, n2) if angle > 150: self.setBondOrder(n1, a, 2) self.setBondOrder(a, n2, 2) # PPREP-917 Correct the formal charges: self.setFormalCharge(atom, +1) # Add negative charges to side nitrogens (if single-bonded): for nn in (n1, n2): # NOTE: atoms ZOB-bonded are NOT considered neighbors if self.getNumNeighbors(nn) == 1: self.setFormalCharge(nn, -1) elif n1.element == 'O': _handle_nitroso(n2, a, n1) elif n2.element == 'O': _handle_nitroso(n1, a, n2) # FIRST ATOM IS CARBON elif element == 'C': # Caboxylic Acids: if num_bonds <= 3: c_num = a # find all OX1's oxygens = [] for n in neighbors: if n.element == 'O': if self.getNumNeighbors(n) == 1: oxygens.append(n.index) if len(oxygens) >= 2: debug("Group: Carboxylic Acid") if flip: # for debugging purposes tmp = oxygens[0] oxygens[0] = oxygens[1] oxygens[1] = tmp # Carboxylic Acid - C(=O)-OH ### ox0 = self.st.atom[oxygens[0]] ox1 = self.st.atom[oxygens[1]] # Decide which Oxygen to double bond to... if ox0.formal_charge == -1 or ox0.atom_type == 18: self.setBondOrder(c_num, oxygens[1], 2) if not self.neutralize_groups: ox0.atom_type = 18 ox0.formal_charge = -1 else: self.setBondOrder(c_num, oxygens[0], 2) if not self.neutralize_groups: ox1.atom_type = 18 ox1.formal_charge = -1 continue # Amides/Esters no2s = [] for natom in neighbors: if natom.element in ['O', 'S']: if len(natom.bond) > 1: no2s.append(natom.index) elif natom.element == 'N': no2s.append(natom.index) if len(oxygens) == 1 and len(no2s) >= 1: if not self.isAtomAromatic(c_num): o_num = oxygens[0] no2_num1 = no2s[0] c_bond_angle = calculate_average_angle( self.st, c_num) length = self.st.measure(c_num, o_num) if (length < 1.36 and c_bond_angle > 109) or (c_bond_angle > 115): # See Ev:118556 debug("Group: Amide/ester") self.setBondOrder(c_num, o_num, 2) n = self.st.atom[no2_num1] if n.element == 'N': self.amide_nitrogens.append(n.index) continue # Searching for Amidines/Guanidines: # R # \ # C=NH # / # NH2 terminal_nitrogen_neighbors = [] for n in self.getNeighbors(a): if self.st.atom[n].element == 'N': nonhcount = 0 for nn in self.getNeighbors(n): if self.st.atom[nn].atomic_number != 1: nonhcount += 1 if nonhcount == 1: terminal_nitrogen_neighbors.append(n) if len(terminal_nitrogen_neighbors) >= 2: if flip: # For debugging purposes. dn = terminal_nitrogen_neighbors[1] else: dn = terminal_nitrogen_neighbors[0] debug("Group: Amidine/Guanidine") self.setBondOrder(c_num, dn, 2) if self.getNumNeighbors(dn) == 1: # Ev:120911 This nitrogen is not bound to anything; # add a hydrogen to it: self.st.atom[dn].formal_charge = 1 continue # Ketones if len(oxygens) == 1: ox_atom = self.st.atom[oxygens[0]] the_bond = ox_atom.bond[1] bond_length = self.st.measure(the_bond.atom1, the_bond.atom2) c_bond_angle = calculate_average_angle(self.st, c_num) bond_likely_double = False if c_bond_angle > 115 and c_bond_angle < 150: if bond_length < 1.3: bond_likely_double = True c_is_aromatic = self.isAtomAromatic(c_num) if c_is_aromatic == 0 and bond_likely_double: debug("Group: Ketone") self.setBondOrder(c_num, ox_atom, 2) continue # Phosphates # For all single bonded Oxygens, make one double bonded and rest to OM (O-) elif element == 'P': # atom (a) is a Phosphorus without double bonds to anything p_atom = a one_set_to_double = False for n in atom.bonded_atoms: if n.element == 'O' and self.getNumNeighbors(n) == 1: if not one_set_to_double: debug("Group: Phosphate") self.setBondOrder(p_atom, n, 2) one_set_to_double = True else: n.atom_type = 18 # OM n.formal_charge = -1 # Arsenate # For all single bonded Oxygens, make one double bonded and rest to OM (O-) elif element == 'As' and num_bonds == 4: # atom (a) is a Arsenic without double bonds to anything as_atom = a one_set_to_double = False for n in atom.bonded_atoms: if n.element == 'O' and self.getNumNeighbors(n) == 1: if not one_set_to_double: debug("Group: Arsenate") self.setBondOrder(as_atom, n, 2) one_set_to_double = True else: n.atom_type = 18 # OM n.formal_charge = -1 elif element == 'S': # Sulfones and Thioamides (Ev:53894): # R # \ # C=S # / # N s_atom = a o_neighbors = [] for n in neighbors: if n.element == 'O' and self.getNumNeighbors(n) == 1: o_neighbors.append(n.index) if len(o_neighbors) == 2: # R # O=S=O # R debug("Group: Sulfone") self.setBondOrder(s_atom, o_neighbors[0], 2) self.setBondOrder(s_atom, o_neighbors[1], 2) continue elif len(o_neighbors) == 3: # O # O=S=O # R debug("Group: Sulfone") # Ev:128301 Do not add double bond to the oxygen that has a negative charge num_bonds_incremented = 0 for oatom in o_neighbors: if self.st.atom[oatom].formal_charge == 0: self.setBondOrder(s_atom, oatom, 2) num_bonds_incremented += 1 if num_bonds_incremented == 2: break # 2 dobule bonds is enough continue elif len(o_neighbors) == 1: if self.getNumNeighbors(s_atom) > 2: debug("Group: R-S(=O)-R") self.setBondOrder(a, o_neighbors[0], 2) elif num_bonds == 1 and neighbors[0].element == 'C': carbon = neighbors[0] # Go through each neighbor of the carbon that's bonded to the sulfur: for ns_n in carbon.bonded_atoms: if ns_n.element == 'N': if self.getNumNeighbors(carbon) == 2: # Arrangement: S-C-N c_bond_angle = calculate_average_angle( self.st, carbon) if c_bond_angle > 160: # Ev:117941 debug("Group: S=C=N (linear), angle: %.2f" % c_bond_angle) self.setBondOrder(carbon, ns_n, 2) self.setBondOrder(s_atom, carbon, 2) if len(ns_n.bond) == 1: # Ev:117941 debug( " Nitrogen is terminal (Thiocynate)" ) ns_n.formal_charge = -1 break debug("Group: Thioamide - NC(=S)R") self.setBondOrder(s_atom, carbon, 2) break debug("Identify_groups() has completed.") return
[docs] def findRings(self): """ Returns a list of all rings in the st within the atoms range. """ debug("Creating a list of rings...") raw_rings = self.st.find_rings(sort=True) postprocessed_rings = [] for ring in raw_rings: keep_ring = True for a1, a2 in self.forEdgeInRing(ring): # Remove rings that contain atoms that we are excluding # from the assignment: if a1 not in self.atoms or a2 not in self.atoms: keep_ring = False break # Remove rings that contain ZOBs are their edges: if self.st.getBond(a1, a2).order == 0: keep_ring = False break if keep_ring: postprocessed_rings.append(ring) self.all_rings = postprocessed_rings debug("all_rings = %s" % self.all_rings) self.bonds_in_5mem_rings = set() self.bonds_in_6mem_rings = set() for ring in self.all_rings: if len(ring) == 5: for a1, a2 in self.forEdgeInRing(ring): self.bonds_in_5mem_rings.add(tuple(sorted((a1, a2)))) elif len(ring) == 6: for a1, a2 in self.forEdgeInRing(ring): self.bonds_in_6mem_rings.add(tuple(sorted((a1, a2)))) debug("bonds_in_5mem_rings = %s" % self.bonds_in_5mem_rings) debug("bonds_in_6mem_rings = %s" % self.bonds_in_6mem_rings)
[docs] def isRingAromatic(self, ringatoms): """ Returns a boolean - whether the specified ring is aromatic or not. Aromatic ring is defined in this context as a 5 or 6 membered ring that is likely, based on bond lengths and angles, etc. to be an aromatic ring. ringatoms are assumed to be in connectivity order. """ debug("\nisRingAromatic(): ring: %s" % ringatoms) st = self.st ring = structure._Ring(st, 0, ringatoms, None) num_mem = len(ringatoms) if num_mem not in [5, 6]: debug( " Not aromatic. Only 5 and 6 atom rings are considered aromatic by this code" ) return False # NOT 5 or 6 membered; go to the next ring planarity = calculate_planarity(self.st, ringatoms) debug(" planarity: %.2f" % planarity) # Look for atoms that can't be present in aromatic rings bad_atoms_present = False for atom in ring.atom: element = atom.element angle = calculate_average_angle(st, atom) num_n = self.getNumNeighbors(atom) if num_n < 2 or num_n > 3: bad_atoms_present = True debug(" NON-AROMATIC ATOM: < 2 or > 3 neighbors for atom %i" % atom) elif angle < 115 and num_n == 3: bad_atoms_present = True debug( " NON-AROMATIC ATOM: small ave angle for atom %i angle: %.3f" % (atom, angle)) elif element not in ['C', 'N', 'O', 'S', 'Si']: # atom other than C and N debug( " NON-AROMATIC ATOM: Bad element found! Atom %i, element: %s" % (atom, element)) bad_atoms_present = True if bad_atoms_present: debug(" Not aromatic because unallowable atoms are present") return False debug(" No bad atoms present. Checking angles...") # IMPORTANT a non-aromatic ring can have double bonds in it also # So it is BAD dihedral & distances that we need to punish. # 5 membered rings are more sensitive to dihedrals & lengths: if num_mem == 5: distance_weight = 1.4 score_cutoff = 5.0 score_weight = 0.5 penalty_threshold = 4.0 ring_angle_weight = 0.5 score_top_cutoff = 17.0 planarity_weight = 0.8 elif num_mem == 6: distance_weight = 0.8 score_cutoff = 5.0 # penalize bonds with scores less than this score_weight = 0.01 # how important is bond score penalty_threshold = 5.0 # penalty more than this = non-aromatic ring_angle_weight = 0.3 score_top_cutoff = 20.0 planarity_weight = 1.0 penalty = 0 # Fix for Ev:107169 num_high_scoring_bonds = 0 for edge in ring.edge: atom1 = edge.atom1 atom2 = edge.atom2 score = self.calcBondScore(atom1, atom2, distance_weight, ring_angle_weight) debug(' BOND (%i, %i) SCORE: %f' % (atom1.index, atom2.index, score)) if score < score_cutoff: # Likely a single bond debug(" bond score is below cutoff of %f" % score_cutoff) penalty += (score_cutoff - score) * score_weight debug(" increasing penalty by %f" % ((score_cutoff - score) * score_weight)) elif score > score_top_cutoff: # Likely a double bond debug(" bond score is above top cutoff of %f" % score_top_cutoff) num_high_scoring_bonds += 1 if num_high_scoring_bonds >= 4: # Fix for Ev:107169 # Likely to be an aromatic ring in 4 out of 5 or 6 bonds are high-scoring. debug( " Decreasing penalty by 1.0 due to high number of high scoring bonds" ) penalty -= 1.0 # Rings less planar will be penalized: # NOTE: It is possible for a 5-member ring to be completely planar, yet # not be aromatic. p_penalty = planarity * planarity_weight debug(" penalty due to non-planarity: %f" % p_penalty) penalty += p_penalty debug(' RING PENALTY: %s THRESHOLD: %s' % (penalty, penalty_threshold)) if penalty < penalty_threshold: debug(' aromatic') return True else: debug(' non aromatic') return False
[docs] def findAromaticRings(self): """ Returns a list of aromatic rings in st within all_rings.""" aromatic_rings = [] bu_level = logger.level self.aromatic_ring_atoms = set() for ringatoms in self.all_rings: if self.isRingAromatic(ringatoms): aromatic_rings.append(ringatoms) self.aromatic_ring_atoms.update(ringatoms) logger.setLevel(bu_level) self.aromatic_rings = aromatic_rings
[docs] def isAtomAromatic(self, atom): """ Returns True if the given atom is part of an aromatic ring """ anum = int(atom) for r in self.aromatic_rings: if anum in r: return True return False
[docs] def areAtomsOrtho(self, ring, atom1, atom2): r1_index = ring.index(atom1) r2_index = ring.index(atom2) return abs(r1_index - r2_index) == 3
[docs] def isBondRingEdge(self, a1, a2, rings=None): a1 = a1.index a2 = a2.index if rings is None: rings = self.all_rings for r in rings or rings: try: index1 = r.index(a1) index2 = r.index(a2) except ValueError: continue else: if abs(index1 - index2) == 1 or (index1 == 0 and index2 == len(r) - 1) or ( index2 == 0 and index2 == len(r) - 1): return True return False
[docs] def isAtomBetweenTwoRings(self, atom): """ Returns True if this atom is bound to two rings, but is not part of a ring itself. Like this: Ring-X-Ring. Such an atom may get a double bond to one of the rings if needed. """ anum = atom.index num_rings = 0 num_others = 0 # Need to make sure this atom is not bound to anything else # other than these two rings (for now). In the future, we could # check whether the geometry is planar (if bound to 3 other atoms). for bond in atom.bond: if bond.order == 1: nnum = bond.atom2.index n_in_ring = False for ring in self.all_rings: if nnum in ring and anum not in ring: n_in_ring = True break if n_in_ring: num_rings += 1 else: num_others += 1 return (num_rings == 2) and (num_others == 0)
# FIXME perhaps 3 rings is okay also (if the geometry is planar)?
[docs] def isAtomBoundToAromaticRing(self, atom): """ Returns True if at least one of atoms bound to it is a member of an aromatic ring. """ for bond in atom.bond: if bond.atom2.index in self.aromatic_ring_atoms: return True return False
[docs] def assignAromaticBonders(self, ring, bonders): """ Assigns bond orders for the supplied 5 or 6-membered aromatic ring. Returns True if successful, False if not. <bonders> is a list of atoms that MUST get double-bonded. """ if len(ring) != 5 and len(ring) != 6: debug(" EXITING - require 5 or 6 membered ring") return False debug(" assignAromaticBonders() ring: %s, bonders: %s" % (ring, bonders)) if len(bonders) == 6: # 3 bonds from this ring must be double. # We need to pick one bond which is the most ideal canditate for a # double bond, and then assign the other 4 later. # If this ring shares an edge with ONE 5-membered aromatic ring (as in # 1fvt), make that bond double. This makes the assignment of that ring # more flexible later (more options). # Do NOT do this if the 6-membered ring is stuck between 2 opposing # 5-membered rings (such as in 3coh), as that can cause a condition # where the 2 remaining bonders are not bonded to each other. shared_edges = [] for a1, a2 in self.forEdgeInRing(ring): if tuple(sorted((a1, a2))) in self.bonds_in_5mem_rings: shared_edges.append((a1, a2)) # Pick one "shared" edge to make a double bond: # FIXME ideally we should add logic to pick the "best" bond if more than # one shared edge is found. if shared_edges: atom1, atom2 = shared_edges[0] self.setBondOrder(atom1, atom2, 2) # Must have the "if" checks, since each atom can be in 2 edges if atom1 in bonders: bonders.remove(atom1) if atom2 in bonders: bonders.remove(atom2) # Ev:63487 Put the atoms into groups by connectivity: bonders_groups = [] curr_group = [] for anum in ring: if anum in bonders: curr_group.append(anum) else: if curr_group: bonders_groups.append(curr_group) curr_group = [] if curr_group: bonders_groups.append(curr_group) # Join first and last bonders_groups if first and last atoms in the # ring are both bonders: if len(bonders_groups) > 1: if ring[0] in bonders and ring[-1] in bonders: bonders_groups[0].extend(bonders_groups[-1]) bonders_groups.remove(bonders_groups[-1]) debug(" Bonders groups: %s" % bonders_groups) for group in bonders_groups: debug(' Analyzing group: %s' % group) if len(group) == 1: debug(" ONLY 1 not doubled atom in ring group!") elif len(group) == 2: if self.st.areBound(group[0], group[1]): debug(" adding aromatic bond: %i-%i" % (group[0], group[1])) self.setBondOrder(group[0], group[1], 2) elif len(group) == 3: debug(" THREE not doubled in ring group!") elif len(group) == 4: group = order_ring_or_chain(self.st, group) if self.st.areBound(group[0], group[1]): if not self.st.areBound(group[2], group[3]): raise ValueError("Ring atoms %i and %i are not bound" % (group[2], group[3])) debug(" adding aromatic bonds: %i-%i, %i-%i" % (group[0], group[1], group[2], group[3])) self.setBondOrder(group[0], group[1], 2) self.setBondOrder(group[2], group[3], 2) else: if not self.st.areBound(group[1], group[2]): raise ValueError("Ring atoms %i and %i are not bound" % (group[1], group[2])) if not self.st.areBound(group[0], group[3]): raise ValueError("Ring atoms %i and %i are not bound" % (group[0], group[3])) debug(" adding aromatic bonds: %i-%i, %i-%i" % (group[1], group[2], group[0], group[3])) self.setBondOrder(group[1], group[2], 2) self.setBondOrder(group[0], group[3], 2) elif len(group) == 5: debug(" assignAromaticBonders(): FIVE bonders given!!!") elif len(group) == 6 and len(ring) == 6: debug(" adding aromatic bonds: %i-%i, %i-%i, %i-%i" % (ring[1], ring[2], ring[3], ring[4], ring[5], ring[0])) self.setBondOrder(ring[1], ring[2], 2) self.setBondOrder(ring[3], ring[4], 2) self.setBondOrder(ring[5], ring[0], 2)
[docs] def determineAromaticAtomType(self, ring, a, ring_group_atoms): """ Determines the type of the atom in the aromatic ring. """ def get_ring_neighbors(ring_atom): neighbors = [] for bond in self.st.atom[ring_atom].bond: ni = bond.atom2.index if ni in ring: neighbors.append(ni) return neighbors aatom = self.st.atom[a] a_element = aatom.element # search for REQUIRED non-bonders ############### # X --X # | \ # | O/S # | / # X---X if a_element in ['O', 'S']: return ("RNB", 0) # search for REQUIRED non-bonders ############### # X---X X===X # | \ | | # | C==X and X X # | / \ / # X---X X if not self.isOnlySingleBonded(aatom): return ("RNB", 0) if a_element == 'C': neighbors = self.getNeighbors(a) # search for: ############ # X --X # | \ # | C <----- # | / # X---X if len(neighbors) == 2: # carbon only return ("RB", 0) # It is a carbon bound to 3 neighbors for n in neighbors: if n not in ring: natom = self.st.atom[n] n_element = natom.atomic_number n_element_name = natom.element # search for: ############ # X --X # | \ # | C==O <----- # | / # X---X if n_element == 8 or n_element == 16: if len(natom.bond) == 1: # If the atom already has a negative charge, don't # add the double bond: if natom.formal_charge == -1: return ("RB", 0) else: return ("CO", 0) # search for atoms bound to other aromatic rings: # X---X X---X # | \ / \ # | O C---n O X # | / \ / # X---X X---X # Whether (n) is part of another aromatic ring other_ring = None na_same_r = False for r in self.aromatic_rings: if n in r: other_ring = r na_same_r = (a in r) break if other_ring: # If (n) is part of an aromatic ring if n not in ring_group_atoms: # The other atom (n) is NOT in this aromatic group # Ev:127473 (1e9h) FIXME This code is not ideal: # Also see PPREP-940, which this section of code # initially created if len(other_ring) == 5 and len(ring) == 5: debugbonders(" Bound to another aromatic ring") if self.st.getBond(a, n).length < 1.37: return ("RNB", 0) return ("RB", 0) if na_same_r: # The other atom (n) and (a) are in ANOTHER # aromatic ring together # XDXAR #################################### # X----X a = Carbon # | \ # | O a---n <- Carbon or nitrogen : MAY NEED TO BE CHANGED! # | / \ # X----X O X # \ / # X---x if (n_element == 6 or n_element == 7): return ("XDXAR", 0) # XDXAR #################################### # X----X # | \ # | O a---N (terminal) # | / # X----X # The neighbor is terminal Nitrogen if len(natom.bond) == 1 and n_element == 7: pass # return ("XDXAR",points) # search for REQUIRED BONDERS: ############ # X---X # | \ # | C---X (non-double-bond that can't potentially double bond) # | / X != C,N,O,S except -O- and -S- # X---X if n_element != 6 and n_element != 8 and n_element != 7 and n_element != 16: return ("RB", 0) if n_element == 8 or n_element == 16: if len(natom.bond) >= 2: return ("RB", 0) else: # CO break # search for REQUIRED BONDERS: ############ # X---X # | \ # | C---C/N (non-double-bond that can't potentially double bond) # | / # X---X # bound to carbon outside of ring if n_element == 6 or n_element == 7: score = self.calcBondScore(a, n) debugbonders(" outside score for %s-%s = %s" % (a, n, score)) if score < 5.0: return ("RB", 0) if self.st.measure(a, n) > 1.45: # "Longer" bonds can be considered double # only if they are next to a ketone carbon. # (1r78) ketone_neighbor = False for n in get_ring_neighbors(a): if self.isKetoneCarbon(n): ketone_neighbor = True break if not ketone_neighbor: return ("RB", 0) break debugbonders(" XDX[AR][CH2AR]...") # search for POTENTIAL systems: ############ # (exocyclic double-bond) ############## # X --X # | \ # | C==X <----- # | / # X---X if a_element == 'C' and len(neighbors) == 3: # carbon n = self.getExtracyclicAtom(ring, aatom) score = self.calcBondScore(a, n) debugbonders(" Score to exocyclic atom: %s->%s = %.2f" % (a, n, score)) # PPREP-956 Must be only single bonded: if self.isOnlySingleBonded( n) and score >= 6.0 and n_element_name == 'C': # XDX and XDXCH2AR########################## # X----X ( nn1 ) <- Optional # | \ / # | a(C)-n(C) # | / \ # X----X ( nn2 ) <- Optional for n2 in self.getNeighbors(n): if n2 == a: # n2 in ring: continue # XDXCH2AR ################## # X----X # | \ # | O a---n(CH2) # | / \ # X----X n2==X # / \ # X Arom. X # \ / # X---X ns_neighbors = self.getNeighbors(n2) if self.st.atom[ n2].element == 'C' and self.isOnlySingleBonded( n): exit = False for n3 in ns_neighbors: if n3 in ring: exit = True if exit: break if self.isAtomAromatic( n2) and not self.isAtomAromatic(n): return ("XDXCH2AR", 0) return ("XDX", score) if a_element == 'N': # FIXME move to top of function: neighbors = self.getNeighbors(a) if len(neighbors) == 2: # X --X # | \ # | N <----- # | / # X---X return ("N2", 0) elif len(neighbors) == 3: for n in neighbors: if n not in ring: natom = self.st.atom[n] if natom.element == 'O' and len(natom.bond) == 1: # X --X # | \ # | N--O <----- # | / # X---X return ("N3O", 0) # Ev:96621 # Check whether the neighbor is in another aromatic # ring with the nitrogen: for r in self.aromatic_rings: if n in r and a in r: # X --X # | \ # | N--X # | / \ <- another aromatic ring # X---X X # \.../ return ("NAr", 0) # search for: ############### # X --X # | \ # | N -- X <----- # | / # X---X return ("N3", 0) # If the atom does not meet any of the criteria above... return ("U", 0)
[docs] def areCOsPara(self, ring, c1, c2): if len(ring) != 6: return False # Determine whether the carbonyls are in in para arrangement: c1i = c2i = None for i, ratom in enumerate(ring): if ratom == c1: c1i = i if ratom == c2: c2i = i return (c1i == (c2i + 3) or c2i == (c1i + 3))
[docs] def determineAromaticAtomTypes(self, ring, ring_group_atoms): """ Goes through every atom in the supplied aromatic ring, and determines what type of atom it is, then adds the atom to that category. It returns a dictionary (list) of atoms in different groups.""" debug(" Determining aromatic atom types in the ring %s" % ring) unknowns = [] required_non_bonders = [] required_bonders = [] CO_members = [] XDXAR_members = [] XDXCH2AR_members = [] N3_members = [] N3O_members = [] NAr_members = [] N_members = [] XDX_members_dict = {} # Break existing in-ring double bonds, so that potentially they could be # moved to a different location. Example Ev:135383 ring_obj = structure._Ring(self.st, 0, ring, None) for edge in ring_obj.edge: if edge.order == 2: required_bonders.append(edge.atom1.index) required_bonders.append(edge.atom2.index) debugbonders( " Existing double bond added as required_bonders: %s, %s" % (edge.atom1.index, edge.atom2.index)) edge.order = 1 for a in ring: if a in required_bonders: continue (type, points) = self.determineAromaticAtomType(ring, a, ring_group_atoms) if type == "RNB": required_non_bonders.append(a) elif type == "RB": required_bonders.append(a) elif type == "CO": CO_members.append(a) elif type == "XDXCH2AR": XDXCH2AR_members.append(a) elif type == "XDXAR": XDXAR_members.append(a) elif type == "XDX": debugbonders("XDX found!") XDX_members_dict[a] = points elif type == "N3": N3_members.append(a) elif type == "N3O": N3O_members.append(a) elif type == "NAr": NAr_members.append(a) elif type == "N2": N_members.append(a) else: unknowns.append(a) XDX_members = list(XDX_members_dict) ringatomtypes_dict = {} ringatomtypes_dict["RNB"] = required_non_bonders ringatomtypes_dict["RB"] = required_bonders ringatomtypes_dict["CO"] = CO_members ringatomtypes_dict["XDX"] = XDX_members ringatomtypes_dict["XDXAR"] = XDXAR_members ringatomtypes_dict["XDXCH2AR"] = XDXCH2AR_members ringatomtypes_dict["N3"] = N3_members ringatomtypes_dict["N3O"] = N3O_members ringatomtypes_dict["NAr"] = NAr_members ringatomtypes_dict["N2"] = N_members ringatomtypes_dict["U"] = unknowns # Print a sorted list list: if DEBUG: atom_types_list = [] for type, atoms in ringatomtypes_dict.items(): for atomnum in atoms: atom_types_list.append("%i:%s" % (atomnum, type)) atom_types_list.sort() debug(" Atom types: %s" % ", ".join(atom_types_list)) return ringatomtypes_dict
[docs] def assignGroupTripleBonds(self, atom_group): """ Assigns triple bonds as appripriateley to the atoms in the supplied group. Returns a list of atoms which now have assigned bond orders.""" if len(atom_group) == 1: pass elif len(atom_group) == 2: if self.st.measure(atom_group[0], atom_group[1]) < 1.33: self.setBondOrder(atom_group[0], atom_group[1], 3) elif len(atom_group) == 3: len1 = self.st.measure(atom_group[0], atom_group[1]) len2 = self.st.measure(atom_group[1], atom_group[2]) len3 = self.st.measure(atom_group[0], atom_group[2]) if len1 <= len2 and len1 <= len3: self.setBondOrder(atom_group[0], atom_group[1], 3) if len2 <= len1 and len2 <= len3: self.setBondOrder(atom_group[1], atom_group[2], 3) if len3 <= len1 and len3 <= len2: self.setBondOrder(atom_group[0], atom_group[2], 3) else: debug( " 4 or more potential triple bond atoms in one group!!")
[docs] def assignAromaticRing(self, ring, ring_group_atoms): """ Returns ASSIGNED_AROMATIC if everything is okay. Returns NOT_AROMATIC if the ring is not aromatic. Returns NOT_ASSIGNED if this ring should be considered later. """ debug("\n------ assignAromaticRing(): ring: %s ---" % ring) ringatomtypes_dict = self.determineAromaticAtomTypes( ring, ring_group_atoms) try: bonders = self.findAromaticBonders(ring, ringatomtypes_dict) except RuntimeError as err: if "Not aromatic" in str(err): return NOT_AROMATIC elif "Not assigned" in str(err): return NOT_ASSIGNED raise if bonders: self.assignAromaticBonders(ring, bonders) return ASSIGNED_AROMATIC return NOT_ASSIGNED
[docs] def findAromaticBonders(self, ring, ringatomtypes_dict): """ Returns a list of suggested bonding atoms in the specified aromatic ring. Returns [] if no bonds should be made. If "required_only" is set, return only the REQUIRED BONDERS and REQUIRED NON-BONDERS Raises RuntimeError("Not aromatic") if the ring is not aromatic. Raises RuntimeError("Not assigned") if the ring could not be assigned. """ # FIXME Consider caching the list of exocyclic bonds def get_exocyclic_bond(ring_atom): for bond in self.st.atom[ring_atom].bond: if bond.atom2.index not in ring: return bond return None def get_ring_neighbors(ring_atom): neighbors = [] for bond in self.st.atom[ring_atom].bond: ni = bond.atom2.index if ni in ring: neighbors.append(ni) return neighbors # Key: bonder atom number; value: score from 0.0 (required non-bonder) # to 1.0 (required-bonder) bonder_scores = {} def get_required_bonders(): return ([a for a, score in bonder_scores.items() if score == 1.0]) def get_required_non_bonders(): return ([a for a, score in bonder_scores.items() if score == 0.0]) def get_potential_bonders(): return ([a for a, score in bonder_scores.items() if score > 0.0]) def set_required_bonder(a): bonder_scores[a] = 1.0 def set_required_non_bonder(a): bonder_scores[a] = 0.0 def get_sorted_bonders_and_scores(): decorated = [(score, a) for (a, score) in bonder_scores.items()] return sorted(decorated) def log_bonders(msg): """ Report bonders and their scores, sorted by the score """ if DEBUG_AROMATIC: decorated = get_sorted_bonders_and_scores() bonder_strings = [ "%i: %.1f" % (a, score) for (score, a) in decorated ] debugbonders(msg + " " + ", ".join(bonder_strings)) if not len(ring) in [5, 6]: debug(" ERROR! not 5 or 6 membered ring!") raise RuntimeError("Not aromatic") # Initialize bonder scores: for a in ring: if a in ringatomtypes_dict["RNB"]: bonder_scores[a] = 0.0 elif a in ringatomtypes_dict["RB"]: bonder_scores[a] = 1.0 elif a in ringatomtypes_dict["N2"]: bonder_scores[a] = 0.7 else: bonder_scores[a] = 0.5 if len(get_required_non_bonders()) >= len(ring) - 1: debug("All req-non-b: Bond orders are already assigned.") return [] # No need to do further assignment # Handle special cases when all are required bonders: # for a in ringatomtypes_dict["RB"]: # set_required_bonder(a) # FIXME this will not work for 5-membered rings! if len(get_required_bonders()) == len(ring): return get_required_bonders() if len(get_required_bonders()) == 5 and len(ring) == 6 and len( get_required_non_bonders()) == 0: # The whole 6-membered ring should get bonds added. return ring if len(get_required_bonders()) == 4 and len(ring) == 5 and len( get_required_non_bonders()) == 0: # 4 of the atoms in the 5-membered ring should get bonds added. return get_required_bonders() num_os = 0 for a in get_required_non_bonders(): if self.st.atom[a].element in ['O', 'S']: num_os += 1 if num_os > 1 and len(ring) == 5: debug(" MORE THAN 1 O or S in the ring! Not aromatic!!") raise RuntimeError("Not aromatic") CO_members = ringatomtypes_dict["CO"] XDXAR_members = ringatomtypes_dict["XDXAR"] XDXCH2AR_members = ringatomtypes_dict["XDXCH2AR"] XDX_members = ringatomtypes_dict["XDX"] N3_members = ringatomtypes_dict["N3"] NAr_members = ringatomtypes_dict["NAr"] for n in N3_members[:]: if self.st.atom[n].formal_charge == 1: set_required_bonder(n) # Do not consider for the amide search, below: N3_members.remove(n) N_members = ringatomtypes_dict["N2"] + ringatomtypes_dict[ "N3O"] # + ringatomtypes_dict["NAr"] # OBJECTIVE: make it so that there are 2, 4, or 6 bonders. # If there are 2 bonders, they have to be next to each other # If there are 4 bonders, each bonder needs to have another neighbor in # potential_bonders. # FIXME !!!!!!!!! for a in XDXCH2AR_members: set_required_non_bonder(a) for n in self.getNeighbors(a): if n not in ring: debug("Group XDXCH2AR") self.setBondOrder(a, n, 2) log_bonders("Initial bonder scores:") # Fix for 2g01, part of PPREP-940 # FIXME we need to make this rule more general if len(get_potential_bonders()) != 4: for n3o in ringatomtypes_dict["N3O"]: set_required_non_bonder(n3o) # Ev:96621 Search for nitrogens which are in 2 (or more) aromatic # rings: for n in NAr_members: # Do not make any double-bonds to this nitrogen (to avoid N+) # arrangements in bicyclic systems. if n in get_potential_bonders(): set_required_non_bonder(n) # Search for amides within the aromatic ring: amide_n = [] amide_c = [] for co in CO_members: for n in N3_members: if self.st.areBound(co, n): set_required_non_bonder(n) for n in N_members: if self.st.areBound(co, n): if n not in amide_n: amide_n.append(n) if co not in amide_c: amide_c.append(co) for c in amide_c: for n1 in self.getNeighbors(c): if n1 in ring and n1 in get_required_bonders(): for n2 in self.getNeighbors(n): if n2 != c and n2 in amide_c: debug("!!!!! CO--R.B.--CO FOUND!") len1 = 0 len2 = 0 for c1n in self.getNeighbors(c): if c1n not in ring: len1 = self.st.measure(c, c1n) for c2n in self.getNeighbors(n2): if c2n not in ring: len2 = self.st.measure(n2, c2n) debug("len1= %s" % len1) debug("len2= %s" % len2) if len(ring) == 6: if len1 > len2 and len1 > 1.3: amide_c.remove(c) break elif len2 > 1.3: amide_c.remove(n2) break else: if len1 > len2: amide_c.remove(c) break else: amide_c.remove(n2) break for co in amide_c: for atom in self.getNeighbors(co): natom = self.st.atom[atom] if (natom.element in ['O', 'S']) and len(natom.bond) == 1: debug("Functional group amide_c O/S with one neighbor") self.setBondOrder(co, atom, 2) set_required_non_bonder(co) break set_required_non_bonder(co) # Made Amide nitrogens non-bonders when the N is surrounded by 2 ketone Cs: for n in amide_n: neighbors = self.getNeighbors(n) co_count = 0 for a in neighbors: if a in amide_c: co_count += 1 if co_count > 1: try: set_required_non_bonder(n) except: pass # PPREP-739 If 2 ortho carbonyls are present in the ring, # make them both double bonds, if they are short enough: if len(CO_members) == 2 and \ self.areAtomsOrtho(ring, CO_members[0], CO_members[1]): # If both are pretty short, make them carbonyls: both_short = True for co in CO_members: if get_exocyclic_bond(co).length > 1.4: both_short = False if both_short: for co in CO_members: exo_bond = get_exocyclic_bond(co) self.setBondOrder(co, exo_bond.atom2, 2) set_required_non_bonder(co) # Ev:121918 (1w4l) Determine if any C-O bonds should be double, no matter what: for co in CO_members: exo_bond = get_exocyclic_bond(co) exo_atom = exo_bond.atom2 if exo_bond.length < 1.3 and len(get_potential_bonders()) != 6: self.setBondOrder(co, exo_atom, 2) set_required_non_bonder(co) N2N3CO = [] for i in N_members: if i in get_potential_bonders(): N2N3CO.append(i) for i in N3_members: if i in get_potential_bonders(): N2N3CO.append(i) for i in CO_members: if i in get_potential_bonders(): N2N3CO.append(i) # SEARCH FOR N2N3COs (atoms that can either bond or non-bond) # THAT ARE SURROUNDED BY NON-BONDERS # Go through potential bonders: bonders2 = [] for a in get_potential_bonders(): bonders2.append(a) for a in bonders2: if a in get_required_bonders(): continue non_bonder_neighbors = 0 for n in self.getNeighbors(a): if n in ring and n not in get_potential_bonders(): non_bonder_neighbors += 1 if non_bonder_neighbors > 1: if a in N2N3CO: N2N3CO.remove(a) set_required_non_bonder(a) if (len(N_members) + len(CO_members)) > 0: debugbonders( " making N3s non potential_bonders, because N2s or COs are present" ) for a in N3_members: if a in get_potential_bonders( ) and a not in get_required_bonders(): set_required_non_bonder(a) # Look for a required bonder surrounded by a non_bonder and a potential # bonder, and make the potential bonder a bonder log_bonders("Bonder scores after adding XDXs:") if XDX_members: LN2CO = len(N_members) + len(CO_members) add_all = False if len(N_members) + len(CO_members) == 0: fut_bonders = len(get_potential_bonders()) - len(XDX_members) if fut_bonders in [2, 4, 6]: add_all = True else: add_all = False elif LN2CO == 1 and len(ring) == 6: add_all = False elif LN2CO == 0 and len(ring) == 5: add_all = False else: add_all = True if not add_all: debug( " Can add all but last potential non_bonder, because there is no N2, N3, or CO present" ) if len(XDX_members) > 0: last = XDX_members[len(XDX_members) - 1] for i in XDX_members: # the XDX we are going through is not the one least # likely to double bond if i != last and i not in get_required_bonders(): set_required_non_bonder(i) for n in self.getNeighbors(i): if n not in ring: debug("Ring group XDX") self.setBondOrder(i, n, 2) break elif len(ring) == 5: # This code is needed to render heme-like groups correctly: # More specifically: Ar-C-Ar, where the carbon needs to get # a double bond to one of the aromatic rings. # (In other words, "heme-like" here refers to an arrangement where # there is a ring that is double-bonded to a CH2, which is # single-bonded to another ring. This occurs in hemes). for i in XDX_members: if i not in get_required_bonders(): for n in self.getNeighborsObj(i): if n.index not in ring: # This check is a fix for PPREP-1004 if self.isAtomBetweenTwoRings(n): debug("Ring group XDX") self.setBondOrder(i, n, 2) set_required_non_bonder(i) break log_bonders("Bonder scores after subtracting XDXs:") # Add another non_bonders if needed: 1, 3, or 5 potential_bonders! if len(get_potential_bonders()) in [1, 3, 5]: # Even number of potential_bonders. Try to eliminate an N3 atom: non_required_N3s = [] bonders_N3s = [] for a in N3_members: if a in get_potential_bonders(): if a not in get_required_bonders(): non_required_N3s.append(a) else: bonders_N3s.append(a) # Ev:101645 Only don't bond N3 members if there is an odd number of them: if len(non_required_N3s) >= 1: # There is at least one non-required N3 bonder. Pick one arbitrarily to remove: set_required_non_bonder(non_required_N3s[0]) elif len(bonders_N3s) >= 1: # There are no non-required N3 potential_bonders. Pick any one arbitrarily to remove: set_required_non_bonder(N3_members[0]) # Make all non-bondable CO's double bonds: for c in CO_members: if c not in get_potential_bonders(): for n in self.getNeighbors(c): natom = self.st.atom[n] if natom.element == 'O' and len(natom.bond) == 1: debug("Functional group CO found") self.setBondOrder(c, n, 2) set_required_non_bonder(c) break # Make all double-bonds that are currently present in the ring required bonders: # Ev:127473 (1fvt) if len(ring) == 5: for a1, a2 in self.forEdgeInRing(ring): bond = self.st.getBond(a1, a2) if bond.order == 2: bond.order = 1 set_required_bonder(a1) set_required_bonder(a2) debugbonders( " Existing double bond added as required_bonders: %i, %i" % (a1, a2)) if len(get_potential_bonders()) == 0: debug(" NO BONDERS!") raise RuntimeError("Not aromatic") # If the number of potential_bonders is odd and there is a nitrogen # that can be made a non=bonder, do so, giving priority to non-amide # nitrogens. if len(get_potential_bonders()) in [1, 3, 5]: # First try to remove any amide nitrogens from bonders: for a in N_members: if a in amide_n: # For each amide # If there is more than one, bond the one that is not an amide if a in get_potential_bonders( ) and a not in get_required_bonders(): set_required_non_bonder(a) break # Then try to remove any nitrogens from bonders: # NOTE: Removing the check for 5-membered ring will break 1ej3 if len(ring) == 5 and len(get_potential_bonders()) in [1, 3, 5]: for a in N_members: # There are no amide nitrogens. Add anyone that is left. if a not in get_required_bonders(): set_required_non_bonder(a) break if len(ring) == 6 and len(get_required_non_bonders()) == 1: nb = get_required_non_bonders()[0] warning("WARNING: Ring %s has 1 non-bonder (%s)" % (ring, nb)) # Check if one of the neighbors is part of another ring, # if so, make it a non-bonder (if both match, "randomly" pick # one). This help especially when the "other" ring is # in reality aromatic, but is not assigned as such by ABO. # For example 3a2c (see PPREP-945). for n in get_ring_neighbors(nb): num_rings = len(self.getAtomsRings(n)) if num_rings > 1: set_required_non_bonder(n) break if len(get_potential_bonders()) == 5 and len(ring) == 5: debug(" RING IS NOT AROMATIC!!! NO NON-BONDERS!") raise RuntimeError("Not aromatic") if len(get_potential_bonders()) == 3 and len( get_required_bonders()) == 2: atom1, atom2 = tuple(get_required_bonders()) if self.st.areBound(atom1, atom2): return get_required_bonders() if len(get_potential_bonders()) == 5: # PPREP-963 If one of these potential bonders is part of # another aromatic ring, then throw it out (hopefully it # will get a double bond when that ring will be assigned) decorated = get_sorted_bonders_and_scores() while len(decorated) > 4: decorated.pop(0) return [a for score, a in decorated] return get_potential_bonders()
[docs] def groupAromaticRings(self): # return a list of (ring lists) all_groups = [] unsorted_rings = self.aromatic_rings[:] while unsorted_rings != []: # new group entry_set = [] ring = unsorted_rings[0] unsorted_rings.remove(ring) entry_set = get_rings_in_group(ring, unsorted_rings) for r in entry_set: if r in unsorted_rings: unsorted_rings.remove(r) # SORT THE GROUP SET: # put the all carbon rings first in the order: # SEARCH FOR: #################################### # X---X # / \ # X O X---X # \ / \ # X---X O X # / \ / # X O X---X # \ / # X---X sorted_set = [] # ADD MORE CODE # add the rest of the rings. for r in entry_set: if r not in sorted_set: sorted_set.append(r) all_groups.append(sorted_set) self.aromatic_ring_groups = all_groups
[docs] def getNeighboringRings(self, ring, rings): """ Return a list of rings that share atoms with the specified ring. """ neighbor_rings = [] for nr in rings: if ring != nr: if do_rings_share_atoms(ring, nr): if nr not in neighbor_rings: neighbor_rings.append(nr) return neighbor_rings
[docs] def isKetoneCarbon(self, atom): if self.getElement(atom) == "C": for natom in self.getNeighbors(atom): if self.getElement(natom) == "O" and self.getNumNeighbors( natom) == 1: return True return False
[docs] def assignAromaticRingGroup(self, group): """ Assigns bond orders of aromatic rings (5 & 6 membered) which are arranged in a group (multi-cyclic systems). """ debug("\nWorking on aromatic ring group: %s" % group) # Make a list of all atoms in this ring group: ring_group_atoms = set() for r in group: ring_group_atoms.update(r) debug("RING GROUP ATOMS: %s" % ring_group_atoms) # Sort rings by priority: def get_sort_key(ring): priority = 0 # lower number = higher priority (needs to be assigned first) ringatomtypes_dict = self.determineAromaticAtomTypes( ring, ring_group_atoms) num_required_bonders = len(ringatomtypes_dict["RB"] + ringatomtypes_dict["XDXAR"]) num_required_non_bonders = len(ringatomtypes_dict["RNB"]) # Assign 5-membered rings that have only one way they can be assigned (3bki) if len(ring) == 5: if num_required_non_bonders == 1 or num_required_bonders == 4: priority -= 30 # Assign other rings that have only one way they can be assinged (1p2a): if len(ring) in [5, 6]: if (num_required_bonders + num_required_non_bonders) == len(ring): priority -= 20 # This is needed to get 1i6v right: if num_required_bonders >= 5: debug("RING has %i required non-bonders; increasing priority" % num_required_bonders) priority -= num_required_bonders # Give rings that have ketones higher priority: has_ketone = False for atomnum in ring: has_ketone = self.isKetoneCarbon(atomnum) if has_ketone: break if has_ketone: priority -= 10 debug("RING HAS KETONE; increasing priority") debug(' returning priority of %i' % priority) return priority sorted_rings = sorted(group, key=get_sort_key) debug('RINGS SORTED BY PRIORITY: %s' % sorted_rings) rings_not_done = sorted_rings # Take care of the more complicated cases: num_times_tried = {} while rings_not_done != []: ring = rings_not_done[0] out = self.assignAromaticRing(ring, ring_group_atoms) if out == ASSIGNED_AROMATIC: rings_not_done.remove(ring) elif out == NOT_AROMATIC: rings_not_done.remove(ring) else: # Did not assign the ring successfully if len(rings_not_done) == 1: # This was the last ring; do not try any more assignments: rings_not_done.remove(ring) else: try: num_times_tried[ring[0]] += 1 if num_times_tried[ring[0]] > 2: # Give up, the ring is not aromatic rings_not_done.remove(ring) else: # Append ring to the end of the list: rings_not_done.remove(ring) rings_not_done.append(ring) except KeyError: num_times_tried[ring[0]] = 1
[docs] def assignAromaticRingOrders(self): """ Assign all aromatic rings """ debug("\nassignAromaticRingOrders(): aromatic rings: %s" % self.aromatic_ring_groups) for group in self.aromatic_ring_groups: self.assignAromaticRingGroup(group) debug("Done assigning aromatic ring bond orders.")
[docs] def fixKetones(self): """ This function adds double bonds to ketones and aldehydes when necesary. It should be run only after ALL other bond orders have been assigned. """ debug("Fixing ketones...") for a in self.atoms: atom = self.st.atom[a] if atom.element == 'O': neighbors = self.getNeighbors(atom) if len(neighbors) != 1 or not self.isOnlySingleBonded(atom): continue catom = self.st.atom[neighbors[0]] if catom.element != 'C': continue if not self.isOnlySingleBonded(catom): continue c_num_n = len(catom.bond) if c_num_n > 3: continue the_bond = atom.bond[1] bond_length = self.st.measure(the_bond.atom1, the_bond.atom2) c_bond_angle = calculate_average_angle(self.st, catom) # FIXME: Create a more flexible scoring system.... if (c_num_n == 3 and c_bond_angle >= 119) \ or (c_num_n == 3 and c_bond_angle > 118 and bond_length < 1.4) \ or (c_num_n == 3 and c_bond_angle >= 117 and bond_length < 1.3) \ or (c_num_n < 3 and bond_length < 1.3): self.setBondOrder(atom, catom, 2)
[docs] def fix_N4s(self): """ Sets the macromodel atom type of Positively Charged Nitrognes to N4(31/sp2) or N5(32/sp3). """ for a in self.atoms: atom = self.st.atom[a] if atom.element == 'N': n_len = len(atom.bond) if n_len not in [3, 4]: continue # Skip nitrogens with ZOBs: # See 2bzh, PPREP-940 non_single_bonds_present = False zobs_present = False higher_orders_present = False for bond in atom.bond: if bond.order > 1: higher_orders_present = True elif bond.order == 0: zobs_present = True if zobs_present: continue if n_len == 3: # 3 bonds, and one is a double bond: if higher_orders_present: atom.atom_type = 31 atom.formal_charge = 1 elif n_len == 4: # 4 bonds to this nitrogen, tetrahedral: atom.atom_type = 32 atom.formal_charge = 1
[docs] def sortBondableAtomsByGroups(self, potential_double_bond_atoms): """ Takes the list of input atoms, and puts them into groups, and returns a list of those groups. Atoms are considerred in the same group if they are bonded together (in the same chain). """ group_set = [] my_copy = list(potential_double_bond_atoms) # make a copy while my_copy: atom_num = my_copy.pop() entry_set = [atom_num] new_atoms = [atom_num] while new_atoms != []: # while didn't just do last atom for atom in new_atoms: for n in self.getNeighbors(atom): if n in my_copy and n not in entry_set: entry_set.append(n) my_copy.remove(n) new_atoms.append(n) new_atoms.remove(atom) group_set.append(entry_set) return group_set
[docs] def assignTripleBonds(self): potential_triple_bond_atoms = self.findPotentialTripleBondAtoms() # This code puts the atoms in the same group if they are bound to each other while potential_triple_bond_atoms != []: atom_num = potential_triple_bond_atoms[0] potential_triple_bond_atoms.remove(atom_num) entry_set = [atom_num] new_atoms = [atom_num] while new_atoms != []: # while didn't just do last atom for atom in new_atoms: for n in self.getNeighbors(atom): if n in potential_triple_bond_atoms and n not in entry_set: entry_set.append(n) potential_triple_bond_atoms.remove(n) new_atoms.append(n) new_atoms.remove(atom) if len(entry_set) >= 2: self.assignGroupTripleBonds(entry_set)
[docs] def assignChainOrders(self): debug("\n --- assignChainOrders(): ---") potential_double_bond_atoms = self.getPotentialDoubleBondAtoms() group_set = self.sortBondableAtomsByGroups(potential_double_bond_atoms) for group in group_set: if len(group) > 1: self.assignGroupDoubleBonds(group) potential_double_bond_atoms = self.getPotentialDoubleBondAtoms() group_set2 = self.sortBondableAtomsByGroups(potential_double_bond_atoms) if group_set2 != []: for group in group_set2: if group not in group_set and len(group) > 1: self.assignGroupDoubleBonds(group)
# Go through atoms that HAVE to double bond, but are not double bonded # at this point. # if a single required bonder atom is found, look for something that it # can double bond to...
[docs] def findPotentialTripleBondAtoms(self): """ Returns a list of atoms that can potentially triple-bond. """ potential_triple_bond_atoms = [] for atom_num in self.atoms: iatom = self.st.atom[atom_num] if not self.isOnlySingleBonded(iatom): continue element = iatom.element num_n = self.getNumNeighbors(iatom) if num_n == 2: bond_angle = calculate_average_angle(self.st, atom_num) if bond_angle > 145 and element in ['C', 'N', 'Si']: potential_triple_bond_atoms.append(atom_num) elif num_n == 1 and element in ['C', 'N', 'Si', 'O']: # Terminal single-bonded atom Ev:69974 potential_triple_bond_atoms.append(atom_num) return potential_triple_bond_atoms
[docs] def getPotentialDoubleBondAtoms(self): """ Returns a list of atoms that can potentially double bond. Only bond angles are considered. """ # Ev:119181 Make a list of atoms that are part of small rings # (5 members). The minimum sp2 angle is smaller for # these atoms. atoms_in_small_rings = set() for ringatoms in self.all_rings: if len(ringatoms) <= 5: atoms_in_small_rings.update(ringatoms) potential_double_bond_atoms = [] # Not sure why we have a maximum angle. Perhaps for triple bonds? max_sp2_angle = 155.0 min_sp2_angle = 105.0 # !! CHANGING THIS MAY BREAK SOMETHING ELSE !! # Loosen up the rule if the atom is in a small ring (constrainted): # Not considering bond lengths !!! CHANGING THIS MAY BREAK SOMETHING # ELSE! min_sp2_5mem_nodist_angle = 104.3 min_sp2_5mem_withdist_angle = 100.0 # Considering bond lengths for a in self.atoms: iatom = self.st.atom[a] if not self.isOnlySingleBonded(iatom): continue if iatom.element not in ['C', 'N', 'Si']: continue num_n = self.getNumNeighbors(iatom) if num_n in (2, 3): bond_angle = calculate_average_angle(self.st, a) if bond_angle < max_sp2_angle: # If the atom is NOT in a small ring, only consider angles: if bond_angle > min_sp2_angle: potential_double_bond_atoms.append(a) elif a in atoms_in_small_rings: # Ev:119181 5 membered rings are treated specially, since # bond angles do not tell much there: if bond_angle > min_sp2_5mem_nodist_angle: potential_double_bond_atoms.append(a) elif bond_angle > min_sp2_5mem_withdist_angle: # Check whether at least one bond is short enough: short_bonds_present = False for bond in iatom.bond: dist_threshold = get_single_bond_length( iatom, bond.atom2) if bond.length < (dist_threshold - 0.1): short_bonds_present = True break if short_bonds_present: potential_double_bond_atoms.append(a) elif num_n == 1: # Terminal atoms with unknown hybridization if iatom.bond[1].order == 1: potential_double_bond_atoms.append(a) return potential_double_bond_atoms
[docs]def get_ring_atom_coords(st, ring_atoms): """ Get the coordinates of the ring atoms as a numpy array. If `st` has periodic boundary conditions (PBC), the coordinates returned will all be in the frame of reference of the first atom in ring_atoms. """ try: pbc = infrastructure.PBC(st) if not ring_atoms: return numpy.array([]) return numpy.array(pbc.getNearestImages(st, ring_atoms, ring_atoms[0])) except IndexError: # The structure does not have PBC properties associated with it. return numpy.array([st.atom[a].xyz for a in ring_atoms])
[docs]def get_edge_normals(ring_atom_coords): """ Get vectors normal to the planes described by each ring-edge and the center of the ring. Assumes that atoms are in order. """ center = numpy.mean(ring_atom_coords, 0) edge_vectors = [] shifted = numpy.roll(ring_atom_coords, 1, 0) for a1, a2 in zip(ring_atom_coords, shifted): # Calculate the ring-center -> atom vectors: atom1_vector = a1 - center atom2_vector = a2 - center normal_vector = numpy.cross(atom1_vector, atom2_vector) normal_unit_vector = normal_vector / numpy.linalg.norm(normal_vector) edge_vectors.append(normal_unit_vector) return edge_vectors
[docs]def calculate_planarity(st, ring_atoms): """ Calculate the ring planarity. Very planar rings have a planarity of < 1. Assumes that atoms are in order. """ ring_atom_coords = get_ring_atom_coords(st, ring_atoms) edge_vectors = get_edge_normals(ring_atom_coords) # Calculate the ring (average) vector: ring_vector = numpy.average(edge_vectors, 0) # Calculate average deviation from the ring vector: diffs = [] for v in edge_vectors: d = numpy.absolute(numpy.subtract(ring_vector, v)) diffs.append(d) ave_diff = numpy.average(diffs, 0) planarity = numpy.average(ave_diff) * 100 return planarity
def _residue_atom_subset(res, atoms): """ Return atoms of the residue residue which are also in the specified <atoms> list. Will return an empty list if this residue already has bond orders assigned. """ res_atoms = [] for a in res.atom: ai = a.index # Skip this residue if any atom has double bonds: total_orders = 0 for bond in a.bond: order = bond.order total_orders += order if order > 1: debug("WARNING: Residue %s has bond orders. Skipping..." % res) return [] # do not assign this residue # Give a warning if an atom has too many bonds (due to PDB error): # If so, this residue will be skipped if total_orders > MAX_VALENCE[a.atomic_number]: print("WARNING: Assign Bond Orders: Atom %i (%s) has too many bonds (%i)" \ % (ai, a.element, total_orders)) print(" Skipping", res) return [] # do not assign this residue # If <atoms> specified, assign only if this atom is in it: if not atoms or ai in atoms: res_atoms.append(ai) return res_atoms
[docs]class CCDStructureMismatchError(RuntimeError): """raise this error when structure in CCD template doesn't match HET"""
[docs]class SmilesGenerateError(RuntimeError): """raise this error when RDKit fails to generate SMILES"""
def _assign(ccd_st, ccd_order, st, st_order): # Assign bond orders and formal charges based on the mapping: # Assign bond orders and formal charges based on the mapping: assigned_bonds = [] for bond in ccd_st.bond: if bond.atom1.index not in ccd_order or bond.atom2.index not in ccd_order: # This atom is present in <st> only. continue a1_index = ccd_order.index(bond.atom1.index) a2_index = ccd_order.index(bond.atom2.index) order = bond.order if order != 1: st_atom1 = st_order[a1_index] st_atom2 = st_order[a2_index] st_bond = st.getBond(st_atom1, st_atom2) if st_bond is None: # These 2 atoms are not bonded in the original CT; e.g. 3QCR pass else: st_bond.order = order _a1, _a2 = sorted([st_atom1, st_atom2]) assigned_bonds.append((_a1, _a2, order)) for het_ai, ccd_ai in zip(st_order, ccd_order): st_atom = st.atom[het_ai] ccd_atom = ccd_st.atom[ccd_ai] st_atom.formal_charge = ccd_atom.formal_charge return assigned_bonds def _find_best_mapping(st, het_atoms, ccd_st): """ Find the best way that <het_atoms> subset of <st> can be mapped to <ccd_st>, considering all symmetrical combinations. """ # Save original atom indices: for atom in st.atom: atom.property['i_abo_original_index'] = atom.index het_heavy_atoms = [ai for ai in het_atoms if st.atom[ai].atomic_number != 1] # Create a substructure CT with only this het in it, plus few atoms of the # receptor for covalently bound ligands (if present): het_asl = analyze.generate_asl(st, het_atoms) delete_atoms = analyze.evaluate_asl(st, "NOT (withinbonds 2 (%s))" % het_asl) if delete_atoms: het_st = st.copy() rmap = het_st.deleteAtoms(delete_atoms, renumber_map=True) het_heavy_atoms = [rmap[anum] for anum in het_heavy_atoms] else: # Het is not making any covalent bonds het_st = st mapper = CCDAtomMapper(use_chirality=False) mapper.optimize_mapping = True ccd_heavy_atoms = [a.index for a in ccd_st.atom if a.atomic_number != 1] if len(ccd_heavy_atoms) != len(het_heavy_atoms): raise CCDStructureMismatchError( 'CCD and het structures have different number of atoms.') # Re-order both structures by most ideal combination: try: _, reordered_st = mapper.reorder_structures(ccd_st, ccd_heavy_atoms, het_st, het_heavy_atoms, ccd_st) except atom_mapper.ConformerError: raise CCDStructureMismatchError( 'CCD and het structure graphs are not isomorphic.') # Since <st> was now re-ordered, and we want to have the original CT # atom order unmodified, we will only need to use the identity mapping # between the <ccd_st> and <st> and not the structure itself. Only consider # heavy atoms, to handle cases where het has hydrogens or covalently bonded # protein atoms. ordered_orig_heavy = (a.property['i_abo_original_index'] for a in reordered_st.atom if a.atomic_number != 1) ordered_orig_st_atoms = [ai for ai in ordered_orig_heavy if ai in het_atoms] # Sanity check: ccd_elements = [ccd_st.atom[ai].element for ai in ccd_heavy_atoms] st_elements = [st.atom[ai].element for ai in ordered_orig_st_atoms] if ccd_elements != st_elements: raise CCDStructureMismatchError( "Atom element mismatch between the CCD and het record:\n%s\n%s" % (ccd_elements, st_elements)) for atom in st.atom: del atom.property['i_abo_original_index'] return ccd_heavy_atoms, ordered_orig_st_atoms def _assign_from_smiles(st, atoms, smiles): """ Assign the bond orders for the <atoms> in <st> based on the <smiles>. :param st: Structure containing the het. :type st: `structure.Structure` :param atoms: Atoms in the het group. :type atoms: list of ints :return: Bonds that were assigned, as a list of (atom1 index, atom2 index, bond order). :rtype: list of (int, int, int) :raises RuntimeError if het could not be assigned from CCD record for any reason. """ # Create a CT from the CCD SMILES: try: ccd_mol = Chem.MolFromSmiles(smiles) ccd_st = rdkit_adapter.from_rdkit(ccd_mol) except Exception as err: msg = "Failed to generate a CT from SMILES: %s" % smiles msg += '\n---RDkit error message:---\n%s' % str(err).strip() raise SmilesGenerateError(msg) assert atoms ccd_order, st_order = _find_best_mapping(st, atoms, ccd_st) assigned_bonds = _assign(ccd_st, ccd_order, st, st_order) return assigned_bonds def _get_ccd_smiles(hetname): """ Return the SMILES string for the given het name in the CCD database. :param hetname: 3-letter het name :type hetname: str :return: SMILES string or None :rtype: str or None """ if hetname in CCD_CACHE: return CCD_CACHE[hetname] filename = hetname + '.cif' try: fh = zipfile.ZipFile(CCD_FILE).open(filename) except KeyError: # There is no CCD record for this residue. return None # Read the SMILES for the corresponding CCD record: for line in io.TextIOWrapper(fh).readlines(): if line[0] in ('#', ';'): continue words = line.split() if len(words) < 5: continue if words[1] == 'SMILES_CANONICAL' and words[2] == 'CACTVS': # strip any leading and trailing double-quotes, if present: smiles = words[4].strip('"') CCD_CACHE[hetname] = smiles return smiles # No SMILES in CCD entry for some reason return None
[docs]def assign_st(st, atoms=None, neutralize=False, problem_only=False, skip_assigned_residues=True, use_ccd=False, _logger=None): """ Perform the assign-bond-order algorithm on the supplied structure. If the atom list is specified, only those atoms are fixed. :param st: Structure to assign bond orders in :type st: L{structure.Structure) :param atoms: List of atoms to assign. By default all atoms as assigned. :type atoms: list of ints :param neutralize: Whether to protonate carboxylic acids. :type neutralize: bool :param problem_only: Whether to assign only to atoms with PDB convert problem of 4 (orange atoms). Not compatible with <atoms> argument. :type problem_only: bool :param skip_assigned_residues: If True, bond orders are not assigned to residues that have double or triple bonds, even if that residue's atoms are in <atoms>. Not compatible with <problem_only> option. :type skip_assigned_residues: bool :param use_ccd: Whether to use the Chemical Component Dictionary for hets that have records there. Not supported for covalent ligands. :type use_ccd: bool :param _logger: logger to send messages to :type _logger: logger :return: List of (atom1, atom2, order) for every bond whose order was altered. :rtype: list of (int, int, int) """ if problem_only: unassigned_asl = "(atom.i_m_pdb_convert_problem 4)" atoms = analyze.evaluate_asl(st, unassigned_asl) if not atoms: # None have problem return # For record, set of (atom1, atom2, order) for every bond whose order was # changed, where atom1 < atom2. all_assigned_bonds = set() atoms_to_assign = [] for res in st.residue: pdbres = res.pdbres # Skip waters: if pdbres == "HOH " and len(res.atom) in (1, 3): continue if skip_assigned_residues: if DEBUG: print('Checking residue:', res) # Get a list of atoms from this residue to assign orders to: res_atoms_to_assign = _residue_atom_subset(res, atoms) if not res_atoms_to_assign: # Residue has double bonds continue else: # This residue had no double bonds (is unassigned) # res_atoms_to_assign = all residue atoms that are also in <atoms> res_assign_atoms = res_atoms_to_assign else: res_assign_atoms = [ atom.index for atom in res.atom if atoms is None or atom.index in atoms ] if use_ccd: assigned_bonds = attempt_ccd_assignment(res, res_assign_atoms, _logger) if assigned_bonds: all_assigned_bonds.update(assigned_bonds) continue atoms_to_assign.extend(res_assign_atoms) # Assign bond orders based on geometry for the remaining atoms: abo = AssignBondOrders(st, atoms_to_assign, neutralize) assigned_bonds = abo.assign() for item in assigned_bonds: all_assigned_bonds.add(item) # Return a list of bonds that have been assigned: return all_assigned_bonds
[docs]def attempt_ccd_assignment(res, res_assign_atoms=None, _logger=None): """ Attempt to use the CCD record to assign the bond orders. Returns list of bonds that were assigned. Returns empty list if assignment was not successful. :param res: Residue for the het to assign orders to. :type res: structure._Residue :param res_assign_atoms: List of atoms (substructure from the residue) to assign bond orders to. By default all residue atoms are assigned. :type res_assign_atoms: list(int) :param _logger: Logger :type _logger: logging.Logger :return: List of assigned bonds, as (index1, index2, bond order) :rtype: list of (int, int, int) """ if res_assign_atoms is None: res_assign_atoms = res.getAtomIndices() assigned_bonds = [] ccd_smiles = _get_ccd_smiles(res.pdbres.strip()) if not ccd_smiles: if _logger: _logger.info('No CCD template for HET \"%s\"' % res.pdbres) _logger.info(' Falling back to bond order assignment ' 'based on geometry.') ccd_assigned_status = 'HETNAM not in CCD' else: try: assigned_bonds = _assign_from_smiles(res._st, res.getAtomIndices(), ccd_smiles) except (CCDStructureMismatchError, SmilesGenerateError) as err: if _logger: _logger.warning('CCD template assignment failed for HET ' '\"%s\" due to:\n %s' % (res.pdbres, err)) _logger.warning(' Falling back to bond order assignment ' 'based on geometry.') if isinstance(err, SmilesGenerateError): ccd_assigned_status = 'CCD failed' elif isinstance(err, CCDStructureMismatchError): ccd_assigned_status = 'CCD mismatch' else: if _logger: _logger.info( 'CCD template assignment successfully used for HET ' '\"%s\"' % res.pdbres) # Het as assigned successfully (even if all bonds are # single); skip this het. ccd_assigned_status = 'CCD match' for atom in res.atom: atom.property[CCD_ATOM_PROPERTY] = ccd_assigned_status return assigned_bonds
def _genBondsDict(st, atoms): """ Generates the state dictionary for the specified st. """ d = {} for a in st.atom: anum = a.index if atoms and anum not in atoms: continue if a.atomic_number == 1: continue d[anum] = {} for bond in a.bond: nnum = bond.atom2.index if atoms and nnum not in atoms: continue if bond.atom2.atomic_number == 1: continue d[anum][nnum] = bond.order return d
[docs]def orders_assigned(st, atoms=None, all=False): """ Returns True if all bond orders are OK in the specified structure. Can be given a list of atoms to check bond orders of. If not list is specified and all flag is set to True, all atoms are checked; otherwise only atoms with a PDB convert error (appear orange in Maestro) are checked. NOTE: atoms and all options are mutually exclusinve. """ if all: atoms = None # Signifies all atoms elif not atoms: unassigned_asl = "(atom.i_m_pdb_convert_problem 4)" atoms = analyze.evaluate_asl(st, unassigned_asl) if not atoms: # empty list return True bonds_before = _genBondsDict(st, atoms) st2 = st.copy() # Do not change the original st assign_st(st2, atoms, neutralize=False) bonds_after = _genBondsDict(st2, atoms) return bonds_before == bonds_after
if __name__ == '__main__': # For testing purposes only import sys input_file = sys.argv[1] output_file = sys.argv[2] print("Input file:", input_file) print("Output file:", output_file) st_num = 0 for st in structure.StructureReader(input_file): st_num += 1 print("Working on structure %i..." % st_num) assign_st(st) build.add_hydrogens(st) if st_num == 1: st.write(output_file) else: st.append(output_file) print('Done.') # EOF