Source code for schrodinger.protein.assignment

"""
Module for optimizing hydroxyl, thiol and water orientiations, Chi-flips of
asparagine, glutamine and histidine, and protonation states of aspartic acid,
glutamic acid, and histidine.

Usage: ProtAssign(st)

Copyright Schrodinger, LLC. All rights reserved.

"""
import copy
import itertools
import json
import math
import operator
import random
import re
import shutil
import sys
import tempfile
from collections import defaultdict
from dataclasses import dataclass
from dataclasses import field
from pathlib import Path
from typing import Dict
from typing import Iterator
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple
from typing import Type
from typing import Union

import numpy as np
from scipy import ndimage
from scipy.sparse import dok_matrix
from scipy.sparse.csgraph import connected_components

from schrodinger import structure
from schrodinger.structure import Structure
from schrodinger.application.bioluminate import protein
from schrodinger.application.prepwizard2 import prepare
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import transform
from schrodinger.structutils.interactions import find_pi_cation_interactions
from schrodinger.structutils.interactions import find_pi_pi_interactions
from schrodinger.structutils.measure import create_distance_cell
from schrodinger.utils import mmutil
from schrodinger.utils import subprocess

# Check if toulbar2 is available to solve network exact
TOULBAR2_AVAILABLE = shutil.which('toulbar2') is not None
# Flag to use in-house developed rules-based pKa predictor if PROPKA is not
# requested.
USE_PROTASSIGN_PKA_PREDICTOR = mmutil.feature_flag_is_enabled(
    mmutil.PROTASSIGN_PKA_PREDICTOR)

LOG_NONE = 0
LOG_BASIC = 1
LOG_EXTRA = 2
LOG_DEBUG = 3
LOG_FULL_DEBUG = 4
LOG_SCORE_DEBUG = 5
DEFAULT_LOG_LEVEL = LOG_BASIC
log_level = DEFAULT_LOG_LEVEL
label_color = 13

pH_vlow = 'very_low'
pH_low = 'low'
pH_neutral = 'neutral'
pH_high = 'high'

# We are moving towards a more complex rules based internally developed pKa
# predictor that has numbers as input. During the transition phase, map the
# string based pH values to numbers if we are using the rules based predictor
# to be in line with the current GUI.
# Once the GUI interface for the simple predictor has been removed, we can get
# rid of this.
PH_STRING_TO_NUMBER = {
    pH_vlow: 2.0,
    pH_low: 4.0,
    pH_neutral: 7.0,
    pH_high: 11.0
}

pka_property = 'r_pa_PropKa_pKa'
# Atom property name containing name of residue state
STATE_PROPERTY = 's_pa_state'
ACCEPTOR_PROPERTY = 'b_pa_acceptor'
DONOR_PROPERTY = 'b_pa_donor'
CLASHER_PROPERTY = 'b_pa_clasher'
ION_PROPERTY = 'b_pa_ion'
# Flag whether a donor or acceptor is static
STATIC_PROPERTY = 'b_pa_static'
# Flag that indicates a Structure is annotated for faster self-scoring.
ANNOTATED_PROPERTY = 'b_pa_is_annotated'

# Max distance between changeables to be clustered together
CLUSTERING_DISTANCE = 4.0
# Distance between the heavies of changeables after which the interaction
# energy drops to 0
MAX_HEAVY_HEAVY_INTERACTION_DISTANCE = 4.5
# Maximum distance between certain changeables to orient its hydrogen for
# adding states.
MAX_ORIENT_HYDROGEN_DISTANCE = 5.0
# H-H clash cutoff involving two polar H. The cutoff is set such that the
# hydrogen bond network of 6GST His-14 is resolved correctly.
POLAR_CLASH_CUTOFF = 1.9
# H-H clash cutoff involving at least one non-polar H
# Unknown how this cutoff was determined originally.
NONPOLAR_CLASH_CUTOFF = 1.77
# Acceptor-acceptor clash cutoff
ACCEPTOR_ACCEPTOR_CLASH_CUTOFF = 2.80

BOND_MSG_TEMPLATE = ('Cannot determine flip state of {resstr} due to unexpected'
                     'covalent bond.')

FLIP = 'Flip'
NO_FLIP = 'No Flip'

RESNAME_FLIPSTATES_MAP = {'ASN': (NO_FLIP, FLIP), 'GLN': (NO_FLIP, FLIP)}
for resname in ('HID', 'HIE', 'HIP'):
    RESNAME_FLIPSTATES_MAP[resname] = (resname, f'{FLIP} {resname}')
RESNAME_FLIPSTATES_MAP['HIS'] = RESNAME_FLIPSTATES_MAP['HID']

ND1 = 'ND1'
ND2 = 'ND2'
NE2 = 'NE2'
CD = 'CD'
CD2 = 'CD2'
CE1 = 'CE1'
CG = 'CG'
HIS_PDBNAMES = {CD2, CE1, CG, ND1, NE2}
AMD_PDBNAME_GROUPS = ({CG, 'OD1', ND2}, {CD, 'OE1', NE2})
HIS_NEIGHBOR_MAP = {
    ND1: {CG, CE1},
    NE2: {CE1, CD2},
    CE1: {ND1, NE2},
    CD2: {NE2, CG}
}

# Properties
PKA_PROPERTY = 'r_pka_prediction'
INTERACTION_PROPERTY_TEMPLATE = 'b_pka_{0}_interaction{1}'
BULK_SOLVENT_ACCESSIBLE_PROPERTY = 'b_pka_bulk_solvent_accessible'

# Strings
METAL = 'metal'
CARBOXYL = 'carboxyl'
PI_CATION = 'pi-cation'
ARGININE = 'arginine'
PI_PI = 'pi-pi'
ACCEPTOR = 'acceptor'
FORCED_ACCEPTOR = 'forced_acceptor'
STATIC_DONOR = 'static_donor'

# Histidine parameters
HISTIDINE_NAMES = {'HIS ', 'HID ', 'HIE ', 'HIP '}
AMIDE_PDBRES = {'ASN ', 'GLN '}
ASPARTIC_ACID_NAMES = {'ASP ', 'ASH '}
GLUTAMIC_ACID_NAMES = {'GLU ', 'GLH '}
# Residue atom name that carries all the annotation for the empircal pKa
# predictions
ANNOTATED_ATOM_PDBNAME = ' CA '

# Chemical entity related
# H C N O F S Se
ORGANIC_ATOMIC_NUMBERS = {1, 6, 7, 8, 9, 16, 34}
CARBOXYLIC_ACID_SMARTS = '[CX3](=O)[OX1H0-]'

# Some distances and angles shared across classes and functions for emperical
# pKa prediction
MAX_INTERACTION_DISTANCE = 5.0
MAX_HYDROGEN_CLASHING_DISTANCE = 1.8
MAX_HBOND_DISTANCE = 3.0
MIN_HBOND_ANGLE = 120


# Simply utility functions for empircal pKa predictions
[docs]def get_annotated_atom(residue): """Returns annotated atom of residue _StructureAtom or None""" return residue.getAtomByPdbName(ANNOTATED_ATOM_PDBNAME)
[docs]def get_annotated_atom_property(residue): """Returns property dict of annotated atom or an empty dict if atom is not present""" atom = get_annotated_atom(residue) if atom is None: return {} return atom.property
[docs]def get_pka(residue): """Return predicted pKa of residue""" return get_annotated_atom_property(residue).get(PKA_PROPERTY)
[docs]def set_pka(residue, pka): """Set predicted pKa of residue""" p = get_annotated_atom_property(residue) p[PKA_PROPERTY] = pka
[docs]def shift_pka(residue, amount): """Shift the predicted pKa of residue""" p = get_annotated_atom_property(residue) p[PKA_PROPERTY] += amount
[docs]def get_interaction_label(label, suffix): """Create property label for a given interaction.""" if suffix is None: suffix = '' else: suffix = '_' + suffix return INTERACTION_PROPERTY_TEMPLATE.format(label, suffix)
[docs]def set_interaction_label(residue, label, suffix=None): """Set a label of the annoted atom of residue to True""" p = get_annotated_atom_property(residue) key = get_interaction_label(label, suffix) p[key] = True
[docs]def has_interaction_label(residue, label, suffix=None): """Check if the annotated atom of the residue has a label""" key = get_interaction_label(label, suffix) return bool(get_annotated_atom_property(residue).get(key))
[docs]def residue_to_label(residue_or_atom): """Create string from residue or atom.""" pdbname = '' if hasattr(residue_or_atom, 'getResidue'): pdbname = ' ' + residue_or_atom.pdbname.strip() residue = residue_or_atom.getResidue() else: residue = residue_or_atom return f'{residue.pdbres.strip()} {residue}{pdbname}'
[docs]def log_interaction(residue1, residue2, name, distance=None, angle=None): """Log the interaction between two residues and its distance and angle optionally""" label1 = residue_to_label(residue1) label2 = residue_to_label(residue2) msg = [f'Found {name} interaction between {label1} and {label2}'] if distance is not None: msg.append(f' Distance: {distance:.2f}') if angle is not None: msg.append(f' Angle: {angle:.2f}') report(LOG_BASIC, '\n'.join(msg))
class _Grid: """A simple grid class to detect bulk solvent water""" def __init__(self, array, spacing, origin): """ :param array: Array of grid :type array: numpy.ndarray with ndim 3 :param spacing: Spacing between grid points in angstrom :type spacing: float :param origin: Origin of grid :type origin: ndarray of length 3 """ self.array = array self.spacing = spacing self.origin = origin @classmethod def from_structure(cls, st, spacing=0.75, padding=None): """ Create grid based on structure :param spacing: Spacing between grid points in angstrom :type spacing: float :param padding: Added space to each dimension based on min and max of XYZ coordinates :type padding: float """ xyz = st.getXYZ() origin = xyz.min(axis=0) - padding right_top = xyz.max(axis=0) + padding shape = ((right_top - origin) / spacing + 0.5).astype(int)[::-1] array = np.ones(shape, bool) return cls(array, spacing, origin)
[docs]def get_bulk_solvent_accessible_atoms(st, radius=2.5, radius2=3.0, spacing=0.50, include_water=False): """ Return bulk solvent accessible atoms. Bulk solvent accessible atoms are determined by creating a grid and setting occupied voxels to 1 using a fixed radius for each atom and then checking if any non-occupied voxels are present within a slightly larger radius. :param st: Structure to determine bulk solvent accessible atoms. The atom property BULK_SOLVENT_ACCESSIBLE_PROPERTY will be set for each atom. :type st: Structure :param radius: Initial radius :type radius: float :param radius2: :type radius2: float :param spacing: Spacing of grid. Lower spacing makes the calculation more accurate but requires more compute time and memory. :type spacing: float :param include_water: Whether to keep waters or not when calculating bulk solvent accessible atoms. :type include_water: bool :return: List of atom indices that are bulk solvent accessible :rtype: List[int] """ grid = _Grid.from_structure(st, spacing=spacing, padding=2 * radius) array = grid.array if include_water: xyzs = st.getXYZ() else: xyzs = np.asarray([ a.xyz for a in st.atom if a.pdbres not in {'HOH ', 'DOD ', 'SPC '} ]) cutoff2 = (radius / grid.spacing)**2 for xyz in xyzs: center = (xyz - grid.origin) / grid.spacing start = (center - radius / grid.spacing + 0.5).astype(int) end = (center + radius / grid.spacing).astype(int) + 1 dz2 = np.square(center[2] - np.arange(start[2], end[2])).reshape( -1, 1, 1) dy2 = np.square(center[1] - np.arange(start[1], end[1])).reshape( 1, -1, 1) dx2 = np.square(center[0] - np.arange(start[0], end[0])).reshape( 1, 1, -1) distances2 = dz2 + dy2 + dx2 occupied = np.invert(distances2 <= cutoff2) array[start[2]:end[2], start[1]:end[1], start[0]:end[0]] &= occupied bulk_solvent, num_features = ndimage.label(array) value = bulk_solvent[0, 0, 0] bulk_solvent[bulk_solvent != value] = 0 iatoms = [] cutoff2 = (radius2 / grid.spacing)**2 for atom in st.atom: center = (atom.xyz - grid.origin) / grid.spacing start = (center - radius2 / grid.spacing + 0.5).astype(int) end = (center + radius2 / grid.spacing).astype(int) + 1 dz2 = np.square(center[2] - np.arange(start[2], end[2])).reshape( -1, 1, 1) dy2 = np.square(center[1] - np.arange(start[1], end[1])).reshape( 1, -1, 1) dx2 = np.square(center[0] - np.arange(start[0], end[0])).reshape( 1, 1, -1) distances2 = dz2 + dy2 + dx2 occupied = distances2 <= cutoff2 value = np.any(bulk_solvent[start[2]:end[2], start[1]:end[1], start[0]:end[0]][occupied]) atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = False if value == 0: continue iatoms.append(int(atom)) atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True if atom.atomic_number == 1: # Set neighboring heavy atom as bulk solvent accessible for ba in atom.bonded_atoms: ba.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True iatoms.append(ba.index) # if a heavy atom is bulk solvent accessible, then also its hydrogens are. for iatom in iatoms: atom = st.atom[iatom] if not atom.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY]: continue if atom.atomic_number == 1: continue for ba in atom.bonded_atoms: if ba.atomic_number == 1: ba.property[BULK_SOLVENT_ACCESSIBLE_PROPERTY] = True iatoms.append(ba.index) return sorted(set(iatoms))
[docs]def get_unsatisfied_donors(st, include_bsa=True): """ Return unsatisfied buried donors :param st: Structure to analyze :type st: Structure :param include_bsa: Include bulk solvent accessible atoms :type include_bsa: bool :return: List of unsatisfied donor atoms :rtype: List[_StructureAtom] """ dcell = create_distance_cell(st, MAX_HBOND_DISTANCE, honor_pbc=False) unsatisfied = set() for atom1 in st.atom: if atom1.atomic_number != 1: continue hydrogen = atom1 heavy = None for ba in hydrogen.bonded_atoms: heavy = ba if heavy is None: continue if heavy.atomic_number in (1, 6): continue residue1 = hydrogen.getResidue() is_hydrogen_bonded = False is_clashing = False for iatom2 in dcell.query(*hydrogen.xyz): atom2 = st.atom[iatom2] residue2 = atom2.getResidue() if str(residue1) == str(residue2): continue distance = st.measure(hydrogen, atom2) if atom2.atomic_number == 1 and distance < MAX_HYDROGEN_CLASHING_DISTANCE: is_clashing = True if atom2.atomic_number in (1, 6): continue if atom2.pdbname == ' N ': continue if distance > MAX_HBOND_DISTANCE: continue # If the hydrogen is clashing it is also unsatisfied angle = st.measure(heavy, hydrogen, atom2) if angle >= MIN_HBOND_ANGLE: is_hydrogen_bonded = True if not is_hydrogen_bonded or is_clashing: unsatisfied.add(heavy.index) if not include_bsa: bsa = set(get_bulk_solvent_accessible_atoms(st)) unsatisfied = unsatisfied - bsa return [st.atom[iatom] for iatom in sorted(unsatisfied)]
[docs]def get_carboxyl_atoms(residue): """ Get carboxyl atom groups from residue. Returns multiple groups if multiple are present. The first atom will be the carbon. It specifically adds Glu and Asp even if they are protonated and uses a SMARTS pattern otherwise to extract the carboxyl group. :param residue: Residue to check :type residue: _Residue :return: List of carboxyl atoms :rtype: List[List[_StructureAtom]] """ carboxyls = [] # Add neutral carboxyls of glu and asp also to list so that a # ProtAssign prepped structure also works, as ProtAssign will protonate # all asps and glus while processing the structure in intermediate states. is_asp = residue.pdbres in ASPARTIC_ACID_NAMES is_glu = residue.pdbres in GLUTAMIC_ACID_NAMES c = None if is_asp: c = residue.getAtomByPdbName(' CG ') elif is_glu: c = residue.getAtomByPdbName(' CD ') if c is not None: carboxyl = [c] for neighbor in c.bonded_atoms: if neighbor.atomic_number == 8: carboxyl.append(neighbor) if len(carboxyl) == 3: # Check if the carboxyl is neutral. If so add it to the # pool, otherwise it will be added in the next section for oxygen in carboxyl[1:]: if len(list(oxygen.bonded_atoms)) == 2: carboxyls.append(carboxyl) break # Extract residue structure for fast SMARTS matching st = residue.structure atom_indices = residue.getAtomIndices() residue_st = residue.extractStructure() matches = analyze.evaluate_smarts_canvas(residue_st, CARBOXYLIC_ACID_SMARTS) for match in matches: assert len(match) == 3 carboxyl = [] for m in match: atom = st.atom[atom_indices[m - 1]] assert atom.atomic_number in (6, 8) carboxyl.append(atom) continue carboxyl = sorted(carboxyl, key=lambda a: a.atomic_number) carboxyls.append(carboxyl) return carboxyls
[docs]def get_residue_neighborhoods(st, residues, distance_cell): neighborhoods = defaultdict(list) for residue in residues: neighboring_residues = {} for atom in residue.atom: for iatom2 in distance_cell.query(*atom.xyz): residue2 = st.atom[iatom2].getResidue() if str(residue) == str(residue2): continue neighboring_residues[str(residue2)] = residue2 residues = list(neighboring_residues.values()) neighborhoods[str(residue)] = residues return neighborhoods
[docs]def protonate_histidine(histidine): nd1 = histidine.getAtomByPdbName(' ND1') ne2 = histidine.getAtomByPdbName(' NE2') ce1 = histidine.getAtomByPdbName(' CE1') cd2 = histidine.getAtomByPdbName(' CD2') cg = histidine.getAtomByPdbName(' CG ') nd1.formal_charge = 1 ne2.formal_charge = 0 st = histidine.structure st.getBond(nd1, ce1).type = structure.BondType.Double st.getBond(cd2, ne2).type = structure.BondType.Single st.getBond(ce1, ne2).type = structure.BondType.Single st.getBond(cg, cd2).type = structure.BondType.Double build.add_hydrogens(st)
[docs]def charge_arginine_sidechains(st): """ Make arginine sidechains charged. Assumes bond orders have been assigned correctly. Looks for the nitrogen that has a double bond to the CZ atom, and changes the formal charge to 1 and retypes atom. """ for residue in st.residue: if residue.pdbres != 'ARG ': continue cz = residue.getAtomByPdbName(' CZ ') if cz is None: continue # Get the nitrogen that has a double bond with its neighboring heavy # atom as that is the one that should be charged. for nh_pdbname in (' NH2', ' NH1'): nh = residue.getAtomByPdbName(nh_pdbname) # Nothing to do if atom doesnt exist or formal charge is already 1 if nh is None or nh.formal_charge == 1: continue if st.getBond(nh, cz).order == 2: # Set formal charges and retype nh.formal_charge = 1 nh.retype() break
[docs]class HistidinepKaPredictor: """Empirical histidine pKa predictor""" # pKa related numbers INTERNAL_PKA = 6.5 # Negative shifts METAL_PKA_SHIFT = -4.0 STATIC_DONOR_PKA_SHIFT = -3.0 FORCED_ACCEPTOR_PKA_SHIFT = -3.0 PI_CATION_PKA_SHIFT = -1.0 # Postive shifts PI_PI_PKA_SHIFT = 1.0 CARBOXYLIC_ACID_PKA_SHIFT = 1.0 DOUBLE_SIDED_HBOND_PKA_SHIFT = 1.0 # Distance and angle cutoffs to determine interactions MAX_STATIC_DONOR_INTERACTION_DISTANCE = 3.0 MIN_STATIC_DONOR_INTERACTION_ANGLE = 120 MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE = 2.5 MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE = 120 MAX_METAL_INTERACTION_DISTANCE = 3.5 MAX_PI_PI_INTERACTION_DISTANCE = 4.5 MAX_PI_PI_INTERACTION_ANGLE = 30 MAX_PI_CATION_INTERACTION_DISTANCE = 4.5 MAX_PI_CATION_INTERACTION_ANGLE = 30 MAX_PI_ARG_INTERACTION_DISTANCE = 5.5 MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE = 3.8 MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE = 140 NITROGEN_PDBNAMES = {' ND1', ' NE2'} CARBON_PDBNAMES = {' CD2', ' CE1'} SIDECHAINS_PDBNAMES = NITROGEN_PDBNAMES | CARBON_PDBNAMES | {' CG '}
[docs] def predict(self, st, protassign=None, flip=False): """ Predict histidine pKa with empirical rules. Currently this requires the structure being annotated by ProtAssign. :param st: ProtAssign annotated structure :type st: Structure :param protassign: ProtAssign instance to calculate forced acceptor interactions :type protassign: schrodinger.protein.assignment.ProtAssign :param flip: Calculate pKa of flipped version of histidne :type flip: bool Returns None but annotates ANNOTATED_ATOM of each histidine with found interactions and predicted pKa """ self._st = st # Gather all histidines that have their nitrogen and carbon atoms self._histidines = [] for r in st.residue: if r.pdbres not in HISTIDINE_NAMES: continue for pdbname in self.SIDECHAINS_PDBNAMES: atom = r.getAtomByPdbName(pdbname) if atom is None: break else: self._histidines.append(r) self._distance_cell = create_distance_cell(st, MAX_INTERACTION_DISTANCE, honor_pbc=False) self._nitrogen_pdbnames = self.CARBON_PDBNAMES if flip else self.NITROGEN_PDBNAMES self._neighborhoods = get_residue_neighborhoods(st, self._histidines, self._distance_cell) for histidine in self._histidines: set_pka(histidine, self.INTERNAL_PKA) # Lower pKa values self.metal_interactions() self.identify_forced_amide_states() self.static_donor_interactions() self.cation_pi_interactions() self.arginine_interactions() # Increase pKa values self.pi_pi_interactions() self.carboxyl_interactions() self.static_acceptor_interactions() # Forced acceptor pKa calculations are expensive and may not be # required; it also requires a protassign instance to be provided. if protassign is not None: report(LOG_BASIC, "Determining pKa penalties for forced acceptor histidines.") self.forced_acceptor_interactions(st, protassign)
[docs] def metal_interactions(self): report(LOG_BASIC, "Determining pKa penalties for metal interactions") st = self._st for his in self._histidines: metals = [] residues = self._neighborhoods[str(his)] for residue in residues: for atom in residue.atom: if atom.atomic_number not in ORGANIC_ATOMIC_NUMBERS: metals.append(atom) if not metals: continue for pdbname in self._nitrogen_pdbnames: nitrogen = his.getAtomByPdbName(pdbname) for metal in metals: distance = st.measure(nitrogen, metal) if distance < self.MAX_METAL_INTERACTION_DISTANCE: set_interaction_label(his, METAL, pdbname.strip()) log_interaction(nitrogen, metal, METAL, distance) if has_interaction_label(his, METAL): break set_interaction_label(his, METAL) shift_pka(his, self.METAL_PKA_SHIFT)
[docs] def cation_pi_interactions(self): report(LOG_BASIC, "Determining pKa penalties for pi-cation interactions") st = self._st cation_pi_interactions = find_pi_cation_interactions(st) for cpi in cation_pi_interactions: atom = st.atom[cpi.pi_centroid.atoms[0]] if atom.pdbres not in HISTIDINE_NAMES: continue if cpi.distance > self.MAX_PI_CATION_INTERACTION_DISTANCE: continue min_angle = self.MAX_PI_CATION_INTERACTION_ANGLE max_angle = 180 - min_angle if min_angle < abs(cpi.angle) < max_angle: continue cation = st.atom[cpi.cation_centroid.atoms[0]].getResidue() # ProtAssign charges ASN and GLN to do it's stuff... if cation.pdbres in AMIDE_PDBRES: continue histidine = atom.getResidue() set_interaction_label(histidine, PI_CATION) shift_pka(histidine, self.PI_CATION_PKA_SHIFT) log_interaction(histidine, cation, PI_CATION, cpi.distance, cpi.angle)
[docs] def arginine_interactions(self): report(LOG_BASIC, "Determining pKa penalties for arginine interactions") for his in self._histidines: args = [ r for r in self._neighborhoods[str(his)] if r.pdbres == 'ARG ' ] if not args: continue cg = his.getAtomByPdbName(' CG ') nd1 = his.getAtomByPdbName(' ND1') ne2 = his.getAtomByPdbName(' NE2') nd1_xyz = np.asarray(nd1.xyz) ne2_xyz = np.asarray(ne2.xyz) centroid1 = (nd1_xyz + ne2_xyz) / 2.0 # Calculate the normal of the histidine plane v1 = nd1_xyz - cg.xyz v2 = ne2_xyz - cg.xyz n1 = np.cross(v1, v2) n1 /= np.linalg.norm(n1) for arg in args: nh2 = arg.getAtomByPdbName(' NH2') nh1 = arg.getAtomByPdbName(' NH1') ne = arg.getAtomByPdbName(' NE ') cz = arg.getAtomByPdbName(' CZ ') if None in (nh2, nh1, ne, cz): continue centroid2 = np.asarray(cz.xyz) distance = np.linalg.norm(centroid1 - centroid2) if distance > self.MAX_PI_ARG_INTERACTION_DISTANCE: continue v1 = nh2.xyz - centroid2 v2 = nh1.xyz - centroid2 n2 = np.cross(v1, v2) n2 /= np.linalg.norm(n2) theta = np.arccos(np.inner(n1, n2)) angle = abs(np.rad2deg(theta)) if self.MAX_PI_CATION_INTERACTION_ANGLE < angle < ( 180 - self.MAX_PI_CATION_INTERACTION_ANGLE): continue if not (has_interaction_label(his, ARGININE) or has_interaction_label(his, PI_CATION)): shift_pka(his, self.PI_CATION_PKA_SHIFT) set_interaction_label(his, ARGININE) log_interaction(his, arg, ARGININE, distance, angle)
[docs] def pi_pi_interactions(self): """Check if histidine is involved in pi-pi interaction""" report(LOG_BASIC, "Determining pKa penalties for pi-pi interactions") st = self._st pi_pi_interactions = find_pi_pi_interactions(st) for ppi in pi_pi_interactions: if ppi.distance > self.MAX_PI_PI_INTERACTION_DISTANCE: continue max_angle = self.MAX_PI_PI_INTERACTION_ANGLE min_angle = 180 - max_angle if max_angle < abs(ppi.angle) < min_angle: continue for ring1 in (ppi.ring1, ppi.ring2): iatom1 = ring1.atoms[0] atom1 = st.atom[iatom1] if atom1.pdbres not in HISTIDINE_NAMES: continue histidine = atom1.getResidue() if not has_interaction_label(histidine, PI_PI): shift_pka(histidine, self.PI_PI_PKA_SHIFT) set_interaction_label(histidine, PI_PI) # Get other residue to report on ring2 = ppi.ring1 if ring2.atoms[0] == iatom1: ring2 = ppi.ring2 iatom2 = ring2.atoms[0] residue = st.atom[iatom2].getResidue() log_interaction(histidine, residue, PI_PI, ppi.distance, ppi.angle)
[docs] def carboxyl_interactions(self): """Check if histidine is interacting with a carboxyl group.""" report(LOG_BASIC, "Determining pKa penalties for carboxyl interactions") st = self._st for his in self._histidines: carboxyls = [] for residue in self._neighborhoods[str(his)]: carboxyls += get_carboxyl_atoms(residue) # Now check for interactions for c, o1, o2 in carboxyls: o_ave_xyz = (np.asarray(o1.xyz) + o2.xyz) / 2.0 # Set O1 to average O position to measure distance and angle o1_xyz = o1.xyz o1.xyz = o_ave_xyz carboxyl_residue = c.getResidue() # One carboxyl only interacts once with a histidine, but can # act as an acceptor for multiple atoms in the histidine. is_interacting = False for pdbname in self._nitrogen_pdbnames: n = his.getAtomByPdbName(pdbname) distance = st.measure(o1, n) if distance > self.MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE: continue angle = st.measure(c, o1, n) if angle < self.MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE: continue # Do not count the same carboxyl more than once if not is_interacting: set_interaction_label(his, CARBOXYL) shift_pka(his, self.CARBOXYLIC_ACID_PKA_SHIFT) log_interaction(n, carboxyl_residue, CARBOXYL, distance, angle) set_interaction_label(his, ACCEPTOR, pdbname.strip()) is_interacting = True o1.xyz = o1_xyz
[docs] def static_donor_interactions(self): """Check if histidine is interacting with a static donor""" report(LOG_BASIC, "Determining pKa interactions for static donors") st = self._st for his in self._histidines: static_donors = [] for residue in self._neighborhoods[str(his)]: for atom in residue.atom: if atom.atomic_number == 1: continue # We consider lysines static p = atom.property is_static_donor = bool( p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)) is_static_donor |= atom.pdbres in { 'LYS ', 'LYH ' } and atom.pdbname == ' NZ ' if not is_static_donor: continue for bonded_atom in atom.bonded_atoms: if bonded_atom.atomic_number != 1: continue if not bonded_atom.property.get(DONOR_PROPERTY): continue static_donors.append((atom, bonded_atom)) iterator = itertools.product(self._nitrogen_pdbnames, static_donors) for pdbname, (donor, hydrogen) in iterator: nitrogen = his.getAtomByPdbName(pdbname) distance = st.measure(hydrogen, nitrogen) if distance > self.MAX_STATIC_DONOR_INTERACTION_DISTANCE: continue angle = st.measure(nitrogen, hydrogen, donor) if angle < self.MIN_STATIC_DONOR_INTERACTION_ANGLE: continue if not has_interaction_label(his, STATIC_DONOR): shift_pka(his, self.STATIC_DONOR_PKA_SHIFT) set_interaction_label(his, STATIC_DONOR) log_interaction(nitrogen, donor, STATIC_DONOR, distance, angle)
[docs] def identify_forced_amide_states(self): """ Identify amide residues that are forced in a state due to them interacting with a static donor. This is done in a single pass and not iteratively. """ st = self._st for residue in st.residue: # ProtAssign resets the flip of the amide so we need to check both # the O and N atom to see if it is interacting with a static donor. if residue.pdbres == 'ASN ': o_pdbname = ' OD1' n_pdbname = ' ND2' elif residue.pdbres == 'GLN ': o_pdbname = ' OE1' n_pdbname = ' NE2' else: continue o1 = residue.getAtomByPdbName(o_pdbname) n1 = residue.getAtomByPdbName(n_pdbname) if o1 is None or n1 is None: continue # Remove previous assigned property names of protassign to amides for a in (o1, n1): p = a.property p.pop(DONOR_PROPERTY, None) p.pop(ACCEPTOR_PROPERTY, None) for o, n in [(o1, n1), (n1, o1)]: for iatom in self._distance_cell.query(*o.xyz): atom = st.atom[iatom] if str(atom.getResidue()) == str(residue): continue if atom.atomic_number == 1: continue p = atom.property if not (p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)): continue for hydrogen in atom.bonded_atoms: if hydrogen.atomic_number != 1: continue distance = st.measure(o, hydrogen) if distance > self.MAX_STATIC_DONOR_INTERACTION_DISTANCE: continue angle = st.measure(atom, hydrogen, o) if angle < self.MIN_STATIC_DONOR_INTERACTION_ANGLE: continue o.property[ACCEPTOR_PROPERTY] = True o.property[STATIC_PROPERTY] = True n.property[DONOR_PROPERTY] = True n.property[STATIC_PROPERTY] = True report( LOG_BASIC, f"Forced acceptor/donor found for amide {residue.pdbres} {residue}" ) if o.property.get(ACCEPTOR_PROPERTY) and n.property.get( ACCEPTOR_PROPERTY): report( LOG_BASIC, f"Amide {residue.pdbres} {residue} is interacting with static donor on both sides" ) for atom in (o, n): del atom.property[ACCEPTOR_PROPERTY] del atom.property[DONOR_PROPERTY] del atom.property[STATIC_PROPERTY]
[docs] def static_acceptor_interactions(self): """Check if histidine is interacting with a static acceptor""" report(LOG_BASIC, "Determining pKa interactions for static acceptors") st = self._st for his in self._histidines: # If histidine is already involved in a static donor interaction, # don't bother. if any( has_interaction_label(his, label) for label in (METAL, STATIC_DONOR)): continue # The histidine needs to be protonated in this function. Extract # histidine and protonate hip_st = his.extractStructure() hip = next(iter(hip_st.residue)) protonate_histidine(hip) acceptors = [] for residue in self._neighborhoods[str(his)]: for atom in residue.atom: p = atom.property if p.get(ACCEPTOR_PROPERTY): if str(atom.getResidue()) == str(his): continue acceptor = hip_st.addAtom(atom.element, *atom.xyz) acceptor.pdbres = atom.pdbres acceptor.resnum = atom.resnum acceptor.chain = atom.chain acceptors.append(acceptor) iterator = itertools.product(self._nitrogen_pdbnames, acceptors) for pdbname, acceptor in iterator: nitrogen = hip.getAtomByPdbName(pdbname) hydrogen = None for ba in nitrogen.bonded_atoms: if ba.atomic_number == 1: hydrogen = ba assert hydrogen is not None distance = hip_st.measure(hydrogen, acceptor) if distance > self.MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE: continue angle = hip_st.measure(nitrogen, hydrogen, acceptor) if angle < self.MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE: continue set_interaction_label(his, ACCEPTOR, pdbname.strip()) log_interaction(nitrogen, acceptor, ACCEPTOR, distance, angle) for nitrogens in self._nitrogen_pdbnames: if all( has_interaction_label(his, ACCEPTOR, n) for n in nitrogens): if not has_interaction_label(his, ACCEPTOR): shift_pka(his, self.DOUBLE_SIDED_HBOND_PKA_SHIFT) set_interaction_label(his, ACCEPTOR) report(LOG_BASIC, f"Both nitrogens are donating of {his}")
class _CarboxylicAcidpKaPredictor: # pKa related INTERNAL_PKA = None # Shifts METAL_PKA_SHIFT = -3.0 STATIC_ACCEPTOR_PKA_SHIFT = 4.0 CARBOXYLIC_ACID_PKA_SHIFT = 4.0 # Interaction related distances and angles MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE = 3.5 MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE = 150 MAX_METAL_INTERACTION_DISTANCE = 3.5 MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE = 3.8 MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE = 140 # Some names/strings PDBRES_NAMES = None CARBON_PDBNAME = None OXYGEN_PDBNAMES = None SIDECHAIN_PDBNAMES = None def predict(self, st): self._st = st self._residues = [] for r in st.residue: if r.pdbres not in self.PDBRES_NAMES: continue for pdbname in self.SIDECHAIN_PDBNAMES: atom = r.getAtomByPdbName(pdbname) if atom is None: break else: self._residues.append(r) for residue in self._residues: set_pka(residue, self.INTERNAL_PKA) self._distance_cell = create_distance_cell(st, MAX_INTERACTION_DISTANCE, honor_pbc=False) self._neighborhoods = get_residue_neighborhoods(st, self._residues, self._distance_cell) # Shift pKa down. Favor charged state self.metal_interactions() # Shift pKa up. Favor neutral state self.carboxyl_interactions() self.static_acceptor_interactions() def static_acceptor_interactions(self): """Check if histidine is interacting with a static acceptor""" report(LOG_BASIC, "Determining pKa interactions for static acceptors") st = self._st for carboxylic_residue in self._residues: if has_interaction_label(carboxylic_residue, METAL): continue if has_interaction_label(carboxylic_residue, CARBOXYL): continue acceptors = [] for residue in self._neighborhoods[str(carboxylic_residue)]: for atom in residue.atom: p = atom.property if p.get(ACCEPTOR_PROPERTY) and p.get(STATIC_PROPERTY): acceptors.append(atom) carbon = carboxylic_residue.getAtomByPdbName(self.CARBON_PDBNAME) oxygen1 = carboxylic_residue.getAtomByPdbName( self.OXYGEN_PDBNAMES[0]) oxygen2 = carboxylic_residue.getAtomByPdbName( self.OXYGEN_PDBNAMES[1]) oxygen1_xyz = np.asarray(oxygen1.xyz, float) ave_oxygen_xyz = (oxygen1_xyz + oxygen2.xyz) / 2.0 oxygen1.xyz = ave_oxygen_xyz for acceptor in acceptors: if str(acceptor.getResidue()) == str(carboxylic_residue): continue distance = st.measure(oxygen1, acceptor) if distance > self.MAX_STATIC_ACCEPTOR_INTERACTION_DISTANCE: continue angle = st.measure(carbon, oxygen1, acceptor) if angle < self.MIN_STATIC_ACCEPTOR_INTERACTION_ANGLE: continue if not has_interaction_label(carboxylic_residue, ACCEPTOR): set_interaction_label(carboxylic_residue, ACCEPTOR) shift_pka(carboxylic_residue, self.STATIC_ACCEPTOR_PKA_SHIFT) log_interaction(oxygen1, acceptor, ACCEPTOR, distance, angle) # Reset oxygen position oxygen1.xyz = oxygen1_xyz def metal_interactions(self): report(LOG_BASIC, "Determining pKa penalties for metal interactions") st = self._st for coo_residue in self._residues: metals = [] residues = self._neighborhoods[str(coo_residue)] for residue in residues: for atom in residue.atom: if atom.atomic_number not in ORGANIC_ATOMIC_NUMBERS: metals.append(atom) if not metals: continue for pdbname in self.OXYGEN_PDBNAMES: oxygen = coo_residue.getAtomByPdbName(pdbname) if oxygen is None: continue for metal in metals: distance = st.measure(oxygen, metal) if distance > self.MAX_METAL_INTERACTION_DISTANCE: continue set_interaction_label(coo_residue, METAL) shift_pka(coo_residue, self.METAL_PKA_SHIFT) break def carboxyl_interactions(self): """Check if histidine is interacting with a carboxyl group.""" report(LOG_BASIC, "Determining pKa penalties for carboxyl interactions") st = self._st for coo_residue in self._residues: carboxyls = [] for residue in self._neighborhoods[str(coo_residue)]: carboxyls += get_carboxyl_atoms(residue) # Now check for interactions carbon = coo_residue.getAtomByPdbName(self.CARBON_PDBNAME) oxygen1 = coo_residue.getAtomByPdbName(self.OXYGEN_PDBNAMES[0]) oxygen2 = coo_residue.getAtomByPdbName(self.OXYGEN_PDBNAMES[1]) oxygen1_xyz = np.asarray(oxygen1.xyz, float) ave_oxygen_xyz = (oxygen1_xyz + oxygen2.xyz) / 2.0 oxygen1.xyz = ave_oxygen_xyz for c, o1, o2 in carboxyls: carboxyl_residue = c.getResidue() for oxygen in (o1, o2): distance = st.measure(oxygen1, oxygen) if distance > self.MAX_CARBOXYLIC_ACID_INTERACTION_DISTANCE: continue angle = st.measure(carbon, oxygen1, oxygen) if angle < self.MIN_CARBOXYLIC_ACID_INTERACTION_ANGLE: continue # Do not count the same carboxyl more than once. Also do # not protonate both carboxyls, although it is inclear # whether they should both be protonated or just a single # one. if not has_interaction_label( coo_residue, CARBOXYL) and not has_interaction_label( carboxyl_residue, CARBOXYL): set_interaction_label(coo_residue, CARBOXYL) shift_pka(coo_residue, self.CARBOXYLIC_ACID_PKA_SHIFT) log_interaction(oxygen1, carboxyl_residue, CARBOXYL, distance, angle) oxygen1.xyz = oxygen1_xyz
[docs]class AsparticAcidpKaPredictor(_CarboxylicAcidpKaPredictor): """Aspartic acid emperical pKa predictor""" # Taken from average of pKa values in cleaned up PKAD database INTERNAL_PKA = 3.4 PDBRES_NAMES = ASPARTIC_ACID_NAMES CARBON_PDBNAME = ' CG ' OXYGEN_PDBNAMES = [' OD1', ' OD2'] SIDECHAIN_PDBNAMES = OXYGEN_PDBNAMES + [CARBON_PDBNAME]
[docs]class GlutamicAcidpKaPredictor(_CarboxylicAcidpKaPredictor): """Glutamic acid emperical pKa predictor""" # Taken from average of pKa values in cleaned up PKAD database INTERNAL_PKA = 4.2 PDBRES_NAMES = GLUTAMIC_ACID_NAMES CARBON_PDBNAME = ' CD ' OXYGEN_PDBNAMES = [' OE1', ' OE2'] SIDECHAIN_PDBNAMES = OXYGEN_PDBNAMES + [CARBON_PDBNAME]
[docs]class PropKaException(Exception):
[docs] def __init__(self, value): self.parameter = value
[docs]def report(message_level=1, message=''): if message_level <= log_level: print(message) sys.stdout.flush() return
[docs]def measure(ct, atom1=None, atom2=None, atom3=None, atom4=None, use_xtal=False, max_dist=10.0): # A replacement for ct.measure() which also supports xtal symmetry, and can return the coordinates of # the nearest xtal copies of each atom when requested. def find_nearest_atom_image(ct, atom1, atom2): lowest_distance = None best_match = None atoms = analyze.evaluate_asl(ct, 'atom.i_pa_atomindex %d' % atom2) for atom in atoms: distance = ct.measure(atom1, atom) if lowest_distance is None or distance < lowest_distance: lowest_distance = distance best_match = int(atom) return best_match def extract(ct, atoms): new_ct = structure.create_new_structure(len(atoms)) for i, atom in enumerate(atoms): new_atom = new_ct.atom[i + 1] old_atom = ct.atom[atom] new_atom.element = old_atom.element new_atom.xyz = old_atom.xyz new_atom.color = old_atom.color new_atom.atom_type = old_atom.atom_type new_atom.property['i_pa_atomindex'] = old_atom.property[ 'i_pa_atomindex'] for property in [ 's_pdb_PDB_CRYST1_Space_Group', 'r_pdb_PDB_CRYST1_a', 'r_pdb_PDB_CRYST1_b', 'r_pdb_PDB_CRYST1_c', 'r_pdb_PDB_CRYST1_alpha', 'r_pdb_PDB_CRYST1_beta', 'r_pdb_PDB_CRYST1_gamma' ]: new_ct.property[property] = ct.property[property] return new_ct def merge_mates(atom_ct, atom_ct_mates): atom_ct_mates.insert(0, atom_ct) for i, mate in enumerate(atom_ct_mates): for atom in mate.atom: atom.property['i_pa_xtalatom'] = i for i in range(1, len(atom_ct_mates)): atom_ct_mates[0].extend(atom_ct_mates[i]) return atom_ct_mates def find_atom1(atom_ct_mates, atom1): for atom in atom_ct_mates[0].atom: if atom.property['i_pa_xtalatom'] == 0 and atom.property[ 'i_pa_atomindex'] == atom1: xtal_atom1 = int(atom) break return xtal_atom1 def create_list(atom1, atom2, atom3, atom4): if atom4 is not None: atoms = [atom1, atom2, atom3, atom4] elif atom3 is not None: atoms = [atom1, atom2, atom3] else: atoms = [atom1, atom2] return atoms def find_other_atoms(atom_ct_mates, xtal_atom1, atom2, atom3, atom4): xtal_atom2 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom1, atom2) if atom3 is not None: xtal_atom3 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom2, atom3) else: xtal_atom3 = None if atom4 is not None: xtal_atom4 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom3, atom4) else: xtal_atom4 = None return (xtal_atom2, xtal_atom3, xtal_atom4) # Only do the expensive xtal version of measure if it's a surface atom (or if not sure if it's a surface atom). if use_xtal and ('i_pa_surfaceatom' not in ct.atom[atom1].property or ct.atom[atom1].property['i_pa_surfaceatom'] == 1): atoms = create_list(atom1, atom2, atom3, atom4) atom_ct = extract(ct, atoms) atom_ct_mates = analyze.generate_crystal_mates(atom_ct, radius=max_dist) atom_ct_mates = merge_mates(atom_ct, atom_ct_mates) # Get atom1 (in the primary cell). xtal_atom1 = find_atom1(atom_ct_mates, atom1) # Get atom2 (the closest to atom1), and so on. (xtal_atom2, xtal_atom3, xtal_atom4) = find_other_atoms(atom_ct_mates, xtal_atom1, atom2, atom3, atom4) return atom_ct_mates[0].measure(xtal_atom1, xtal_atom2, xtal_atom3, xtal_atom4) else: value = ct.measure(atom1, atom2, atom3, atom4) return value
[docs]def calculate_interaction_matrix(ct: structure.Structure, iatoms: List[int], distance: float, use_xtal: bool = False) -> Dict[int, set]: """ Create an interaction matrix based on the changeable_index atom property :param ct: Structure with annotated atoms having set the i_pa_changeable_index corresponding the the index of the changeable :param iatoms: List of atom indices which take part in interaction :param distance: Max distance between interacting atoms :param use_xtal: Take into account crystal symmetry mates :param use_xtal: bool :return: interaction matrix allowing double indexing: interact[i, j] """ # Create a distance cell to query containing only the atoms of interest interact = defaultdict(set) if not iatoms: return interact ct_part = ct.extract(iatoms, copy_props=True) if use_xtal: mates = analyze.generate_crystal_mates(ct_part, distance) for mate in mates[1:]: ct_part.extend(mate) dcell = create_distance_cell(ct_part, distance, honor_pbc=False) for iatom1 in iatoms: atom1 = ct.atom[iatom1] index1 = atom1.property['i_pa_changeable_index'] for iatom2 in dcell.query(*atom1.xyz): index2 = ct_part.atom[iatom2].property['i_pa_changeable_index'] # Changeables can currently only interact with other # changeables. It is possible that a changeable has # self-interaction with its crystal mate, but this is not # implemented down-stream, so we ignore that for now. if index1 == index2: continue interact[index1].add(index2) interact[index2].add(index1) return interact
[docs]def annotate_structure_interactors(ct, acceptors, donors, clashers): """ Set atom property for each interactor class :param ct: Structure to annotate :type ct: Structure :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param donors: List of donor pair atom indices :type donors: List[tuple[int, int]] :param clashers: List of clasher atom indices :type clashers: List[int] :return: None but sets atom properties :rtype: NoneType """ for acceptor in acceptors: ct.atom[acceptor].property[ACCEPTOR_PROPERTY] = 1 for heavy, hydrogen in donors: if heavy == hydrogen: ct.atom[heavy].property[ION_PROPERTY] = 1 else: ct.atom[heavy].property[DONOR_PROPERTY] = 1 ct.atom[hydrogen].property[DONOR_PROPERTY] = 1 for clasher in clashers: ct.atom[clasher].property[CLASHER_PROPERTY] = 1
[docs]def generate_annotated_ct(ct, donors, acceptors, clashers, use_xtal=False): """ Generate an annotated Structure that contains crystal mates. The annotated heavily speeds up the self scoring step for large and xtal structures :param ct: Structure to annotate :type ct: Structure :param donors: List of donor pairs :type donors: List[Tuple[int, int]] :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param clashers: List of clasher atom indices :type clashers: List[int] :param use_xtal: Create crystal mates in annotated output structure :type use_xtal: bool :return: New annotated structure with property ANNOTATED_PROPERTY set to True :rtype: Structure """ # Annotate structure so we can use an efficient distance cell # to extract close contacts during self-scoring. working_ct = ct.copy() annotate_structure_interactors(working_ct, acceptors, donors, clashers) # Setup a crystal mate ct if necessary. if use_xtal: mates = analyze.generate_crystal_mates( working_ct, radius=MAX_HEAVY_HEAVY_INTERACTION_DISTANCE) # The first one is just a copy of the original for mate in mates[1:]: working_ct.extend(mate) working_ct.property[ANNOTATED_PROPERTY] = True return working_ct
[docs]def check_residue_flip_state(res: structure._Residue) -> tuple: """ Determine whether a residue cannot be flipped, is, or is not flipped. :param res: a protein residue :return: a tuple of `(state, msg)`, where `state` describes whether the residue is flipped (`True`), is not flipped (`False`), or cannot be flipped (`None`); if `None`, `msg` will contain an explanation :rtype: tuple[bool or NoneType, str] """ pdbname_atom_map = { atom.pdbname.strip(): atom for atom in res.atom if atom.atomic_number != 1 } is_amd = any( all(n in pdbname_atom_map for n in pdbnames) for pdbnames in AMD_PDBNAME_GROUPS) is_his = all(name in pdbname_atom_map for name in HIS_PDBNAMES) is_flipped, msg = None, '' if not is_amd and not is_his: msg = (f'Cannot determine flip state of {get_residue_string(res)}: only' f' {RESNAME_FLIPSTATES_MAP.keys()} can be flipped.') elif is_amd: # Residue is an amide; verify that there are no unexpected bonds nitrogen = pdbname_atom_map.get(ND2) or pdbname_atom_map.get(NE2) carbon = pdbname_atom_map.get(CD) or pdbname_atom_map.get(CG) bound_atoms = [a for a in get_heavy_neighbors(nitrogen) if a != carbon] if bound_atoms: msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res)) else: is_flipped = carbon.property.get(STATE_PROPERTY) == FLIP else: # Residue is a histidine; verify that there are no unexpected bonds for pdbname, neighbor_pdbnames in HIS_NEIGHBOR_MAP.items(): atom = pdbname_atom_map[pdbname] exp_neighbors = [ pdbname_atom_map[name] for name in neighbor_pdbnames ] bound_atoms = [ a for a in get_heavy_neighbors(atom) if a not in exp_neighbors ] if bound_atoms: msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res)) break else: carbon = pdbname_atom_map.get(CG) flip_tag = carbon.property.get(STATE_PROPERTY) is_flipped = flip_tag is not None and flip_tag.startswith(FLIP) return is_flipped, msg
[docs]def get_residue_flip_state(res: structure._Residue) -> Union[bool, type(None)]: """ Return the flip state of a protein residue. A truncated version of `check_residue_flip_state()`. :param res: a protein residue :return: the flip state of a residue """ flip_state, _ = check_residue_flip_state(res) return flip_state
[docs]def get_residue_string(residue_or_atom) -> str: """ Return a string describing a residue from a residue or atom. The string will match the format <chain>:<residue PDB code> <residue number>[<insertion code>] :param residue_or_atom: a residue or atom :type residue_or_atom: _Residue or _StructureAtom :return: a string describing the residue """ chain = '_' if residue_or_atom.chain == ' ' else residue_or_atom.chain pdbres = residue_or_atom.pdbres.strip() inscode = residue_or_atom.inscode.strip() return f'{chain}:{pdbres} {residue_or_atom.resnum}{inscode}'
[docs]def get_atom_string(atom): """Return a string describing atom""" return get_residue_string(atom) + ':' + atom.pdbname
[docs]def get_heavy_neighbors(atom: structure._StructureAtom) -> list: """ :param atom: an atom :return: a list of heavy (non-H) atoms covalently bound to `atom` :rtype: list[structure._StructureAtom] """ return [ bond.atom2 for bond in atom.bond if bond.atom2.atomic_number != 1 and bond.order != 0 ]
[docs]def get_residue_from_changeable(ct, changeable): heavies = changeable.get_heavies() return ct.atom[heavies[0]].getResidue()
@dataclass class _Interactors: """ Container for atom indices of hydrogen-bond acceptors, donors, and clashers """ acceptors: List[int] = field(default_factory=list) donors: List[Tuple[int, int]] = field(default_factory=list) clashers: List[int] = field(default_factory=list) def remap(self, mapper: Dict[int, int]): """ Remap atom indices of interactors. :param mapper: Dictionary mapping old atom indices to new indices """ self.acceptors = [mapper[a] for a in self.acceptors] self.clashers = [mapper[a] for a in self.clashers] self.donors = [(mapper[a1], mapper[a2]) for a1, a2 in self.donors] @dataclass class _WaterState: """ Container for water states :param coordinates: Hydrogen coordinates of water state :type coordinates: Tuple[List[float], List[float]] :param donating_to: List of atom indices to which water is donating :type donating_to: List[int] :param accepting_from: List of atom indices from water is accepting a hydrogen :type accepting_from: List[int] """ # hydrogen1 and hydrogen2 coordinates coordinates: Tuple[List[float], List[float]] donating_to: List[int] = field(default_factory=list) accepting_from: List[int] = field(default_factory=list)
[docs]class WaterStateEnumerator: """ Enumerate discrete water states that are hydrogen bonding with nearby acceptors and donors The goal is to sample likely states while also limiting the number of states as solving the combinatorial problem gets harder with more and more states. """ # OH bond lenght of water in angstrom OH_LENGTH = 1.000 # HOH angle of water in degrees HOH_ANGLE = 109.5 # Minimum required hydrogen to non-static heavy donor atom distance for it # to be not considered clashing. MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE = 2.5
[docs] def __init__(self, ct, oxygen, acceptors, donors): """ :param ct: Annotated structure with donor/acceptor and static flags. :type ct: Structure :param oxygen: Oxygen atom of water :type oxygen: _StructureAtom :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param donors: List of donor atom indices :type donors: List[Tuple[int, int]] """ self.ct = ct self.oxygen = oxygen self.hydrogen1, self.hydrogen2 = list(oxygen.bonded_atoms) self.acceptors = acceptors self.donors = donors # These attributes will be filled by precalculate and will contain a # mapping of atom index to distance or unit vector from oxygen to atom self._vector_lookup = {} self._distance_lookup = {} self._precalculate() self._filter_acceptor_donors() self._separate_donors()
def _precalculate(self): """ Pre-calculate all unit vectors and distances from water oxygen to acceptor and heavy donor atoms """ oxygen_xyz = np.asarray(self.oxygen.xyz, float) for iheavy in itertools.chain(self.acceptors, *self.donors): heavy = self.ct.atom[iheavy] if heavy.atomic_number == 1: continue if iheavy == self.oxygen.index: continue vector = heavy.xyz - oxygen_xyz distance = np.linalg.norm(vector) if distance < 0.01: report( LOG_BASIC, f"Water is severily clashing with {heavy.pdbres} {heavy.resnum}{heavy.inscode} {heavy.chain}" ) continue vector /= distance self._vector_lookup[iheavy] = vector self._distance_lookup[iheavy] = distance def _filter_acceptor_donors(self): """ Filter acceptor and donor atoms up to a certain distance and remove water from donor list. This is to limit the number of states that are enumerated. Resets the acceptors and donors attributes """ distance_lookup = self._distance_lookup if distance_lookup: # The typical max distance between heavy atoms is 4.0 of the # acceptors and donors. This is probably too far and we can reduce # the distance and filter the acceptors and donors for more # efficient state generation. # Look for interactors within a certain distance. To limit the number # of states generated, stop after at least 2 interactors are found. We # start at a distance of 3.5 which is pretty generous. distances = np.asarray(list(distance_lookup.values())) for max_distance in np.linspace(3.5, 4.0, num=11, endpoint=True): ninteractors = (distances <= max_distance).sum() if ninteractors > 1: break # Filter acceptors filtered_acceptors = [] for iheavy in self.acceptors: distance = distance_lookup.get(iheavy) if distance is None: continue if distance <= max_distance: filtered_acceptors.append(iheavy) # Filter donors filtered_donors = [] for iheavy, ihydrogen in self.donors: distance = distance_lookup.get(iheavy) if distance is None: continue if distance <= max_distance: filtered_donors.append((iheavy, ihydrogen)) self.acceptors = filtered_acceptors self.donors = filtered_donors def _separate_donors(self): """Separate the donors into metals and a list of heavy atom""" # The donor list also contains metals. Separate these out. donor_heavies = [] for heavy, hydrogen in self.donors: # Its a metal if heavy == hydrogen if heavy != hydrogen: donor_heavies.append(heavy) # Some heavies can be in the list multiple times, e.g. other waters # have two O-H groups where the O is the same atom. self.donor_heavies = sorted(set(donor_heavies)) def _is_clashing_with_static_donor(self, donor_heavy): """ Check if any of the water hydogens is clashing with a static hydrogen donor :param donor_heavy: Donor heavy atom :type donor_heavy: _StructureAtom :return: True if clashing, False if not :rtype: bool """ p = donor_heavy.property if not (p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)): return False for iatom in donor_heavy.bonded_atoms: atom = self.ct.atom[iatom] if atom.atomic_number != 1: continue for hydrogen in (self.hydrogen1, self.hydrogen2): distance = self.ct.measure(iatom, hydrogen) if distance < POLAR_CLASH_CUTOFF: return True return False def _might_clash_with_nonstatic_donor(self, hydrogen, donor_heavy): """ Check if hydrogen might be clashing with hydrogen of a nonstatic donor :param hydrogen: Water hydrogen to check :type hydrogen: _StructureAtom :param donor_heavy: Donor heavy atom :type donor_heavy: _StructureAtom :return: True if clashing, False if not :rtype: bool """ p = donor_heavy.property if not p.get(DONOR_PROPERTY) or p.get(STATIC_PROPERTY): return False distance = self.ct.measure(hydrogen, donor_heavy) return distance < self.MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE
[docs] def enumerate_acceptor_acceptor_states(self): """ Enumerate states where water is donating to two acceptors at the same time :return: List of water states :rtype: List[_WaterState] """ # Align waters so it is hydrogen bonded with 2 acceptors. # Rename so we don't have self everywhere oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 ct = self.ct states = [] heavies = itertools.chain(self.acceptors, self.donor_heavies) for iheavy1, iheavy2 in itertools.combinations(heavies, 2): angle = ct.measure(iheavy1, oxygen, iheavy2) is_static_donor = False for iatom in (iheavy1, iheavy2): p = ct.atom[iatom].property if p.get(STATIC_PROPERTY) and p.get(DONOR_PROPERTY): is_static_donor = True if is_static_donor: continue # Check if angle falls within a reasonable interval so the # water can actually hydrogen bond to both at the same time if not (80 < angle <= 170): continue vector1 = self._vector_lookup[iheavy1] vector2 = self._vector_lookup[iheavy2] hydrogen1.xyz = oxygen.xyz + vector1 * self.OH_LENGTH hydrogen2.xyz = oxygen.xyz + vector2 * self.OH_LENGTH # Since the scoring function used during optimization is so # dependent on distance, align perfectly with the closest # acceptor heavy1 = ct.atom[iheavy1] heavy2 = ct.atom[iheavy2] distance1 = self._distance_lookup[iheavy1] distance2 = self._distance_lookup[iheavy2] if distance1 <= distance2: ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2) else: ct.adjust(self.HOH_ANGLE, hydrogen2, oxygen, hydrogen1) # Add the state if the water is not clashing with a static donor clashing = any( self._is_clashing_with_static_donor(heavy) for heavy in (heavy1, heavy2)) if clashing: continue state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz), donating_to=[iheavy1, iheavy2]) states.append(state) return states
[docs] def enumerate_donor_donor_states(self): """ Enumerate states where water is accepting from two donors at the same time :return: List of water states :rtype: List[_WaterState] """ # Generate states where water is accepting from 2 donors oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 states = [] angle = 180 - self.HOH_ANGLE / 2.0 for donor1, donor2 in itertools.combinations(self.donor_heavies, 2): heavy1 = self.ct.atom[donor1] heavy2 = self.ct.atom[donor2] average_xyz = (np.asarray(heavy1.xyz) + heavy2.xyz) / 2.0 vector = -(average_xyz - oxygen.xyz) distance = np.linalg.norm(vector) if distance < 0.01: continue vector /= distance hydrogen1.xyz = np.asarray(oxygen.xyz) + vector * self.OH_LENGTH hydrogen2.xyz = hydrogen1.xyz # Temporarily set the heavy coordinate to the average to place # the hydrogens heavy1_xyz = heavy1.xyz heavy1.xyz = average_xyz self.ct.adjust(angle, heavy1, oxygen, hydrogen1) self.ct.adjust(-angle, heavy1, oxygen, hydrogen2) heavy1.xyz = heavy1_xyz state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz), accepting_from=[int(heavy1), int(heavy2)]) states.append(state) return states
[docs] def enumerate_acceptor_states(self): """ Enumerate states where water is donating to a single acceptor :return: List of water states :rtype: List[_WaterState] """ # Generate states where water is donating to a single acceptor, # while letting other hydrogen not point towards static donors. oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 states = [] for iheavy in sorted( set(itertools.chain(self.acceptors, self.donor_heavies))): vector = self._vector_lookup[iheavy] heavy = self.ct.atom[iheavy] if heavy.property.get(STATIC_PROPERTY) and heavy.property.get( DONOR_PROPERTY): continue # Check if angle falls within a reasonable interval so the hydrogen1.xyz = oxygen.xyz + vector * self.OH_LENGTH # Move hydrogen2 so it makes the correct angle with hydrogen1 hydrogen2.xyz = hydrogen1.xyz if self._is_clashing_with_static_donor(heavy): continue self.ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2) # Keep a list of the other heavy donors to calculate clashes other_donor_heavies = list(self.donor_heavies) try: other_donor_heavies.remove(iheavy) except ValueError: pass # Check if hydrogen2 is interacting with a static donor. If # so, generate a state where it is not clashing axis = self._vector_lookup[iheavy] for angle in [None, 120, 120]: if angle is not None: self.rotate_hydrogens_along_axis(axis, angle) clashing = any( self._is_clashing_with_static_donor(self.ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if clashing: continue state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz), donating_to=[iheavy]) states.append(state) # Check if the current configuration is interacting with a # non-static donor. If so, move on to the next dihedral angle iteration # to add another state so the donor could donate to the # water in some of its configurations. # If not, hydrogen2 is on a stable non-clashing position # and we are done. clashing = any( self._might_clash_with_nonstatic_donor( hydrogen2, self.ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if not clashing: break return states
[docs] def enumerate_donor_states(self): """ Enumerate states where water is accepting from a single donor :return: List of water states :rtype: List[_WaterState] """ oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 ct = self.ct states = [] hydrogen_angle = 180 - self.HOH_ANGLE / 2.0 oxygen_xyz = np.asarray(oxygen.xyz) heavies_aligned_to = set() for iheavy, ihydrogen in self.donors: if iheavy in heavies_aligned_to: continue # Check if the donor is a static donor, since then we know that the # hydrogen position is fixed. If it is not static then we need to # assume that the hydrogen will be pointing straight towards the # water oxygen and we can use the heavy atom to align the angle. iatom_to_alignto = ihydrogen if not ct.atom[iheavy].property.get(STATIC_PROPERTY): iatom_to_alignto = iheavy heavies_aligned_to.add(iheavy) ct.adjust(hydrogen_angle, iatom_to_alignto, oxygen, hydrogen1) hydrogen2.xyz = hydrogen1.xyz ct.adjust(-hydrogen_angle, iatom_to_alignto, oxygen, hydrogen2) axis = -self._vector_lookup[iheavy] # Keep a list of the other heavy donors to calculate clashes other_donor_heavies = list(self.donor_heavies) try: other_donor_heavies.remove(iheavy) except ValueError: pass for angle in [None, 120, 120]: if angle is not None: self.rotate_hydrogens_along_axis(axis, angle) # Check if hydrogen is clashing with static donor clashing = any( self._is_clashing_with_static_donor(ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if clashing: continue # State is valid. Add it to the pool state = _WaterState(coordinates=(hydrogen1.xyz, hydrogen2.xyz), accepting_from=[iheavy]) states.append(state) # Check if this state might be clashing with the hydrogen of a # non-static donor. If so, generate another state. clashing = any( self._might_clash_with_nonstatic_donor( hydrogen2, ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if not clashing: break return states
[docs] def rotate_hydrogens_along_axis(self, axis, angle): """ Rotate the water hydrogens along an axis by an angle. Does not return anything but moves hydrogens in place. :param axis: Axis along to rotate to :type axis: 3 floats :param angle: Angle to rotate in degrees :type angle: float """ # Rotate water along fixed oxygen - donor axis. ct = self.ct x, y, z = self.oxygen.xyz hydrogens = [int(self.hydrogen1), int(self.hydrogen2)] # Move to origin transform.translate_structure(ct, -x, -y, -z, atom_index_list=hydrogens) # Perform the rotation rotation = transform.get_rotation_matrix(axis, angle) transform.transform_structure(ct, rotation, atom_index_list=hydrogens) # Move back to original position transform.translate_structure(ct, x, y, z, atom_index_list=hydrogens)
[docs]class NetworkSolver: """Wrapper around toulbar2 that exactly solves the hydrogen bond network problem""" # Toulbar2 requires a precision to work with as it is internally working # with integers. Specify how many decimals are included. PRECISION = 5 # Maximum time that can be spend on solving the network in seconds. Either # toulbar2 finds the optimum quickly or it takes a long-long time. MAX_TIME = 30 # Name of the executable that is called TOULBAR2 = 'toulbar2'
[docs] def __init__(self, cluster, interact, upper_bound): """ :param cluster: Hydrogen bond network cluster that will be optimized :type cluster: ProtAssign.hbond_cluster :param interact: Changeable interaction lookup table. :type interact: Dict[int, Dict[int, bool]] :param upper_bound: Upper energy bound for the network energy. Required for toulbar2 :type upper_bound: float """ self.cluster = cluster self.interact = interact self.upper_bound = upper_bound # Save intermediate files in current directory if we are debugging self._dir = None if log_level < LOG_DEBUG else '.' self.setup_toulbar2_inputs()
[docs] def setup_toulbar2_inputs(self): """ Setup the input file for toulbar2. Essentially the file contains data on the variables involved, and the self and pair scores, all in a json file. """ report(LOG_BASIC, "Setting up network to solve.") cluster = self.cluster fmt = f".{self.PRECISION}f" data = { "problem": { "name": "hydrogen_network_optimization", "mustbe": "<" + format(self.upper_bound, fmt) }, "variables": { f"v{changeable.index}": changeable.nstates for changeable in cluster.changeables }, "functions": {} } # Setup the self-cost for each state functions = data["functions"] for changeable in cluster.changeables: n = changeable.index costs = [format(cost, fmt) for cost in cluster.self_scores[n]] functions[f"c{n}"] = {"scope": [f"v{n}"], "costs": costs} global_to_local_index = { changeable.index: n for n, changeable in enumerate(cluster.changeables) } # Setup the pair interaction for each state for changeable1 in cluster.changeables: for index2 in self.interact[changeable1.index]: if changeable1.index >= index2: continue # local index might not be available if the cluster size has # been capped in the ProtAssign workflow m = global_to_local_index.get(index2) if m is None: continue changeable2 = cluster.changeables[m] key = (changeable1.index, changeable2.index) pair_scores = cluster.pair_scores.get(key) if pair_scores is None: continue costs = [format(cost, fmt) for cost in pair_scores.ravel()] scope = [f"v{changeable1.index}", f"v{changeable2.index}"] key = f"c_{changeable1.index}_{changeable2.index}" functions[key] = {"scope": scope, "costs": costs} self.toulbar2_input = tempfile.NamedTemporaryFile(suffix='.cfn', dir=self._dir).name with open(self.toulbar2_input, 'w') as f: json.dump(data, f)
def __del__(self): """Clean up the json/cfn toulbar2 input file""" if log_level < LOG_DEBUG: Path(self.toulbar2_input).unlink(missing_ok=True) def _run_toulbar2(self, options=None): """ Run toulbar2 silently :param options: Additional options passed to toulbar2 :type options: Dict[str, str] :return: Output file name of toulbar2 :rtype: str """ cmd = [self.TOULBAR2, self.toulbar2_input, f'-timer={self.MAX_TIME}'] if options: for key, value in options.items(): cmd.append(f'-{key}={value}') out = tempfile.NamedTemporaryFile(dir=self._dir).name cmd.append(f'-w={out}') if log_level < LOG_DEBUG: stderr = stdout = subprocess.DEVNULL else: stdout = open("toulbar2.log", "a") stderr = open("toulbar2.err", "a") # Be very forgiving when toulbar2 fails, as ProtAssign is a central # technology. try: subprocess.run(cmd, stdout=stdout, stderr=stderr) except Exception: report(LOG_BASIC, "Toulbar2 invocation failed") return out
[docs] def parse_and_delete_output_file(self, out): """ Parse and then delete toulbar2 output file that contains the chosen state for each changeable :param out: File name of output file :type out: str :return: List of chosen state of each changeable :rtype: List[List[int]] """ solutions = [] fpath = Path(out) if not fpath.is_file(): return solutions with fpath.open() as f: for line in f: try: sol = [int(x) for x in line.strip().split()] except ValueError: report(LOG_BASIC, "Could not interpret line in toulbar2 output file") report(LOG_BASIC, f"Offending line: {line}") continue # Also check if lenght of solution is similar to number of # changeables just in case... if len(sol) != len(self.cluster.changeables): report( LOG_BASIC, "Number of changeables does not match solution length from toulbar2" ) continue solutions.append(sol) if log_level < LOG_DEBUG: fpath.unlink(missing_ok=True) return solutions
[docs] def optimal_solution(self): """ Run toulbar2 to get the optimal solution :return: Optimal state combination. If toulbar2 fails returns None :rtype: List[int] or None """ out = self._run_toulbar2() solutions = self.parse_and_delete_output_file(out) if solutions: return solutions[0]
[docs] def explore_solutions(self, upper_bound=None, number=1000): """ Run toulbar2 to greedily obtain a number of solutions with an energy below a certain upper bound. Note that there are no guarantees here about diversity, though each solution will be unique. :param upper_bound: upper bound energy. All solutions will have an energy lower than this :type upper_bound: float :param number: Max number of solutions to find :type number: int :return: List of state combinations :rtype: List[List[int]] """ report(LOG_BASIC, "Solving network.") options = {'a': f'{number}'} if upper_bound is not None: options['ub'] = format(upper_bound, f'.{self.PRECISION}f') out = self._run_toulbar2(options=options) solutions = self.parse_and_delete_output_file(out) return solutions
[docs]class ProtAssign:
[docs] class changeable: asl = 'none' max_hbond_distance = 3.5 hbond_min_angle = 150.0 hbond_heavy_min_angle = 80.0 hbond_heavy_max_angle = 140.0
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.current_state = 0 #determine self.initial_state = 0 #determine self.treated = False self.locked = False self.valid = True self.name = ''
[docs] def pre_treat_1(self, ct): return
[docs] def pre_treat_2(self, ct): return
[docs] def pre_treat(self, ct): self.pre_treat_1(ct) build.add_hydrogens(ct, atom_list=self.get_heavies()) self.pre_treat_2(ct) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): return
[docs] def lock_protonation(self): return
[docs] def add_current_to_states(self, ct): return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gap=None, verbose=False): if not self.treated: self.pre_treat(ct) self.treated = False return []
[docs] def assign_state_gap(self, atom, state_gaps, report_gaps=True): """ Write the Gap in energy between the lowest energy state and the state with different protonation states or heavy atom positions to the output ct :param atom: The atom that should have properties written to it :type atom: structure.StructureAtom :param state_gaps: The energy gaps between states for a given changeable position. :type state_gaps: A dictionary where the keys are strings (state names) and the values are floats (energy in kcals of the lowest energy combination that has that state) :param report_gaps: Whether to report the gaps to the log file as well :type report_gaps: bool """ for p in "r_pa_state_gap", : if p in atom.property: del atom.property[p] if state_gaps is None or len(state_gaps) <= 1: return min_gap = sorted(state_gaps[state] for state in state_gaps)[1] atom.property["r_pa_state_gap"] = min_gap if log_level >= LOG_EXTRA: cutoff = 100 else: cutoff = 2.5 if report_gaps and min_gap <= cutoff: report(LOG_BASIC, ( " Alternative States for {0:10s} Gap {1:5.2f} kcals/mol" .format(self.name, min_gap))) for state in sorted(state_gaps, key=lambda k: state_gaps[k]): if state_gaps[state] <= cutoff: report( LOG_BASIC, " {0:8s} {1:5.2f} kcals/mol".format( state, state_gaps[state]))
[docs] def update_atom_indices(self, ct: Structure, new_indices: Dict[int, int]): return
[docs] def get_new_index(self, ct: Structure, atom_index: int, new_indices: Dict[int, int]): return new_indices.get(atom_index)
[docs] def get_view_atoms(self): return []
[docs] def swap_atoms(self, ct, atom1, atom2): (temp_x, temp_y, temp_z) = ct.atom[atom1].xyz ct.atom[atom1].xyz = ct.atom[atom2].xyz ct.atom[atom2].xyz = (temp_x, temp_y, temp_z) return
[docs] def get_penalty(self, istate): return 0.0
[docs] def get_adjustable_atoms(self): return []
[docs] def change_pka(self, pka, pH): return False
[docs] def change_empirical_pka(self, pH): pass
[docs] def get_close_interactors(self, ct, dcell): """ Return acceptors, donors and clashers that are close to this changeable heavy atoms. :param ct: Structure with annotated atoms signfying interaction class :type ct: Structure :param dcell: Distance cell to query for neighboring atoms :type dcell: DistanceCell :returns: List of acceptors, donor heavy-hydrogen pairs, and clashers atom indices :rtype: tuple[list[int], list[tuple[int, int]], list[int]] """ acceptors = set() donors = set() clashers = set() for iatom1 in self.get_heavies(): for iatom2 in dcell.query(*ct.atom[iatom1].xyz): p = ct.atom[iatom2].property if p.get(ACCEPTOR_PROPERTY): acceptors.add(iatom2) if p.get(DONOR_PROPERTY): atom2 = ct.atom[iatom2] if atom2.atomic_number != 1: for ba in atom2.bonded_atoms: if ba.atomic_number != 1: continue if not ba.property.get(DONOR_PROPERTY): continue donor = (iatom2, int(ba)) donors.add(donor) if p.get(ION_PROPERTY): donors.add((iatom2, iatom2)) if p.get(CLASHER_PROPERTY): clashers.add(iatom2) return sorted(acceptors), sorted(donors), sorted(clashers)
[docs] class ligand_changeable(changeable): type = 'LIGAND'
[docs] def __init__(self, ct, iatom): super().__init__(ct, iatom) self.name = get_residue_string(ct.atom[iatom]) self.heavy_atom_indices = [] self.adjustable_atom_indices = [] self.iatoms_by_het = {} residue = ct.atom[iatom].getResidue() for atom in residue.atom: atom.property['s_pa_ligand_id'] = self.name atom.property['i_pa_ligand_het'] = 0 # Populate the protonation states with the current ligand state self.protonation_states = [residue.extractStructure()] self.rotatable_xyz_by_het = defaultdict(list) # List of changeables that are common in all ligand states and that # have their own changeable instance. This needs to be saved here, # to update atom indices of the changeables when ligand states are # assigned and atoms are deleted. self.shared_changeables = [] self.nstates_per_het = defaultdict(int) self.states_by_het = defaultdict(list) self.interactors_by_het = defaultdict(_Interactors)
@property def nstates(self): return sum(self.nstates_per_het.values())
[docs] def pre_treat_1(self, ct): """ Add all the protonation states to the ct. After this method the ct is a superposition of ligand states. """ # Add the ligand protonation states to the main structure. annotate_ligand_atoms(self.protonation_states, self.name) add_ligand_sts(ct, self.protonation_states) iatoms = get_ligand_atom_indices(ct, self.name) self.heavy_atom_indices = [ iatom for iatom in iatoms if ct.atom[iatom].atomic_number != 1 ]
[docs] def pre_treat_2(self, ct): """ Further annotates structure and stores data for bookkeeping and improved computational performance. Assumes pre_treat_1 has been called before. """ ligand_iatoms = get_ligand_atom_indices(ct, self.name) further_annotate_ligand_atoms(ct, ligand_iatoms) ligand_iatoms = get_ligand_atom_indices(ct, self.name) self.iatoms_by_het = group_ligand_het_states(ct, ligand_iatoms) annotate_ligand_rotatable_hydrogens(ct, ligand_iatoms) # Update annotation of rotatable hydrogens for het, iatoms in self.iatoms_by_het.items(): update_ligand_rotatables_annotation(ct, iatoms) self.interactors_by_het = defaultdict(_Interactors) for het, iatoms in self.iatoms_by_het.items(): self.interactors_by_het[het] = get_ligand_interactors( ct, iatoms)
[docs] def add_protonation_state(self, ligand_st): self.protonation_states.append(ligand_st)
[docs] def get_heavies(self): return self.heavy_atom_indices
[docs] def get_adjustable_atoms(self): return list( itertools.chain.from_iterable(self.iatoms_by_het.values()))
[docs] def get_view_atoms(self): het = self.get_het_from_state(self.current_state) return self.iatoms_by_het[het]
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): """ Enumerate ligand states The input structure is supposed to have its different protonation states available as a superposition, i.e. all protonation states of the ligand are fully integrated into the structure. A ligand state is a protonation state of the ligand and a combination of non-shared hydrogen coordinates. A non-shared hydrogen is a rotatable hydrogen that is only present in a subset of ligand protonation states. """ ## The input structure will have a super position of all ligand ## protonation states, i.e. all states are already part of the structure self.state_names = [] self.state_coordinates = defaultdict(list) self.rotatable_xyz_by_het = defaultdict(list) self.nstates_per_het = defaultdict(int) nstates = 0 for het, iatoms in self.iatoms_by_het.items(): if not include_initial and het == 0: continue self.nstates_per_het[het] = 1 self.rotatable_xyz_by_het[het] = [] for iatom in iatoms: hydrogen = ct.atom[iatom] # This checks if the atom is a hydrogen and rotatable rotatable_id = hydrogen.property.get( 'i_pa_ligand_rotatable') if rotatable_id is None: continue # Get dihedral atoms to rotate around to generate states dihedral_atoms = get_dihedral_atoms(ct, hydrogen)[::-1] hydrogen_xyzs = [] start_xyz = hydrogen.xyz for dihedral_angle in [0, 120, 240]: ct.adjust(dihedral_angle, *dihedral_atoms) hydrogen_xyzs.append(hydrogen.xyz) hydrogen.xyz = start_xyz self.rotatable_xyz_by_het[het].append(hydrogen_xyzs) self.nstates_per_het[het] *= len(hydrogen_xyzs) # Name the states for n in range(self.nstates_per_het[het]): self.state_names.append(f'State {het} | {n}') # Generate rotatable hydrogen coordinates, which is the # cartesian product of all possible hydrogen XYZs for n, xyzs in enumerate( itertools.product(*self.rotatable_xyz_by_het[het]), start=nstates): self.state_coordinates[n] = xyzs nstates += self.nstates_per_het[het]
[docs] def get_het_from_state(self, istate: int) -> int: """Get the protonation state index from the state index.""" total_states = 0 for het, n in self.nstates_per_het.items(): total_states += n if istate < total_states: return het raise RuntimeError("Could not determine hetid from state number.")
[docs] def get_rotatable_id_to_atom(self, ct, hetid: int) -> Dict[int, int]: """Return a dictionary that maps a rotatable index to its atom index""" mapper = {} for iatom in self.iatoms_by_het[hetid]: atom = ct.atom[iatom] rotatable_id = atom.property.get('i_pa_ligand_rotatable') if rotatable_id is not None: mapper[rotatable_id] = atom.index return mapper
[docs] def update_hydrogen_xyz_from_state(self, ct, istate: int): """ Update the non-shared rotatable hydrogen coordinates for a specific state. Returns None but updates xyz coordinates of non-shared rotatable hydrogens. """ rel_state = None total_states = 0 for het, n in self.nstates_per_het.items(): total_states += n if istate < total_states: rel_state = istate - (total_states - n) break if rel_state is None: raise RuntimeError( "Relative state of tautomer could not be determined.") mapper = self.get_rotatable_id_to_atom(ct, het) for rotatable_id, xyz in enumerate(self.state_coordinates[istate]): iatom = mapper[rotatable_id] atom = ct.atom[iatom] atom.xyz = xyz
[docs] def get_state_sites( self, ct, istate: int ) -> Tuple[List[int], List[Tuple[int, int]], List[int], int]: """ Updates non-shared rotatable hydrogen positions and returns atom indices for acceptors, donors, clashers and (unused) charge """ het = self.get_het_from_state(istate) current_het = self.get_het_from_state(self.current_state) if not self.treated and het != current_het: self.pre_treat(ct) self.update_hydrogen_xyz_from_state(ct, istate) i = self.interactors_by_het[het] return i.acceptors, i.donors, i.clashers, 0
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False) -> List[int]: """ Sets the ligand to a certain state and internally updates atom indices. Returns list of atom indices that need to be deleted to obtain the right state. """ het = self.get_het_from_state(istate) current_het = self.get_het_from_state(self.current_state) # Only pre-treat if the new protonation state is different than the # current one, because we only need to change some hydrogen positions # in that case. Since this function is called interactively in # Maestro, some speedup is welcome. if not self.treated and het != current_het: self.pre_treat(ct) update_rotatable_changeable_indices(ct, self.iatoms_by_het[het], self.shared_changeables) self.update_hydrogen_xyz_from_state(ct, istate) self.treated = False self.current_state = istate atoms_to_delete = [] for het2, iatoms in self.iatoms_by_het.items(): if het != het2: atoms_to_delete += iatoms return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices: Dict[int, int]): """Update stored atom indices for bookkeeping.""" het = self.get_het_from_state(self.current_state) iatoms = self.iatoms_by_het[het] self.iatoms_by_het = {het: [new_indices[iatom] for iatom in iatoms]}
[docs] def get_penalty(self, istate: int) -> float: """ Get penalty for ligand state, which is currently fully defined by the ligand protonation state. Return 0 if no penalty. """ het = self.get_het_from_state(istate) return self.protonation_states[het].property.get( 'r_pa_ligand_penalty', 0)
[docs] class amide_changeable(changeable): """ This is the primary amide -NH2 group of ASN and GLN residues. """ asl = '((res.ptype \"ASN \" AND atom.ptype \" CG \") OR (res.ptype \"GLN \" AND atom.ptype \" CD \"))' OXYGEN_PDBNAMES = [" OD1", " OE1"] NITROGEN_PDBNAMES = [" ND2", " NE2"] CARBON_PDBNAMES = [" CG ", " CD "]
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.name = '' self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.carbon = None self.oxygen = None self.nitrogen = None self.hydrogen1 = None self.hydrogen2 = None self.treated = False self.locked = False self.valid = True self.name = get_residue_string(ct.atom[iatom]) # Will contain coordinates of oxygen, nitrogen and the two # hydrogens for the two states self.state_coordinates = [] self.type = 'AMIDE' residue_atoms = ct.getResidueAtoms(iatom) for atom in residue_atoms: name = atom.pdbname if name in self.OXYGEN_PDBNAMES: self.oxygen = int(atom) elif name in self.NITROGEN_PDBNAMES: self.nitrogen = int(atom) elif name in self.CARBON_PDBNAMES: self.carbon = int(atom) if self.carbon is None or self.oxygen is None or self.nitrogen is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to missing atoms.') self.valid = False return # Make sure the nitrogen is covalently bound ONLY to the carbon # CG (for ASN) or CD (for GLN). bound_atoms = [] for bond in ct.atom[self.nitrogen].bond: if bond.atom2.atomic_number != 1 and bond.order != 0: bound_atoms.append(bond.atom2.pdbname) if len(bound_atoms ) > 1 or bound_atoms[0] not in self.CARBON_PDBNAMES: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return state_name = ct.atom[self.carbon].property.get(STATE_PROPERTY) if state_name == 'Flip': self.current_state = 1 self.initial_state = 1 else: self.current_state = 0 self.initial_state = 0
[docs] def pre_treat_2(self, ct): hydrogens = [] for ba in ct.atom[self.nitrogen].bonded_atoms: if ba.atomic_number != 1: continue ba.property['i_pa_atomindex'] = ba.index hydrogens.append(ba.index) self.hydrogen1, self.hydrogen2 = hydrogens
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): """ Generate states for amides Max 2 states are generated by swapping the oxygen and nitrogen and readjusting the hydrogens to the nitrogen. :param ct: Structure to generate states for :type ct: Structure :param do_flips: Include the flipped state of the amide :type do_flips: bool Other parameters are not used """ n_xyz = ct.atom[self.nitrogen].xyz o_xyz = ct.atom[self.oxygen].xyz h1_xyz = ct.atom[self.hydrogen1].xyz h2_xyz = ct.atom[self.hydrogen2].xyz self.state_coordinates = [[o_xyz, n_xyz, h1_xyz, h2_xyz]] self.state_names = ['No Flip'] if do_flips or self.initial_state == 1: # Extract residue structure to determine hydrogen positions in # flipped state self.swap_atoms(ct, self.nitrogen, self.oxygen) amide_st = ct.atom[ self.nitrogen].getResidue().extractStructure() self.swap_atoms(ct, self.nitrogen, self.oxygen) # Remove hydrogens and rebuild them at the nitrogen to obtain # idealized coordinates build.delete_hydrogens(amide_st) amide = next(iter(amide_st.residue)) n1, n2 = self.NITROGEN_PDBNAMES nitrogen = amide.getAtomByPdbName(n1) if nitrogen is None: nitrogen = amide.getAtomByPdbName(n2) build.add_hydrogens(amide_st, atom_list=[nitrogen]) coordinates = [n_xyz, o_xyz] for ba in nitrogen.bonded_atoms: if ba.atomic_number == 1: coordinates.append(ba.xyz) self.state_coordinates.append(coordinates) self.state_names.append('Flip') # If the state of the amide is determined in a previous run, we # want to retain the state name and its corresponding state # coordinates. To accomplish this simply reverse the state # coordinate list. if self.initial_state == 1: self.state_coordinates = self.state_coordinates[::-1] # In case there are no flips included, reduce the states if not do_flips: self.state_coordinates = self.state_coordinates[:1] self.nstates = len(self.state_coordinates)
[docs] def set_state_coordinates(self, ct, istate): """ Set coordinates of nitrogen, oxygen, and hydrogens to pre-calculated state coordinates. """ o_xyz, n_xyz, h1_xyz, h2_xyz = self.state_coordinates[istate] ct.atom[self.oxygen].xyz = o_xyz ct.atom[self.nitrogen].xyz = n_xyz ct.atom[self.hydrogen1].xyz = h1_xyz ct.atom[self.hydrogen2].xyz = h2_xyz
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): """ Assign state to amide residue. The state is changed by setting the coordinates of the nitrogen, oxygen and 2 hydrogens to pre-calculated positions. :param ct: Structure to change :type ct: Structure :param istate: State index to assign :type istate: int :param add_labels: Add labels to the carbon atom to indicate flip :type add_labels: bool :param state_gaps: List of state gaps :type state_gaps: List[float] :param verbose: Does nothing :param verbose: bool """ if not self.treated: self.pre_treat(ct) self.set_state_coordinates(ct, istate) # Annotate carbon atom carbon = ct.atom[self.carbon] carbon.label_format = "%UT" carbon.label_color = label_color state_name = self.state_names[istate] carbon.property[STATE_PROPERTY] = state_name if add_labels: label = '' if istate == 1: label = state_name carbon.label_user_text = label self.assign_state_gap(carbon, state_gaps, report_gaps=verbose) self.current_state = istate return []
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.carbon = self.get_new_index(ct, self.carbon, new_indices) self.oxygen = self.get_new_index(ct, self.oxygen, new_indices) self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices) self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices) self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices)
[docs] def get_heavies(self): return [self.oxygen, self.nitrogen]
[docs] def get_state_sites(self, ct, istate): """ Return state sites consisting of acceptors, donors, clashers and charge. :return: List of acceptor, donor, clasher atom indices and charge :rtype: Tuple[List[int], List[int, int], List[int], float] """ self.set_state_coordinates(ct, istate) return ([self.oxygen], [(self.nitrogen, self.hydrogen1), (self.nitrogen, self.hydrogen2)], [], 0.0)
[docs] def get_view_atoms(self): return [self.carbon, self.oxygen, self.nitrogen]
[docs] def get_penalty(self, istate): return 0.0
[docs] def get_adjustable_atoms(self): return [self.oxygen, self.nitrogen, self.hydrogen1, self.hydrogen2]
[docs] class histidine_changeable(changeable): """ Imidazole group of Histidine residues. """ asl = '((res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \")) AND ((atom.ptype \" CG \"))'
[docs] def __init__(self, ct, iatom): # Calculated pKa of histidine and when flipped self.pka = None self.pka_flip = None # Flags to indicate whther this histidine is forced to be neutral # or charged for pKa prediction self.force_neutral = False self.force_charged = False self.index = 0 self.name = '' self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.penalties = [] self.CG = None self.ND1 = None self.HD1 = None self.NE2 = None self.HE2 = None self.CD2 = None self.HD2 = None self.CE1 = None self.HE1 = None self.treated = False self.locked = False self.valid = True self.name = get_residue_string(ct.atom[iatom]) self.type = 'HISTIDINE' # Find hydrogen atoms: residue_atoms = ct.getResidueAtoms(iatom) for atom in residue_atoms: name = atom.pdbname.strip() if name in self.__dict__: self.__dict__[name] = int(atom) # Catch non-standard hydrogen names. elif atom.atomic_number == 1: for heavy in atom.bonded_atoms: bond = ct.getBond(heavy, atom) if bond.order == 0: continue if int(heavy) == self.ND1: self.HD1 = int(atom) elif int(heavy) == self.NE2: self.HE2 = int(atom) elif int(heavy) == self.CD2: self.HD2 = int(atom) elif int(heavy) == self.CE1: self.HE1 = int(atom) if self.CG is None or self.ND1 is None or self.NE2 is None or self.CD2 is None or self.CE1 is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to missing atoms.') self.valid = False return # Check for something covalently bound (outside of the ring): allowed_bonds = { self.ND1: {' CG ', ' CE1'}, self.NE2: {' CE1', ' CD2'}, self.CE1: {' ND1', ' NE2'}, self.CD2: {' NE2', ' CG '}, } for heavy, expected_names in allowed_bonds.items(): bound_names = [] for bond in ct.atom[heavy].bond: if bond.atom2.atomic_number != 1 and bond.order != 0: bound_names.append(bond.atom2.pdbname) if len(bound_names) > 2 or set(bound_names) != expected_names: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return flip = 0 protonate = 0 flip_tag = re.compile('Flip') if STATE_PROPERTY in ct.atom[self.CG].property and \ flip_tag.match(ct.atom[self.CG].property[STATE_PROPERTY]): flip = 1 else: flip = 0 if self.HD1 is not None: if self.HE2 is not None: protonation = 2 # HIP else: protonation = 0 # HID else: protonation = 1 # HIE self.current_state = protonation + flip * 3 self.initial_state = protonation + flip * 3 return
[docs] def pre_treat_1(self, ct): ct.atom[self.ND1].formal_charge = 1 ct.atom[self.NE2].formal_charge = 0 ct.getBond(self.ND1, self.CE1).type = structure.BondType.Double ct.getBond(self.CD2, self.NE2).type = structure.BondType.Single ct.getBond(self.CE1, self.NE2).type = structure.BondType.Single ct.getBond(self.CG, self.CD2).type = structure.BondType.Double
[docs] def pre_treat_2(self, ct): residue_atoms = ct.getResidueAtoms(self.CG) for hyd in residue_atoms: if hyd.atomic_number != 1: continue for heavy in hyd.bonded_atoms: bond = ct.getBond(heavy, hyd) if bond.order == 0: continue if int(heavy) == self.ND1: self.HD1 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) elif int(heavy) == self.NE2: self.HE2 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) if self.current_state > 2 and not self.locked: # Needs to be flipped back, for bookkeeping to be consistent. atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2), (self.NE2, self.CE1), (self.HE2, self.HE1)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) self.current_state -= 3 self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): """ Enumerate histidine states and create penalties for certain states. :param ct: Structure to generate states for :type ct: Structure :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param donors: List of donor pair atom indices :type donors: List[Tuple[int, int]] :param pH: pH of system to determine pH-related penalties :type pH: float :param do_flips: Include flipped histidine states in pool :type do_flips: bool :param include_initial: Does nothing here """ if do_flips: self.nstates = 6 self.state_names = [ 'HID', 'HIE', 'HIP', 'Flip HID', 'Flip HIE', 'Flip HIP' ] else: self.nstates = 3 self.state_names = ['HID', 'HIE', 'HIP'] if pH is None: self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] elif pH in [pH_vlow, pH_low]: self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0] else: self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0] self.contact_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] for iatom in [self.HD1, self.HD2, self.HE1, self.HE2]: assert iatom is not None for iacceptor in acceptors: assert iacceptor is not None if ct.measure(iatom, iacceptor) < 2.1: formal_charge = 0.0 if ct.atom[iacceptor].formal_charge < 0.0: formal_charge = ct.atom[iacceptor].formal_charge # Check if it's a carboxylate. elif ct.atom[iacceptor].atomic_number == 8: connected_atoms = analyze.evaluate_asl( ct, 'not (atom.num %d) and withinbonds 2 (atom.num %d)' % (iacceptor, iacceptor)) for jatom in connected_atoms: if ct.atom[jatom].atomic_number == 8 and ct.atom[ jatom].formal_charge < 0.0: formal_charge = ct.atom[jatom].formal_charge if iatom == self.HD1: report( LOG_DEBUG, self.name + ' : HD1 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[1] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 self.contact_penalties[5] += 10.0 if iatom == self.HD2: report( LOG_DEBUG, self.name + ' : HD2 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[2] += 10.0 self.contact_penalties[4] += 10.0 if iatom == self.HE1: report( LOG_DEBUG, self.name + ' : HE1 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[2] += 10.0 self.contact_penalties[3] += 10.0 if iatom == self.HE2: report( LOG_DEBUG, self.name + ' : HE2 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[0] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 self.contact_penalties[5] += 10.0 # Only want to enforce this rule if not using PropKa. if (formal_charge < 0.0 or ct.atom[iacceptor].pdbname in [' OD1', ' OD2', ' OE1', ' OE2' ]) and (pH is not None): report( LOG_DEBUG, self.name + ' salt-bridged to ' + ct.atom[iacceptor].pdbres + str(ct.atom[iacceptor].resnum)) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 break for donor in donors: # ASP and GLU can also be in donors, if they're being predicted. if ct.atom[donor[0]].pdbres in [ "ASP ", "GLU " ] and ct.atom[donor[0]].pdbname in [ ' OD1', ' OD2', ' OE1', ' OE2' ]: # Must be in contact, and we aren't using PropKa. if ct.measure(iatom, donor[0]) < 2.0 and pH is not None: report( LOG_DEBUG, self.name + ' salt-bridged to ' + ct.atom[donor[0]].pdbres + str(ct.atom[donor[0]].resnum)) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 # Metal will force a particular state, which overrides all else. if donor[0] == donor[1]: if (ct.atom[donor[0]].atomic_number not in [ 7, 8 ]) and (ct.measure(iatom, donor[0]) < 2.5): if iatom == self.HD1: self.contact_penalties = [ 10.0, 0.0, 10.0, 10.0, 10.0, 10.0 ] elif iatom == self.HD2: self.contact_penalties = [ 10.0, 10.0, 10.0, 10.0, 0.0, 10.0 ] elif iatom == self.HE1: self.contact_penalties = [ 10.0, 10.0, 10.0, 0.0, 10.0, 10.0 ] elif iatom == self.HE2: self.contact_penalties = [ 0.0, 10.0, 10.0, 10.0, 10.0, 10.0 ] report( LOG_DEBUG, self.name + ' Metal ion detected, restricting state.') break self.penalties = [] for i in range(6): self.penalties.append(self.pH_penalties[i] + self.contact_penalties[i]) report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties)) return
[docs] def lock_protonation(self): if self.initial_state in [2, 5]: self.contact_penalties = [ 1000.0, 1000.0, 0.0, 1000.0, 1000.0, 0.0 ] else: self.contact_penalties = [0.0, 0.0, 1000.0, 0.0, 0.0, 1000.0] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(6) ] return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) if (istate > 2 and self.current_state <= 2) or \ (istate <= 2 and self.current_state > 2): atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2), (self.NE2, self.CE1), (self.HE2, self.HE1)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) protonation = istate - (istate // 3) * 3 atoms_to_delete = [] protonation_name = '' if protonation == 0: ct.atom[self.ND1].formal_charge = 0 ct.getBond(self.ND1, self.CE1).type = structure.BondType.Single ct.getBond(self.NE2, self.CE1).type = structure.BondType.Double protonation_name = 'HIS' atoms_to_delete = [self.HE2] elif protonation == 1: ct.atom[self.ND1].formal_charge = 0 protonation_name = 'HIE' atoms_to_delete = [self.HD1] elif protonation == 2: protonation_name = 'HIP' ct.atom[self.CG].label_format = "%UT" ct.atom[self.CG].label_color = label_color if istate > 2: prot_label = 'Flip ' + protonation_name ct.atom[self.CG].property[ STATE_PROPERTY] = 'Flip ' + protonation_name else: prot_label = protonation_name ct.atom[self.CG].property[STATE_PROPERTY] = protonation_name self.assign_state_gap(ct.atom[self.CG], state_gaps, report_gaps=verbose) if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas: pka = self.pka if istate > 2 and self.pka_flip is not None: pka = self.pka_flip label += ' %.2f' % pka ct.atom[self.CG].property[pka_property] = pka ct.atom[self.CG].label_user_text = label residue_atoms = ct.getResidueAtoms(self.CG) for atom in residue_atoms: atom.pdbres = protonation_name self.current_state = istate self.treated = False return (atoms_to_delete)
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.CG = self.get_new_index(ct, self.CG, new_indices) self.ND1 = self.get_new_index(ct, self.ND1, new_indices) self.HD1 = self.get_new_index(ct, self.HD1, new_indices) self.NE2 = self.get_new_index(ct, self.NE2, new_indices) self.HE2 = self.get_new_index(ct, self.HE2, new_indices) self.CD2 = self.get_new_index(ct, self.CD2, new_indices) self.HD2 = self.get_new_index(ct, self.HD2, new_indices) self.CE1 = self.get_new_index(ct, self.CE1, new_indices) self.HE1 = self.get_new_index(ct, self.HE1, new_indices) return
[docs] def get_heavies(self): return [self.ND1, self.NE2, self.CD2, self.CE1]
[docs] def get_state_sites(self, ct, istate): if istate == 0: return ([self.NE2], [(self.ND1, self.HD1)], [self.HD2, self.HE1], 0.0) elif istate == 1: return ([self.ND1], [(self.NE2, self.HE2)], [self.HD2, self.HE1], 0.0) elif istate == 2: return ([], [(self.ND1, self.HD1), (self.NE2, self.HE2)], [self.HD2, self.HE1], 1) elif istate == 3: return ([self.CE1], [(self.CD2, self.HD2)], [self.HD1, self.HE2], 0.0) elif istate == 4: return ([self.CD2], [(self.CE1, self.HE1)], [self.HD1, self.HE2], 0.0) elif istate == 5: return ([], [(self.CD2, self.HD2), (self.CE1, self.HE1)], [self.HD1, self.HE2], 1) return
[docs] def get_view_atoms(self): return [self.CG, self.ND1, self.NE2, self.CD2, self.CE1]
[docs] def get_penalty(self, istate): return self.penalties[istate]
[docs] def get_adjustable_atoms(self): return [ self.ND1, self.HD1, self.NE2, self.HE2, self.CD2, self.HD2, self.CE1, self.HE1 ]
[docs] def change_pka(self, pka, pH): if pka is None: reference_pka = 6.10 else: reference_pka = pka self.pka = pka # If the pKa is within 0.5 of the pH, let the scoring function # determine the protonation state if math.fabs(pH - reference_pka) <= 0.5: self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] elif pH < reference_pka: self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0] else: self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0] new_penalties = [] something_changed = False for i in range(6): new_penalties.append(self.pH_penalties[i] + self.contact_penalties[i]) if self.penalties[i] != new_penalties[i]: something_changed = True self.penalties = new_penalties return something_changed
[docs] def change_empirical_pka(self, pH): if self.pka is None: return penalty = 4.0 states_to_penalize = [] delta = self.pka - pH if delta > 0.3: states_to_penalize += [0, 1] elif delta < -0.3: states_to_penalize.append(2) if self.pka_flip is not None: delta = self.pka_flip - pH if delta > 0.3: states_to_penalize += [3, 4] elif delta < -0.3: states_to_penalize.append(5) pH_penalties = [0.0] * 6 for state in states_to_penalize: pH_penalties[state] += penalty if self.force_neutral: pH_penalties[2] += 100 * penalty pH_penalties[5] += 100 * penalty if self.force_charged: for i in (0, 1, 3, 4): pH_penalties[i] += 100 * penalty self.pH_penalties = pH_penalties self.penalties = [ p1 + p2 for p1, p2 in zip(self.contact_penalties, self.pH_penalties) ]
[docs] class carboxyl_changeable(changeable): asl = '(res.ptype \"ASP \",\"ASH \" AND atom.ptype \" CG \") OR (res.ptype \"GLU \",\"GLH \" AND atom.ptype \" CD \")'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.name = '' self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.carbon = None self.oxygen1 = None self.oxygen2 = None self.hydrogen1 = None self.hydrogen2 = None self.treated = False self.locked = False self.valid = True self.name = get_residue_string(ct.atom[iatom]) self.type = 'CARBOXYL' residue_atoms = ct.getResidueAtoms(iatom) for atom in residue_atoms: name = atom.pdbname if (atom.pdbres in ["ASP ", "ASH "] and name == " CG ") or (atom.pdbres in ["GLU ", "GLH "] and name == " CD "): self.carbon = int(atom) elif name in [' OD1', ' OE1']: self.oxygen1 = int(atom) for bonded_atom in atom.bonded_atoms: if bonded_atom.atomic_number == 1: self.hydrogen1 = int(bonded_atom) elif name in [' OD2', ' OE2']: self.oxygen2 = int(atom) for bonded_atom in atom.bonded_atoms: if bonded_atom.atomic_number == 1: self.hydrogen2 = int(bonded_atom) if self.carbon is None or self.oxygen1 is None or self.oxygen2 is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to missing atoms.') self.valid = False return # Check for something covalently bound. for oxygen in [self.oxygen1, self.oxygen2]: for bond in ct.atom[oxygen].bond: if bond.atom2.atomic_number != 1 and bond.atom2.pdbname not in [ ' CG ', ' CD ' ] and bond.order != 0: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return if self.hydrogen1 is not None: self.current_state = 1 self.initial_state = 1 elif self.hydrogen2 is not None: self.current_state = 2 self.initial_state = 2 else: self.current_state = 0 self.initial_state = 0 return
[docs] def pre_treat_1(self, ct): ct.atom[self.oxygen1].formal_charge = 1 ct.atom[self.oxygen2].formal_charge = 0 ct.getBond(self.carbon, self.oxygen1).type = structure.BondType.Double ct.getBond(self.carbon, self.oxygen2).type = structure.BondType.Single
[docs] def pre_treat_2(self, ct): residue_atoms = ct.getResidueAtoms(self.carbon) for hyd in residue_atoms: if hyd.atomic_number != 1: continue for heavy in hyd.bonded_atoms: bond = ct.getBond(heavy, hyd) if bond.order == 0: continue if int(heavy) == self.oxygen1: self.hydrogen1 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) elif int(heavy) == self.oxygen2: self.hydrogen2 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): self.contact_penalties = [0.0, 0.0, 0.0] self.nstates = 3 self.state_names = ['Charged', 'Neutral 1', 'Neutral 2'] if pH is None: self.pH_penalties = [0.0, 0.0, 0.0] elif pH in [pH_vlow]: self.pH_penalties = [5.0, 0.0, 0.0] else: self.pH_penalties = [0.0, 5.0, 5.0] for iatom in [self.hydrogen1, self.hydrogen2]: # If paired with another ASP/GLU, remove protonation penalties (but don't force protonation) for donor in donors: if ct.atom[donor[0]].pdbname in [ ' OD1', ' OD2', ' OE1', ' OE2' ] and ct.atom[donor[0]].resnum != ct.atom[iatom].resnum: if ct.measure(iatom, donor[0]) < 2.0: if iatom == self.hydrogen1: report( LOG_DEBUG, self.name + ' : hydrogen1 is near ' + str(ct.atom[donor[0]].resnum) + ':' + ct.atom[donor[0]].pdbname) self.contact_penalties = [0.0, 0.0, 0.0] elif iatom == self.hydrogen2: report( LOG_DEBUG, self.name + ' : hydrogen2 is near ' + str(ct.atom[donor[0]].resnum) + ':' + ct.atom[donor[0]].pdbname) self.contact_penalties = [0.0, 0.0, 0.0] # Check for a metal elif donor[0] == donor[1]: if (ct.atom[donor[0]].atomic_number not in [7, 8]) and ( (ct.measure(self.oxygen1, donor[0]) < 3.0) or (ct.measure(self.oxygen2, donor[0]) < 3.0)): self.contact_penalties = [0.0, 10.0, 10.0] report( LOG_DEBUG, self.name + ' Metal ion detected, restricting state.') break for iacceptor in acceptors: if ct.measure(iatom, iacceptor) < 2.0: if iatom == self.hydrogen1: report( LOG_DEBUG, self.name + ' : hydrogen1 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[2] += 10.0 elif iatom == self.hydrogen2: report( LOG_DEBUG, self.name + ' : hydrogen2 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[1] += 10.0 break self.penalties = [] for i in range(3): self.penalties.append(self.pH_penalties[i] + self.contact_penalties[i]) report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties)) return
[docs] def lock_protonation(self): if self.initial_state == 0: self.contact_penalties = [0.0, 1000.0, 1000.0] else: self.contact_penalties = [1000.0, 0.0, 0.0] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(3) ] return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) atoms_to_delete = [] ct.atom[self.carbon].label_format = "%UT" ct.atom[self.carbon].label_color = label_color if (istate == 1 and self.current_state != 1) or (istate != 1 and self.current_state == 1): atoms_to_swap = [(self.oxygen1, self.oxygen2), (self.hydrogen1, self.hydrogen2)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) # Unlike His/Asn/Gln, we aren't preserving the flip info, so update these. self.oxygen1, self.oxygen2 = self.oxygen2, self.oxygen1 self.hydrogen1, self.hydrogen2 = self.hydrogen2, self.hydrogen1 if istate == 0: protonated = False ct.atom[self.oxygen1].formal_charge = 0 ct.atom[self.oxygen2].formal_charge = -1 ct.getBond(self.carbon, self.oxygen1).type = structure.BondType.Double ct.getBond(self.carbon, self.oxygen2).type = structure.BondType.Single atoms_to_delete = [self.hydrogen1, self.hydrogen2] prot_label = '' elif istate == 1: protonated = True ct.atom[self.oxygen1].formal_charge = 0 ct.atom[self.oxygen2].formal_charge = 0 ct.getBond(self.carbon, self.oxygen1).type = structure.BondType.Single ct.getBond(self.carbon, self.oxygen2).type = structure.BondType.Double atoms_to_delete = [self.hydrogen2] prot_label = 'Neutral' elif istate == 2: protonated = True ct.atom[self.oxygen1].formal_charge = 0 ct.atom[self.oxygen2].formal_charge = 0 ct.getBond(self.carbon, self.oxygen1).type = structure.BondType.Double ct.getBond(self.carbon, self.oxygen2).type = structure.BondType.Single atoms_to_delete = [self.hydrogen1] prot_label = 'Neutral' if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas: label += ' %.2f' % self.pka ct.atom[self.carbon].property[pka_property] = self.pka ct.atom[self.carbon].label_user_text = label # Set the residue name. if ct.atom[self.carbon].pdbres[0:2] == 'AS': if protonated: protonation_name = 'ASH ' else: protonation_name = 'ASP ' if ct.atom[self.carbon].pdbres[0:2] == 'GL': if protonated: protonation_name = 'GLH ' else: protonation_name = 'GLU ' self.assign_state_gap(ct.atom[self.carbon], state_gaps, report_gaps=verbose) residue_atoms = ct.getResidueAtoms(self.carbon) for atom in residue_atoms: atom.pdbres = protonation_name self.current_state = istate self.treated = False return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.carbon = self.get_new_index(ct, self.carbon, new_indices) self.oxygen1 = self.get_new_index(ct, self.oxygen1, new_indices) self.oxygen2 = self.get_new_index(ct, self.oxygen2, new_indices) self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices) self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices) return
[docs] def get_heavies(self): return [self.oxygen1, self.oxygen2]
[docs] def get_state_sites(self, ct, istate): if istate == 0: return ([self.oxygen1, self.oxygen2], [], [], 0.0) elif istate == 1: return ([self.oxygen1, self.oxygen2], [(self.oxygen1, self.hydrogen1)], [], 0.0) elif istate == 2: return ([self.oxygen1, self.oxygen2], [(self.oxygen2, self.hydrogen2)], [], 0.0) return
[docs] def get_view_atoms(self): return [self.carbon, self.oxygen1, self.oxygen2]
[docs] def get_penalty(self, istate): return self.penalties[istate]
[docs] def get_adjustable_atoms(self): return [self.oxygen1, self.oxygen2]
[docs] def change_pka(self, pka, pH): if pka is None: reference_pka = 4.00 else: reference_pka = pka self.pka = pka # propka sometimes has some very large pKas for ASP and GLU # so we never want to force carboxlyate to be protonated. If the # propKa is lower than the reference then we should (almost) # always deprotonate. If the pKa is larger than the reference # then we should allow the hbonding pattern to determine whether # the system should be protonated. Unlike other residues, a # very large pKa will never (almost) force a protonated state. if (reference_pka >= pH): self.pH_penalties = [0.0, 0.0, 0.0] else: self.pH_penalties = [0.0, 4.0, 4.0] new_penalties = [] something_changed = False for i in range(3): new_penalties.append(self.pH_penalties[i] + self.contact_penalties[i]) if self.penalties[i] != new_penalties[i]: something_changed = True self.penalties = new_penalties return something_changed
[docs] def change_empirical_pka(self, pH): if self.pka is None: return delta = self.pka - pH if delta < 0.3: # We really do not want to protonate the Glu/Asp unless the pKa # is significanlty lower than the pH pH_penalties = [0, 100, 100] elif delta > 0.3: # Only add a modest pH penalty to the charged state in case a # clash occurs in the protonated state. pH_penalties = [10, 0, 0] self.pH_penalties = pH_penalties self.penalties = [ p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties) ]
[docs] class rotatable_changeable(changeable): asl = '((res.ptype \"CYS \",\"CYT \") AND (atom.ptype \" SG \") AND (atom.formal -1)) OR ((res.ptype \"TYR \") AND (atom.ptype \" OH \") AND (atom.formal -1)) OR (( atom.ele H AND not /C0-H0/ AND not /N0-H0/ ) AND NOT (res.ptype \"HOH\",\"DOD\",\"SPC\",\"ASH\",\"GLH\",\"ASP\",\"GLU\" ))' type = 'ROTATABLE'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.dihedrals = [] self.valid = True self.name = get_atom_string(ct.atom[iatom]) self.ionizable = False self.current_state = 0 self.initial_state = 0 # If it's a deprotonated CYS/TYR if ct.atom[iatom].pdbname == " SG " or ( ct.atom[iatom].pdbres == "TYR " and ct.atom[iatom].pdbname == " OH "): # Abort if it's coordinated to a metal for heavy_bond in ct.atom[iatom].bond: if heavy_bond.order == 0: report( LOG_EXTRA, 'Rejecting ' + self.name + ' - coordinated to a metal.') self.valid = False return # Set state to -1 now, and map it to the last state later in enumerate_states. self.current_state = -1 self.initial_state = -1 ct.atom[iatom].formal_charge = 0 build.add_hydrogens(ct, atom_list=[iatom]) for bonded_atom in ct.atom[iatom].bonded_atoms: if bonded_atom.atomic_number == 1: self.hydrogen = int(bonded_atom) break else: self.hydrogen = iatom try: dihedral_atoms = get_dihedral_atoms(ct, self.hydrogen) except: self.valid = False return if len(dihedral_atoms) < 4: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to lack of rotatability.') self.valid = False return if ct.atom[dihedral_atoms[1]].bond_total != 2: report( LOG_EXTRA, 'Rejecting ' + self.name + ' - parent has too many bonds.') self.valid = False return self.parent = dihedral_atoms[1] self.parentparent = dihedral_atoms[2] self.parentparentparent = dihedral_atoms[3] self.treated = True self.locked = False nbonds = ct.atom[self.parentparent].bond_total if nbonds == 3: self.periodicity = 2 elif nbonds == 4: self.periodicity = 3 # Whether or not the atom at a given dihedral position is heavy. self.eclipse_is_heavy = [] # Find out which are heavy/hydrogen. for atom in ct.atom[self.parentparent].bonded_atoms: if atom.atomic_number == 1: heavy = False else: heavy = True if int(atom) == self.parentparentparent: # 0 position. self.eclipse_is_heavy.append((0.0, heavy)) elif int(atom) != self.parent: dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, atom) self.eclipse_is_heavy.append((dihedral, heavy)) else: self.periodicity = 0
# Never ionize a rotatable as we don't have data a dataset to test # our accuracy #if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ', 'TYR ']: # self.ionizable = True
[docs] def pre_treat_1(self, ct): ct.atom[self.parent].formal_charge = 0 return
[docs] def pre_treat_2(self, ct): residue_atoms = ct.getResidueAtoms(self.parent) for hyd in residue_atoms: if hyd.atomic_number != 1: continue for heavy in hyd.bonded_atoms: bond = ct.getBond(heavy, hyd) if bond.order == 0: continue if int(heavy) == self.parent: self.hydrogen = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): distance_check = self.max_hbond_distance if include_initial: # Include initial state. dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, self.hydrogen) self.dihedrals.append(dihedral) self.state_names.append('Initial') self.nstates = 1 initial_nstates = self.nstates # Keep trying until at least one state is found, increasing the distance each time. while self.nstates == initial_nstates: for (atom1, atom2) in [ (acceptor, None) for acceptor in acceptors ] + donors: if atom1 == self.parent or atom1 == self.parentparent: continue # Trim any states pointing head-to-head with an N-H. distance = ct.measure(self.parent, atom1) if distance <= distance_check: angle = ct.measure(self.parentparent, self.parent, atom1) if (angle <= self.hbond_heavy_max_angle) and ( angle >= self.hbond_heavy_min_angle): # Measure what the angle to the atom is. dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, atom1) self.nstates += 1 self.dihedrals.append(dihedral) self.state_names.append("Orientation") used_donor_heavies = set() for pair in donors: if pair[0] == self.parent or pair[0] == self.parentparent: continue if pair[0] in used_donor_heavies: continue used_donor_heavies.add(pair[0]) distance = ct.measure(self.parent, pair[0]) if distance <= distance_check: # Measure what the angle to the atom is (should it be from the heavy or the H?). dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, pair[0]) # Add +/-120 to it. dihedral += 120.0 if dihedral > 180.0: dihedral -= 360.0 elif dihedral < -180.0: dihedral += 360.0 self.nstates += 1 self.dihedrals.append(dihedral) self.state_names.append("Orientation") dihedral -= 240.0 if dihedral > 180.0: dihedral -= 360.0 elif dihedral < -180.0: dihedral += 360.0 self.nstates += 1 self.dihedrals.append(dihedral) self.state_names.append("Orientation") distance_check += 0.25 if distance_check >= 50.0: self.nstates += 1 self.dihedrals.append(0.0) self.state_names.append("Orientation") break # Add one state that doesn't touch anything, just in case. if ct.atom[self.hydrogen].pdbres != "TYR ": dihedral_save = ct.measure(self.parentparentparent, self.parentparent, self.parent, self.hydrogen) for idihedral in range(36): ct.adjust( float(idihedral) * 10.0, self.parentparentparent, self.parentparent, self.parent, self.hydrogen) nclashes = analyze.evaluate_asl( ct, '(not (atom.num %d) and within 2 (atom.num %d)) and not (withinbonds 2 (atom.num %d))' % (self.hydrogen, self.hydrogen, self.hydrogen)) if len(nclashes) == 0: self.nstates += 1 self.dihedrals.append(float(idihedral) * 10.0) self.state_names.append("Default") break ct.adjust(dihedral_save, self.parentparentparent, self.parentparent, self.parent, self.hydrogen) # Trim any disallowed states. for istate in range(len(self.dihedrals) - 1, -1, -1): idihedral = self.dihedrals[istate] iname = self.state_names[istate] # Don't trim original state if (istate == 0) and include_initial: continue if ct.atom[self.hydrogen].pdbres == "TYR " and math.fabs( idihedral) > 30.0 and math.fabs(idihedral) < 150.0: if math.fabs(idihedral) < 40.0: report( LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' + str(idihedral) + ') to be more planar.') if idihedral < 0.0: self.dihedrals[istate] = -30.0 report(LOG_FULL_DEBUG, ' Adjusted to -30.0') else: self.dihedrals[istate] = 30.0 report(LOG_FULL_DEBUG, ' Adjusted to 30.0') elif math.fabs(idihedral) > 140.0: report( LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' + str(idihedral) + ') to be more planar.') if idihedral < 0.0: self.dihedrals[istate] = -150.0 report(LOG_FULL_DEBUG, ' Adjusted to -150.0') else: self.dihedrals[istate] = 150.0 report(LOG_FULL_DEBUG, ' Adjusted to 150.0') else: report( LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' + str(idihedral) + ') - too far out of plane.') self.dihedrals.remove(idihedral) self.state_names.remove(iname) self.nstates -= 1 elif self.periodicity == 3: for (angle, heavy) in self.eclipse_is_heavy: if heavy: cutoff = 30.0 else: cutoff = 10.0 if math.fabs(idihedral - angle) < cutoff: report( LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' + str(idihedral) + ') - too close to eclipsed.') try: self.dihedrals.remove(idihedral) self.state_names.remove(iname) self.nstates -= 1 except: report( LOG_NONE, 'ERROR: Failure removing state for ' + self.name + ' - Please check this residue for structural inconsistencies.' ) if ct.atom[self.hydrogen].pdbres == "TYR ": self.nstates += 1 self.dihedrals.append(0.0) self.state_names.append("Default 1") self.nstates += 1 self.dihedrals.append(180.0) self.state_names.append("Default 2") # Add standard staggered states. elif self.periodicity == 3: self.nstates += 1 self.dihedrals.append(60.0) self.state_names.append("Stagger 1") self.nstates += 1 self.dihedrals.append(180.0) self.state_names.append("Stagger 2") self.nstates += 1 self.dihedrals.append(300.0) self.state_names.append("Stagger 3") # Trim any redundant states. for istate in range(len(self.dihedrals) - 1, 0, -1): idihedral = self.dihedrals[istate] iname = self.state_names[istate] for jstate in range(istate - 1, -1, -1): jdihedral = self.dihedrals[jstate] jname = self.state_names[jstate] if abs(idihedral - jdihedral) < 10.0: report( LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' + str(idihedral) + ') - too similar to ' + jname + '(' + str(idihedral) + ')') self.dihedrals.remove(idihedral) self.state_names.remove(iname) self.nstates -= 1 break # Name the states if include_initial: for istate in range(1, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate, self.nstates - 1) else: for istate in range(0, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate + 1, self.nstates) # TODO Do not create a neutral state for these, as # there we have no insight into the accuracy of pKa prediction for # these. #if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']: # self.state_names.append('CYT ') # self.dihedrals.append(None) # self.nstates += 1 #elif ct.atom[self.parent].pdbres in ['TYR ']: # self.state_names.append('Charged') # self.dihedrals.append(None) # self.nstates += 1 self.pH_penalties = [] for istate in range(self.nstates): if self.ionizable: # Penalties will come later if using PropKa. if pH is None: self.pH_penalties.append(0.0) continue # Not using PropKa. if self.dihedrals[istate] is None: if pH is None: self.pH_penalties.append(0.0) elif pH == pH_high: self.pH_penalties.append(0.0) else: # Really don't want to deprotonate at normal pH self.pH_penalties.append(200.0) else: if pH is None: self.pH_penalties.append(0.0) elif pH == pH_high: # Really don't want to deprotonate at normal pH self.pH_penalties.append(200.0) else: self.pH_penalties.append(0.0) else: self.pH_penalties.append(0.0) if self.current_state == -1: self.current_state = self.nstates - 1 if self.initial_state == -1: self.initial_state = self.nstates - 1 self.contact_penalties = [0.0 for i in range(self.nstates)] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ] return
[docs] def lock_protonation(self): if not self.ionizable: return if self.initial_state == (self.nstates - 1): self.contact_penalties = [1000.0 for i in range(self.nstates)] self.contact_penalties[-1] = 0.0 else: self.contact_penalties = [0.0 for i in range(self.nstates)] self.contact_penalties[-1] = 1000.0 self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ] return
[docs] def add_current_to_states(self, ct): dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, self.hydrogen) self.dihedrals.append(dihedral) self.nstates += 1 self.num_user_states += 1 self.state_names.append("User %d" % self.num_user_states) self.penalties.append(0.0)
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) atoms_to_delete = [] ct.atom[self.parent].label_format = "%UT" ct.atom[self.parent].label_color = label_color if self.dihedrals[istate] is None: ct.atom[self.parent].formal_charge = -1 atoms_to_delete = [self.hydrogen] if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']: residue_name = 'CYT ' prot_label = 'CYT' elif ct.atom[self.parent].pdbres == 'TYR ': residue_name = 'TYR ' prot_label = 'Charged' else: ct.adjust(self.dihedrals[istate], self.parentparentparent, self.parentparent, self.parent, self.hydrogen) if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']: residue_name = 'CYS ' elif ct.atom[self.parent].pdbres == 'TYR ': residue_name = 'TYR ' prot_label = '' if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas: label += ' %.2f' % self.pka ct.atom[self.parent].property[pka_property] = self.pka ct.atom[self.parent].label_user_text = label self.assign_state_gap(ct.atom[self.parent], state_gaps, report_gaps=verbose) if self.ionizable: residue_atoms = ct.getResidueAtoms(self.parent) for atom in residue_atoms: atom.pdbres = residue_name self.current_state = istate self.treated = False return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.hydrogen = self.get_new_index(ct, self.hydrogen, new_indices) self.parent = self.get_new_index(ct, self.parent, new_indices) self.parentparent = self.get_new_index(ct, self.parentparent, new_indices) self.parentparentparent = self.get_new_index( ct, self.parentparentparent, new_indices)
[docs] def get_heavies(self): return [self.parent]
[docs] def get_state_sites(self, ct, istate): if self.dihedrals[istate] is None: return ([self.parent], [], [], 0.0) else: ct.adjust(self.dihedrals[istate], self.parentparentparent, self.parentparent, self.parent, self.hydrogen) return ([self.parent], [(self.parent, self.hydrogen)], [], 0.0)
[docs] def get_view_atoms(self): return [self.parent]
[docs] def get_penalty(self, istate): return self.penalties[istate]
[docs] def get_adjustable_atoms(self): return [self.hydrogen, self.parent]
[docs] def change_pka(self, pka, pH): if pka is None: reference_pka = 15.00 else: reference_pka = pka self.pka = pka if not self.ionizable: return False if reference_pka < pH: ionized = True else: ionized = False new_penalties = [] changed = False for istate, dihedral in enumerate(self.dihedrals): if dihedral is None: if ionized: new_penalties.append(0.0 + self.contact_penalties[istate]) else: # Really don't want to deprotonate at normal pH new_penalties.append(200.0 + self.contact_penalties[istate]) else: if ionized: new_penalties.append(200.0 + self.contact_penalties[istate]) else: new_penalties.append(0.0 + self.contact_penalties[istate]) if self.penalties[istate] != new_penalties[istate]: changed = True self.penalties = new_penalties return changed
[docs] def change_empirical_pka(self, pH): # Never deprotonate penalty = 200.0 pH_penalties = [] for dihedral in self.dihedrals: # Deprotonated state if dihedral is None: pH_penalties.append(penalty) else: pH_penalties.append(0) self.pH_penalties = pH_penalties self.penalties = [ p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties) ]
[docs] class amine_changeable(changeable): # Could be used for all primary amines, but for now let's stick to LYS sidechains. asl = '((res.ptype \"LYS \",\"LYN \") AND (atom.ptype \" NZ \"))'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.nitrogen = None self.carbon = None self.hyds = [] self.hyd_coords = [] self.initial_coords = [] self.locked = False self.valid = True self.name = get_residue_string(ct.atom[iatom]) self.type = 'AMINE' self.initial_coords = [] for atom in ct.atom[iatom].bonded_atoms: if atom.atomic_number == 1: self.hyds.append(int(atom)) elif self.carbon is None: self.carbon = int(atom) else: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return if len(self.hyds) not in [2, 3]: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to incorrect bound count.') self.valid = False return self.nitrogen = iatom for hyd in self.hyds: self.initial_coords.append(ct.atom[hyd].xyz) self.current_state = 0 self.initial_state = 0 return
[docs] def pre_treat_1(self, ct): ct.atom[self.nitrogen].formal_charge = 1 return
[docs] def pre_treat_2(self, ct): self.hyds = [] for hyd in ct.atom[self.nitrogen].bonded_atoms: if hyd.atomic_number == 1: self.hyds.append(int(hyd)) hyd.property['i_pa_atomindex'] = int(hyd) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, sample_neutral_states=False, include_initial=False): """ Generate states for lysines. States are generated by rotating hydrogens for acceptor/donor interactions and by optionally including the neutral state. :param ct: Structure to generate states for :type ct: Structure :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param donors: List of donor atom indices :type donors: List[(int, int)] :param pH: pH of system :type pH: float :param do_flips: Does nothing :type do_flips: bool :param sample_neutral_states: Include neutral states. Since PROPKA's pKa prediction is unreliable for Lys, currently we have no method of confidently assess whether it is neutral. So it's turned off by default. :type sample_neutral_states: bool :param include_initial: Include the initial state of the Lys :type include_initial: bool """ dihedral_atoms = get_dihedral_atoms(ct, self.hyds[0]) distance_check = self.max_hbond_distance if include_initial: self.hyd_coords.append(self.initial_coords) self.state_names.append('Initial') self.nstates += 1 initial_nstates = self.nstates while self.nstates == initial_nstates: for (atom1, atom2) in [ (acceptor, None) for acceptor in acceptors ] + donors: if atom1 == self.nitrogen: continue distance = ct.measure(self.nitrogen, atom1) if distance <= distance_check: angle = ct.measure(self.carbon, self.nitrogen, atom1) if (angle <= self.hbond_heavy_max_angle) and ( angle >= self.hbond_heavy_min_angle): dihedral = ct.measure(atom1, dihedral_atoms[1], dihedral_atoms[2], dihedral_atoms[3]) eclipsed = False for eclipse_ang in [0.0, 120.0, 240.0]: delta = math.fabs( math.fabs(dihedral) - eclipse_ang) if delta >= 360.0: delta -= 360.0 if delta < 30.0: eclipsed = True break if not eclipsed: ct.adjust(dihedral, dihedral_atoms[3], dihedral_atoms[2], dihedral_atoms[1], dihedral_atoms[0]) coords = [] for hyd in self.hyds: coords.append(ct.atom[hyd].xyz) self.hyd_coords.append(coords) self.state_names.append("Orientation") self.nstates += 1 distance_check += 0.25 if distance_check > MAX_ORIENT_HYDROGEN_DISTANCE: break # Add indexes to the names if include_initial: for istate in range(1, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate, self.nstates - 1) else: for istate in range(len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate + 1, self.nstates) # Add a standard staggered state. ct.adjust(60.0, dihedral_atoms[3], dihedral_atoms[2], dihedral_atoms[1], dihedral_atoms[0]) coords = [] for hyd in self.hyds: coords.append(ct.atom[hyd].xyz) self.hyd_coords.append(coords) self.state_names.append("Stagger") self.nstates += 1 self.pH_penalties = [] # Protonation penalties for charged forms. for istate in range(self.nstates): self.pH_penalties.append(0.0) # After generating the charged states, now generate 3 neutral # states for each charged state. if sample_neutral_states: first_to_expand = 1 if include_initial else 0 for istate in range(first_to_expand, self.nstates): coords = self.hyd_coords[istate] self.hyd_coords.append([coords[0], coords[1]]) self.state_names.append(self.state_names[istate] + ' LYN1') self.hyd_coords.append([coords[0], coords[2]]) self.state_names.append(self.state_names[istate] + ' LYN2') self.hyd_coords.append([coords[1], coords[2]]) self.state_names.append(self.state_names[istate] + ' LYN3') self.nstates += 3 if pH is None: self.pH_penalties.extend([0.0, 0.0, 0.0]) else: self.pH_penalties.extend([100.0, 100.0, 100.0]) self.contact_penalties = [0.0 for i in range(self.nstates)] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ]
[docs] def lock_protonation(self): nhyd_desired = len(self.initial_coords) for istate in range(self.nstates): if len(self.hyd_coords[istate]) == nhyd_desired: self.contact_penalties[istate] = 0.0 else: self.contact_penalties[istate] = 1000.0 self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ] return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) atoms_to_delete = [] ct.atom[self.nitrogen].label_format = "%UT" ct.atom[self.nitrogen].label_color = label_color residue_name = 'LYS ' prot_label = '' if len(self.hyd_coords[istate]) == 2: residue_name = 'LYN ' prot_label = 'Neutral' atoms_to_delete.append(self.hyds[2]) self.hyds[2] = None ct.atom[self.nitrogen].formal_charge = 0 else: ct.atom[self.nitrogen].formal_charge = 1 for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]): ct.atom[self.hyds[ihyd]].xyz = hyd_coords if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas: label += ' %.2f' % self.pka ct.atom[self.nitrogen].property[pka_property] = self.pka ct.atom[self.nitrogen].label_user_text = label self.assign_state_gap(ct.atom[self.nitrogen], state_gaps, report_gaps=verbose) residue_atoms = ct.getResidueAtoms(self.nitrogen) for atom in residue_atoms: atom.pdbres = residue_name self.current_state = istate self.treated = False return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices) self.carbon = self.get_new_index(ct, self.carbon, new_indices) for i in range(len(self.hyds)): if self.hyds[i] is not None: self.hyds[i] = self.get_new_index(ct, self.hyds[i], new_indices) return
[docs] def get_heavies(self): return [self.nitrogen]
[docs] def get_state_sites(self, ct, istate): acceptors = [] donors = [] for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]): ct.atom[self.hyds[ihyd]].xyz = hyd_coords donors.append((self.nitrogen, self.hyds[ihyd])) if len(self.hyd_coords[istate]) == 2: acceptors = [self.nitrogen] return (acceptors, donors, [], 0.0)
[docs] def get_view_atoms(self): return [self.nitrogen]
[docs] def get_penalty(self, istate): return self.penalties[istate]
[docs] def change_pka(self, pka, pH): if pka is None: reference_pka = 15.00 else: reference_pka = pka self.pka = pka neutral = reference_pka < pH new_penalties = [] changed = False for istate, hyd_coords in enumerate(self.hyd_coords): nhyds = len(hyd_coords) pH_penalty = 0 if (nhyds == 2 and not neutral) or (nhyds == 3 and neutral): pH_penalty = 100 new_penalty = pH_penalty + self.contact_penalties[istate] new_penalties.append(new_penalty) if self.penalties[istate] != new_penalty: changed = True self.penalties = new_penalties return changed
[docs] def change_empirical_pka(self, pH): # Never deprotonate pH_penalties = [] penalty = 100 for hyd_coords in self.hyd_coords: nhyds = len(hyd_coords) if nhyds == 2: pH_penalties.append(penalty) else: pH_penalties.append(0) self.pH_penalties = pH_penalties self.penalties = [ p1 + p2 for p1, p2 in zip(self.contact_penalties, pH_penalties) ]
[docs] class water_changeable(changeable): asl = "(water) AND (atom.ele O)" redundancy_tolerance = 0.5
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.name = '' self.num_user_states = 0 self.state_names = [] self.state_coordinates = [] self.oxygen = iatom self.hydrogen1 = None self.hydrogen2 = None self.valid = True self.name = get_residue_string(ct.atom[iatom]) self.type = 'WATER' self.interacts_with_nonwater = False for atom in ct.atom[iatom].bonded_atoms: if atom.atomic_number != 1: continue if self.hydrogen1 is None: self.hydrogen1 = int(atom) elif self.hydrogen2 is None: self.hydrogen2 = int(atom) else: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to excess hydrogens.') self.valid = False return if self.hydrogen1 is None or self.hydrogen2 is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to lack of hydrogens.') self.valid = False return self.treated = True self.locked = False self.current_state = 0 self.initial_state = 0
@property def nstates(self): """Return number of enumerated states""" return len(self.state_coordinates)
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): """ Generate discrete states for water, where a state is defined by the coordinates of its two hydrogens. :param ct: Structure :type ct: Structure :param acceptors: Acceptor atom indices :type acceptors: List[int] :param donors: Donor heavy-hydrogen atom indices :type donors: List[Tuple(int, int)] :param pH: Does nothing here :type pH: float :param do_flips: Does nothing here :type do_flips: bool :param include_initial: Include the current water orientation in the state list :type include_initial: bool """ # Get the atom objects for easy access to attributes oxygen = ct.atom[self.oxygen] hydrogen1 = ct.atom[self.hydrogen1] hydrogen2 = ct.atom[self.hydrogen2] # Include initial state. if include_initial: self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz)) self.state_names.append("Initial") enumerator = WaterStateEnumerator(ct, ct.atom[self.oxygen], acceptors, donors) states = enumerator.enumerate_donor_donor_states() states += enumerator.enumerate_acceptor_acceptor_states() # Only enumerate additional less favorable states if the current # pool is not too large. if len(states) < 30: states += enumerator.enumerate_donor_states() states += enumerator.enumerate_acceptor_states() for state in states: self.state_coordinates.append(state.coordinates) self.state_names.append("Orientation") # In case no acceptors or donors are nearby and no states are # generated, just include the initial state. if self.nstates == 0: self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz)) self.state_names.append("Initial") # Name the states if include_initial: for istate in range(1, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate, self.nstates - 1) else: for istate in range(0, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate + 1, self.nstates)
[docs] def add_current_to_states(self, ct): xyz1 = ct.atom[self.hydrogen1].xyz xyz2 = ct.atom[self.hydrogen2].xyz self.state_coordinates.append((xyz1, xyz2)) self.num_user_states += 1 self.state_names.append("User %d" % self.num_user_states)
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): xyz1, xyz2 = self.state_coordinates[istate] ct.atom[self.hydrogen1].xyz = xyz1 ct.atom[self.hydrogen2].xyz = xyz2 self.current_state = istate return []
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.oxygen = self.get_new_index(ct, self.oxygen, new_indices) self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices) self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices) return
[docs] def get_heavies(self): return [self.oxygen]
[docs] def get_state_sites(self, ct, istate): xyz1, xyz2 = self.state_coordinates[istate] ct.atom[self.hydrogen1].xyz = xyz1 ct.atom[self.hydrogen2].xyz = xyz2 return ([self.oxygen], [(self.oxygen, self.hydrogen1), (self.oxygen, self.hydrogen2)], [], 0.0)
[docs] def get_view_atoms(self): return [self.oxygen, self.hydrogen1, self.hydrogen2]
[docs] def get_penalty(self, istate): return 0.0
[docs] def get_adjustable_atoms(self): return [self.oxygen, self.hydrogen1, self.hydrogen2]
[docs] class hbond_cluster:
[docs] def __init__(self): self.changeables = [] self.self_scores = {} self.pair_scores = {} self.self_charges = {} self.combinations = [] self.static_acceptors = [] self.static_donors = [] self.static_clashers = [] self.max_keep = 10 self.abort_sampling = False self.num_scored = 0 self.max_score = 100000 self.use_xtal = False self.trouble = False self.failed = False # Save the cutoff values as an attribute as these can be changed # during optimization. self.polar_clash_cutoff = POLAR_CLASH_CUTOFF self.nonpolar_clash_cutoff = NONPOLAR_CLASH_CUTOFF self.acceptor_acceptor_clash_cutoff = ACCEPTOR_ACCEPTOR_CLASH_CUTOFF
[docs] def setup_xtal(self, ct, interact, clustering_distance): # Set up a lookup dictionary with changeable index as key as a set # of changeable indices for which it requres an xtal mate. self.need_xtal = defaultdict(set) if not self.use_xtal: return global_to_local_index = { c.index: n for n, c in enumerate(self.changeables) } for ichangeable in range(len(self.changeables)): changeable1 = self.changeables[ichangeable] heavies1 = changeable1.get_heavies() for index2 in interact[changeable1.index]: if index2 <= changeable1.index: continue jchangeable = global_to_local_index.get(index2) if jchangeable is None: continue changeable2 = self.changeables[jchangeable] heavies2 = changeable2.get_heavies() # See if they interact without xtal for iatom, jatom in itertools.product(heavies1, heavies2): if ct.measure(iatom, jatom) <= clustering_distance: # We don't need crystal mate for this changeable break else: self.need_xtal[changeable1.index].add(changeable2.index) self.need_xtal[changeable2.index].add(changeable1.index) report( LOG_EXTRA, self.changeables[ichangeable].name + ' and ' + self.changeables[jchangeable].name + ' interact via crystal symmetry only')
[docs] def optimize(self, ct: structure.Structure, interact: Dict[int, Set[int]], static_donors: List[Tuple[int, int]], static_acceptors: List[int], static_clashers: List[int], max_comb: int, num_sequential_cycles: int, use_propka: bool, propka_pH: float = 7.0, annotated_ct: Optional[structure.Structure] = None): """ Optimize hydrogen bond network and protonation states of changeables in this cluster :param ct: Structure containing changeables to optimize :param interact: Interaction lookup dict for changeable-changeable interactions :param static_donors: List of static donor atom indices :param static_acceptors: List of static acceptor atom indices :param static_clashers: List of static clasher atom indices :param max_comb: Maximum number of combinations at which an exhaustive search is performed. :param num_sequential_cycles: Number of optimization cycles when using heuristic search. :param use_propka: Use PROPKA to determine pKa values of changeables :param propka_pH: pH of system when using PROPKA for pKa determination :param annotated_ct: Annotated structure that may contain xtal mates for faster optimization. Passing this, skips the creation of crystal mates and structure annotation during the self-scoring phase. """ for changeable in self.changeables: if not changeable.treated: changeable.pre_treat(ct) self.static_acceptors = static_acceptors self.static_donors = static_donors self.static_clashers = static_clashers potential_combinations = 1 for changeable in self.changeables: potential_combinations *= changeable.nstates if annotated_ct is None: annotated_ct = generate_annotated_ct(ct, static_donors, static_acceptors, static_clashers, self.use_xtal) # Initialize and pre-calculate scores self.combinations = [] report(LOG_DEBUG, " Prescoring self") self.pre_score_self(annotated_ct) self.pre_score_pairs(ct, interact) report(LOG_BASIC) if potential_combinations <= max_comb: report( LOG_BASIC, ' Scoring exhaustively (%d potential combinations)...' % potential_combinations) self.score_exhaustively(ct, interact, find_all_solutions=True, tolerate_clashes=True) else: if TOULBAR2_AVAILABLE: report(LOG_BASIC, " Trying exact network solver") report(LOG_BASIC, " Stage1: Solving network exact and diversify") combinations = self.solve_network_exact_and_diversify( ct, interact, ncombinations=1) for combination in combinations: energy = self.score_combination(ct, interact, combination) self.combinations.append((combination, 0.0, energy)) if not self.combinations: if TOULBAR2_AVAILABLE: report( LOG_BASIC, " Exact network solver failed. Falling back to stochastic optimization" ) report( LOG_BASIC, ' Scoring via sequential iteration and hybridization (%d potential combinations)...' % potential_combinations) report(LOG_BASIC, ' Beginning sequential iteration...') self.score_sequentially(ct, interact, num_sequential_cycles) self.expand_solutions(ct, interact) self.recombine_solutions(ct, interact) self.trim_redundant_combinations() # Log some results report(LOG_BASIC) report(LOG_BASIC, ' Cluster results:') for i, changeable in enumerate(self.changeables): state_name = changeable.state_names[self.combinations[0][0][i]] if changeable.pka is None: msg = f' {changeable.name} : {state_name}' else: msg = f' {changeable.name} : {state_name} (pKa = {changeable.pka:.2f})' report(LOG_BASIC, msg) report(LOG_EXTRA, ' Top %d results:' % len(self.combinations)) for combination in self.combinations: report(LOG_EXTRA, ' ' + str(combination)) report(LOG_BASIC)
[docs] def solve_network_exact_and_diversify(self, ct, interact, ncombinations, delta_energy=2.0): """ Solve the network exact :param ct: Structure :type ct: Structure :param interact: Changeable interaction lookup table :type interact: Dict[int, Set[int]] :param ncombinations: Number of maximum combinations to return :type ncombinations: int :param delta_energy: Maximum energy difference of combinations generated compared to the global minimum :type delta_energy: float :return: List of combinations with first element being the global optimum combination or empty list if no optimum found :rtype: List[List[int]] """ # Get an upper bound energy by just calculating the ennergy of the # current combination and add 1 to it. current_combination = [c.current_state for c in self.changeables] upper_bound = self.score_combination(ct, interact, current_combination) + 1 solver = NetworkSolver(self, interact, upper_bound) optimal_combination = solver.optimal_solution() if optimal_combination is None: return [] optimal_energy = self.score_combination(ct, interact, optimal_combination) report( LOG_BASIC, f" Optimal network combination found with energy: {optimal_energy:.5f}" ) if ncombinations <= 1: return [optimal_combination] # Generate additional combinations for diversity To increase # diversity, reduce the set down by picking some random # combinations. We reduce here, because toulbar2 is greedy in its # generation of combinations and subsequent combinations might be # very similar to previous ones. combinations = solver.explore_solutions(upper_bound=optimal_energy + delta_energy, number=1000) # Make sure the global minimum is part of the list. if combinations: k = max(min(len(combinations), ncombinations - 1), 0) combinations = random.sample(combinations, k=k) return [optimal_combination] + combinations
[docs] def score_combination(self, ct, interact, states): # This is for calculating the energy of a single set of states, and assumes prescoring of self terms. energy = 0.0 global_to_local_index = { c.index: n for n, c in enumerate(self.changeables) } for ichangeable, istate in enumerate(states): changeable = self.changeables[ichangeable] energy += self.self_scores[changeable.index][istate] (iacceptors, idonors, iclashers, icharge) = changeable.get_state_sites(ct, istate) for index2 in interact[changeable.index]: if index2 <= changeable.index: continue jchangeable = global_to_local_index.get(index2) if jchangeable is None: continue jstate = states[jchangeable] key = (changeable.index, index2) pair_scores = self.pair_scores.get(key) if pair_scores is not None: energy += pair_scores[istate, jstate] return energy
[docs] def single_point(self, ct, interact, static_donors, static_acceptors, static_clashers, xtal_ct=None): # This is for calling from the GUI, and assumes nothing is precalculated. combination = [] for changeable in self.changeables: combination.append(changeable.current_state) if not changeable.treated: changeable.pre_treat(ct) if xtal_ct is None: self.setup_local_static(ct, static_acceptors, static_donors, static_clashers) else: self.setup_local_static_alt(xtal_ct, static_acceptors, static_donors, static_clashers) global_to_local_index = { c.index: n for n, c in enumerate(self.changeables) } self.single_point_score = 0.0 for ichangeable in range(len(self.changeables)): changeable1 = self.changeables[ichangeable] #istate = self.changeables[ichangeable].current_state istate = combination[ichangeable] (iacceptors, idonors, iclashers, icharge) = changeable1.get_state_sites(ct, istate) self.single_point_score += self.score_pair( ct, iacceptors, idonors, iclashers, icharge, self.static_acceptors, self.static_donors, self.static_clashers, 0.0, use_xtal=self.use_xtal) self.single_point_score += changeable1.get_penalty(istate) for index2 in interact[changeable1.index]: if index2 <= changeable1.index: continue jchangeable = global_to_local_index.get(index2) if jchangeable is None: continue changeable2 = self.changeables[jchangeable] jstate = combination[jchangeable] (jacceptors, jdonors, jclashers, jcharge) = changeable2.get_state_sites(ct, jstate) self.single_point_score += self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=index2 in self.need_xtal[changeable1.index]) # Restore to proper state atoms_to_delete = [] for i, changeable in enumerate(self.changeables): atoms = changeable.assign_state(ct, combination[i], False) atoms_to_delete.extend(atoms) return atoms_to_delete
[docs] def setup_local_static_alt(self, ct, static_acceptors, static_donors, static_clashers): self.static_acceptors = [] self.static_donors = [] self.static_clashers = [] base_distance = 4.0 heavies = [] for changeable in self.changeables: heavies.extend(changeable.get_heavies()) max_interaction_distance = base_distance while static_acceptors and static_donors: atoms = analyze.evaluate_asl(ct, 'within %d atom.i_pa_atomindex %s' % \ (max_interaction_distance, ','.join(str(i) for i in heavies))) for acceptor in static_acceptors: if acceptor in atoms and acceptor not in self.static_acceptors: self.static_acceptors.append(acceptor) for donor in static_donors: if donor[0] in atoms and donor not in self.static_donors: self.static_donors.append(donor) for clasher in static_clashers: if clasher in atoms and clasher not in self.static_clashers: self.static_clashers.append(clasher) if len(self.static_acceptors) > 0 and len( self.static_donors) > 0: break max_interaction_distance += 0.5 return
[docs] def setup_local_static(self, ct, static_acceptors, static_donors, static_clashers): self.static_acceptors = [] self.static_donors = [] self.static_clashers = [] base_distance = 4.0 heavies = [] for changeable in self.changeables: for atom in changeable.get_heavies(): heavies.append(atom) if len(static_acceptors) > 0: max_interaction_distance = base_distance while len(self.static_acceptors) == 0: for acceptor in static_acceptors: for atom in heavies: if measure(ct, atom1=atom, atom2=acceptor, use_xtal=self.use_xtal ) <= max_interaction_distance: self.static_acceptors.append(acceptor) break max_interaction_distance += 0.5 if len(static_donors) > 0: max_interaction_distance = base_distance while len(self.static_donors) == 0: for donor in static_donors: for atom in heavies: if measure(ct, atom1=atom, atom2=donor[0], use_xtal=self.use_xtal ) <= max_interaction_distance: self.static_donors.append(donor) break max_interaction_distance += 0.5 if len(static_clashers) > 0: max_interaction_distance = base_distance for clasher in static_clashers: for atom in heavies: if measure(ct, atom1=atom, atom2=clasher, use_xtal=self.use_xtal ) <= max_interaction_distance: self.static_clashers.append(clasher) break max_interaction_distance += 0.5 return
[docs] def pre_score_self(self, ct): """ Calculate the self score for each state. Interactions are calculated between the changeable and its static environment. """ if ct.property.get(ANNOTATED_PROPERTY): working_ct = ct else: working_ct = generate_annotated_ct(ct, self.static_donors, self.static_acceptors, self.static_clashers, use_xtal=self.use_xtal) dcell = create_distance_cell(working_ct, MAX_HEAVY_HEAVY_INTERACTION_DISTANCE, honor_pbc=False) for changeable in self.changeables: self.self_scores[changeable.index] = np.zeros( changeable.nstates, dtype=np.double) self.self_charges[changeable.index] = np.zeros( changeable.nstates, dtype=np.double) if changeable.locked: self.self_scores[changeable.index].fill(100) self.self_scores[changeable.index][ changeable.current_state] = 0 continue for istate in range(changeable.nstates): # Should be okay to use working_ct here, even though # get_state_sites alters coordinates for rotatables. The # alteration only need to persist through score_pair(). (iacceptors, idonors, iclashers, icharge) = changeable.get_state_sites(working_ct, istate) static_acceptors, static_donors, static_clashers = changeable.get_close_interactors( working_ct, dcell) # Get the score (don't need use_xtal even if xtal is on, # because we have explicit crystalmates. score = self.score_pair(working_ct, iacceptors, idonors, iclashers, icharge, static_acceptors, static_donors, static_clashers, 0.0, use_xtal=False) penalty = changeable.get_penalty(istate) total_score = score + penalty self.self_scores[changeable.index][istate] = total_score self.self_charges[changeable.index][istate] = icharge
[docs] def pre_score_pairs(self, ct, interact: Dict[int, Set[int]]): mapper = { changeable.index: changeable for changeable in self.changeables } for changeable1 in self.changeables: if changeable1.locked: istates = [changeable1.current_state] else: istates = list(range(changeable1.nstates)) for index2 in interact[changeable1.index]: if index2 <= changeable1.index: continue changeable2 = mapper.get(index2) if changeable2 is None: continue if changeable2.locked: jstates = [changeable2.current_state] else: jstates = list(range(changeable2.nstates)) key1 = (changeable1.index, changeable2.index) key2 = (changeable2.index, changeable1.index) need_xtal = changeable2.index in self.need_xtal[ changeable1.index] pair_scores = np.zeros( (changeable1.nstates, changeable2.nstates), dtype=np.double) for istate in istates: (iacceptors, idonors, iclashers, icharge) = changeable1.get_state_sites(ct, istate) for jstate in jstates: (jacceptors, jdonors, jclashers, jcharge) = changeable2.get_state_sites(ct, jstate) score = self.score_pair(ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=need_xtal) pair_scores[istate, jstate] = score self.pair_scores[key1] = pair_scores self.pair_scores[key2] = pair_scores.T
[docs] def score_pair(self, ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=False): score = 0.0 # Clasher-clasher interaction for clash1, clash2 in itertools.product(iclashers, jclashers): distance = measure(ct, atom1=clash1, atom2=clash2, use_xtal=use_xtal) clashing_term = self.calculate_clash_term( distance, self.nonpolar_clash_cutoff, base=30) score += clashing_term # Clasher-donor interaction for clashers, donors in [(iclashers, jdonors), (jclashers, idonors)]: for donor, clasher in itertools.product(donors, clashers): hydrogen = donor[1] distance = measure(ct, atom1=clasher, atom2=hydrogen, use_xtal=use_xtal) clashing_term = self.calculate_clash_term( distance, self.nonpolar_clash_cutoff, base=30) score += clashing_term # Clashing part of donor-donor interaction for donor1, donor2 in itertools.product(idonors, jdonors): h1 = donor1[1] h2 = donor2[1] distance = measure(ct, atom1=h1, atom2=h2, use_xtal=use_xtal) clashing_term = self.calculate_clash_term( distance, self.polar_clash_cutoff, base=50) score += clashing_term # Metals are part of the donor list by convention. Penalize # interaction if a donor is pointing to a metal. We do not know # upfront whether a donor is a metal, so we need to calculate # both combinations. metal_term = self.score_metal_donor(ct, donor1, donor2, use_xtal=use_xtal) metal_term += self.score_metal_donor(ct, donor2, donor1, use_xtal=use_xtal) score += metal_term # Acceptor-acceptor repulsion. Only counts for static acceptors, # i.e. acceptors that cannot donate. donor_acceptors = {d[0] for d in itertools.chain(idonors, jdonors)} for iacceptor, jacceptor in itertools.product( iacceptors, jacceptors): if iacceptor in donor_acceptors or jacceptor in donor_acceptors: continue score += self.score_acceptor_acceptor(ct, iacceptor, jacceptor, use_xtal=use_xtal) # Donor-acceptor interactions. Donors can accept from other donors acceptor_donor_pairs = ((idonors, jacceptors), (jdonors, iacceptors)) for donors, acceptors in acceptor_donor_pairs: for (heavy, hydrogen), acceptor in itertools.product( donors, acceptors): # Do not score metal - acceptor interactions if heavy == hydrogen: continue score += self.score_donor_acceptor(ct, heavy, hydrogen, acceptor, use_xtal=use_xtal) return score
[docs] def score_acceptor_acceptor(self, ct: Structure, iacceptor: int, jacceptor: int, use_xtal=False) -> float: """ Scoring function for acceptor-acceptor interactions. These interactions are considered worse than hydrogen clashes, and its base penalty is thus higher. It is set to 70 as it makes the scoring function more robust for the Asn and Gln state of 2X9E and 3UZA that interact with the ligand. No other optimization has been performed. :return: Score """ distance = measure(ct, atom1=iacceptor, atom2=jacceptor, use_xtal=use_xtal) score = self.calculate_clash_term( distance, self.acceptor_acceptor_clash_cutoff, base=40) return score
[docs] def score_donor_acceptor(self, ct, donor_heavy, donor_hydrogen, acceptor_heavy, use_xtal=False): heavy_heavy_distance = measure(ct, atom1=donor_heavy, atom2=acceptor_heavy, use_xtal=use_xtal) if heavy_heavy_distance > MAX_HEAVY_HEAVY_INTERACTION_DISTANCE: return 0.0 if heavy_heavy_distance == 0.0: name1 = get_atom_string(ct.atom[acceptor_heavy]) name2 = get_atom_string(ct.atom[donor_heavy]) report(LOG_BASIC, f'WARNING: Overlapping atoms found: {name1} - {name2}') return 0.0 # Acceptor-metal interaction score is only based on distance if donor_heavy == donor_hydrogen: return self.calculate_distance_term(heavy_heavy_distance) angle = measure(ct, atom1=donor_hydrogen, atom2=donor_heavy, atom3=acceptor_heavy, use_xtal=use_xtal) angle_term = self.calculate_angle_term(angle) if angle_term == 0: return 0.0 distance_term = self.calculate_distance_term(heavy_heavy_distance) score = distance_term * angle_term return score
[docs] @staticmethod def score_metal_donor(ct: Structure, metal: Tuple[int, int], donor: Tuple[int, int], use_xtal=False) -> float: """ Score a metal-donor interaction. Donors should not be pointing towards metals. If so, this will result in a penalty """ # Check if metal is actually a metal. It is a metal if the heavy # atom index is the same as the hydrogen index. m1, m2 = metal if m1 != m2: return 0.0 # Check that the donor is NOT a metal. dheavy, dhydrogen = donor if dheavy == dhydrogen: return 0.0 distance = measure(ct, atom1=m1, atom2=dheavy, use_xtal=use_xtal) if distance >= 3.0: return 0.0 angle = measure(ct, atom1=m1, atom2=dheavy, atom3=dhydrogen) if angle < 95: # Return a score of +5 that is between the value of a clash but # bigger than an attractive potential. return 5.0 return 0.0
[docs] @staticmethod def calculate_distance_term(distance): """Return distance dependent part of the hydrogen-bond potential functions.""" return -(3.0 / distance)**3
[docs] @staticmethod def calculate_angle_term(angle): """ Return angle dependent part of the hydrogen bond potential. :param angle: Angle in degrees formed by H-D-A, with Hydrogen, Donor and Acceptor :param angle: float :return: Score :rtype: float """ if angle >= 90.0: return 0 return math.cos(math.radians(angle))**2
[docs] @staticmethod def calculate_clash_term(distance, cutoff, base=50): """Return clash term""" if distance < cutoff: # Distinghuish between bad and very bad clashes return base * (1 + (cutoff - distance)) return 0.0
[docs] def score_exhaustively(self, ct, interact, find_all_solutions=True, tolerate_clashes=False): def recurse(changeables, combination, energy, charge, ct, interact, find_all_solutions, tolerate_clashes): if len(combination) > 0: # Abort this branch if we've picked up any clashes with the static. ichangeable = len(combination) - 1 istate = combination[-1] changeable1 = self.changeables[ichangeable] (iacceptors, idonors, iclashers, icharge) = changeable1.get_state_sites(ct, istate) self_energy = self.self_scores[changeable1.index][istate] if self_energy > 10.0 and not tolerate_clashes: report(LOG_DEBUG, ' Self clash.') return -1 energy += self_energy charge += self.self_charges[changeable1.index][istate] # Abort this branch if we've picked up any clashes with an existing changeable. for jchangeable in range(len(combination) - 2, -1, -1): changeable2 = self.changeables[jchangeable] if changeable2.index not in interact[changeable1.index]: continue jstate = combination[jchangeable] key = (changeable1.index, changeable2.index) pair_energy = self.pair_scores[key][istate, jstate] if pair_energy > 10.0 and not tolerate_clashes: report( LOG_DEBUG, ' Clash with changeable %d - %s.' % (jchangeable, changeable2.name)) return jchangeable else: energy += pair_energy if not find_all_solutions: report(LOG_DEBUG, ' Current combination: %s' % str(combination)) if changeables: # Delve another level down. clashes = [] if changeables[0].locked: new_combination = combination + [ changeables[0].current_state ] self.num_scored += 1 if not find_all_solutions and self.num_scored > self.max_score: report( LOG_BASIC, ' Failed to find valid initial combination in alotted time.' ) self.abort_sampling = True return clash = recurse(changeables[1:], new_combination, energy, charge, ct, interact, find_all_solutions, tolerate_clashes) clashes.append(clash) if self.abort_sampling: return None else: for state in range(changeables[0].nstates): new_combination = combination + [state] self.num_scored += 1 if not find_all_solutions and self.num_scored > self.max_score: report( LOG_DEBUG, ' Failed to find valid initial combination in alotted time.' ) self.abort_sampling = True return report( LOG_DEBUG, ' Trying state %d (%s) for changeable %d (%s).' % (state, changeables[0].state_names[state], len(combination), changeables[0].name)) clash = recurse(changeables[1:], new_combination, energy, charge, ct, interact, find_all_solutions, tolerate_clashes) clashes.append(clash) if self.abort_sampling: return None if self.abort_to_level is not None: if self.abort_to_level == len(combination): report(LOG_DEBUG, ' Ceasing abort.') self.abort_to_level = None else: return if None not in clashes: highest_clash = 0 for clash in clashes: if clash > highest_clash: highest_clash = clash self.abort_to_level = highest_clash report( LOG_DEBUG, ' Aborting to level %d' % self.abort_to_level) else: if energy > 10.0 and not tolerate_clashes: return # Finished. report( LOG_DEBUG, ' Final score = ' + str(energy) + ' (' + str(combination) + ')') self.combinations.append((combination, charge, energy)) if not find_all_solutions: # Only wanted one. self.abort_sampling = True return None if len(self.combinations) > self.max_keep: self.combinations.sort(key=operator.itemgetter(-1)) self.combinations = self.combinations[0:self.max_keep] self.abort_sampling = False self.abort_to_level = None self.num_scored = 0 energy = 0.0 charge = 0.0 recurse(self.changeables, [], energy, charge, ct, interact, find_all_solutions, tolerate_clashes) # Final sort. self.combinations.sort(key=lambda item: item[-1])
[docs] def score_sequentially(self, ct: structure.Structure, interact: Dict[int, Set[int]], num_sequential_cycles: int): """ This routine uses an algorithm similar to Prime's iteration to convergence. Starting from a random configuration, each species is optimized in turn, keeping the others fixed in their current state. This continues until the system reaches convergence (no more changes in the most optimal state for all residues). :param ct: input/output structure, will be modified :param interact: Interaction lookup table :param num_sequential_cycles: Number of cycles of randomization and optimization to conduct """ report(LOG_BASIC, ' Stage 1: Sequential iteration with hybridization') local_combinations = [] randomize = True num_failed_hybrids = 0 # Passes with different random starting points for iter in range(num_sequential_cycles): # Whether or not to create a random starting point, or just work with the current one if randomize: comb = [ random.randint(0, changeable.nstates - 1) for changeable in self.changeables ] # Main iteration routine. (total_energy, problem_children) = self.iterate_to_convergence( ct, interact, comb) # Append the converged solution local_combinations.append( ((comb, 0.0, total_energy), problem_children)) report( LOG_EXTRA, ' Energy for this pass: ' + str(total_energy) + ' (' + str(len(problem_children)) + ' problematic members)') for problem in problem_children: report(LOG_EXTRA, ' ' + self.changeables[problem].name) #for combi in local_combinations: # print str(combi) # Successfully found a good solution. Append it to the list and start again. if len(problem_children) == 0: report( LOG_EXTRA, ' No problematic regions for this structure, storing it and starting again.' ) self.combinations.append((comb, 0.0, total_energy)) local_combinations = [] randomize = True num_failed_hybrids = 0 continue # Try to fix the best solution with pieces of the other solutions. if (iter >= 3) and (iter % 2 == 1): comb = self.create_hybrid(local_combinations, interact) if comb is None: num_failed_hybrids += 1 if num_failed_hybrids == 5: report( LOG_EXTRA, ' Too many failed attempts at hybrid. Storing this solution and starting again.' ) local_combinations.sort(key=lambda x: x[0][-1]) self.combinations.append(local_combinations[0][0]) local_combinations = [] randomize = True num_failed_hybrids = 0 report(LOG_EXTRA, ' Randomizing...') randomize = True else: num_failed_hybrids = 0 randomize = False else: randomize = True # If none were found, at least add the current ones. if len(self.combinations) == 0: for combination in local_combinations: self.combinations.append(combination[0]) self.combinations.sort(key=operator.itemgetter(-1)) report( LOG_BASIC, ' Best solution after Stage 1: ' + str(self.combinations[0][2])) report(LOG_EXTRA, ' ' + str(self.combinations[0][0])) return
[docs] def expand_solutions(self, ct, interact): """ This takes an existing set of good solutions and generates more by deconverging them and then iterating them back to convergence. Generates at least 10 new solutions. """ report(LOG_BASIC, ' Stage 2: Diversification of existing solutions') new_combinations = [] while len(new_combinations) < 10: for comb in self.combinations: (local_comb, charge, initial_energy) = copy.deepcopy(comb) report( LOG_EXTRA, ' Diverging solution with initial energy of ' + str(initial_energy)) self.deconverge(ct, interact, local_comb) report(LOG_EXTRA, ' Now reconverging...') (final_energy, problem_children) = self.iterate_to_convergence( ct, interact, local_comb) report( LOG_EXTRA, ' Energy for reconverged solution: ' + str(final_energy)) new_combinations.append((local_comb, 0.0, final_energy)) self.combinations.extend(new_combinations) self.combinations.sort(key=operator.itemgetter(-1)) report( LOG_BASIC, ' Best solution after Stage 2: ' + str(self.combinations[0][2])) report(LOG_EXTRA, ' ' + str(self.combinations[0][0])) return
[docs] def recombine_solutions(self, ct, interact): """ This is similar to score_sequentially, but begins with some pre-existing good solutions in self.combinations, and then creates hybrids to try to improve on them. """ report(LOG_BASIC, ' Stage 3: Recombination of existing solutions') # Set up the list of combinations, plus their problem children. # Iterations should do nothing. local_combinations = [] for (comb, charge, energy) in self.combinations: (total_energy, problem_children) = self.iterate_to_convergence( ct, interact, comb, problem_cutoff=0.0) local_combinations.append( ((comb, 0.0, total_energy), problem_children)) for iter in range(30): comb = self.create_hybrid(local_combinations, interact, random_scaffold=True) if comb is None: continue (total_energy, problem_children) = self.iterate_to_convergence( ct, interact, comb, problem_cutoff=-0.5) report( LOG_EXTRA, ' Energy for this pass: ' + str(total_energy) + ' (' + str(len(problem_children)) + ' problematic members)') local_combinations.append( ((comb, 0.0, total_energy), problem_children)) for comb in local_combinations: self.combinations.append(comb[0]) self.combinations.sort(key=operator.itemgetter(-1)) report( LOG_BASIC, ' Best solution after Stage 3: ' + str(self.combinations[0][2])) report(LOG_EXTRA, ' ' + str(self.combinations[0][0])) return
[docs] def deconverge(self, ct, interact, comb, problem_cutoff=50.0): """ This starts with what is assumed to be a good solution, and then randomizes the states, but not to anything that produces a problem. """ global_to_local_index = { c.index: n for n, c in enumerate(self.changeables) } for (ichangeable, changeable) in enumerate(self.changeables): states = [i for i in range(changeable.nstates)] random.shuffle(states) for istate in states: state_energy = self.self_scores[changeable.index][istate] for changeable2_index in interact[changeable.index]: jchangeable = global_to_local_index.get( changeable2_index) # local index might not be available if the cluster size has # been capped in the ProtAssign workflow if jchangeable is None: continue jstate = comb[jchangeable] key = (changeable.index, changeable2_index) pair_scores = self.pair_scores.get(key) if pair_scores is not None: state_energy += pair_scores[istate, jstate] if state_energy < problem_cutoff: comb[ichangeable] = istate break
[docs] def iterate_to_convergence(self, ct, interact, comb, problem_cutoff=50.0): """ This iterates the combination 'comb' to convergence. Maximum of 10 cycles. """ # Create a list of indices for shuffling later. Using a different # order each time may help avoid a few local minima. global_to_local_index = { c.index: n for n, c in enumerate(self.changeables) } changeable_list = list(range(len(self.changeables))) for ipass in range(10): random.shuffle(changeable_list) problem_children = [] nchanges = 0 #for ichangeable,changeable in enumerate(self.changeables): for ichangeable in changeable_list: changeable = self.changeables[ichangeable] if changeable.locked: comb[ichangeable] = changeable.current_state continue best_state = -1 best_state_energy = 1000000 for istate in range(changeable.nstates): state_energy = self.self_scores[ changeable.index][istate] for changeable2_index in interact[changeable.index]: jchangeable = global_to_local_index.get( changeable2_index) # local index might not be available if the cluster size has # been capped in the ProtAssign workflow if jchangeable is None: continue jstate = comb[jchangeable] key = (changeable.index, changeable2_index) pair_scores = self.pair_scores.get(key) if pair_scores is None: continue state_energy += pair_scores[istate, jstate] if state_energy < best_state_energy: best_state = istate best_state_energy = state_energy if best_state != comb[ichangeable]: nchanges += 1 comb[ichangeable] = best_state report( LOG_DEBUG, ' Best score for ' + changeable.name + ' is ' + str(best_state_energy) + ' for state ' + changeable.state_names[best_state]) if best_state_energy > problem_cutoff: problem_children.append(ichangeable) if nchanges == 0: break total_energy = self.score_combination(ct, interact, comb) return (total_energy, problem_children)
[docs] def create_hybrid(self, local_combinations: List[Tuple[Tuple[List[int], float, float], List[int]]], interact: Dict[int, Set[int]], random_scaffold: bool = False) -> Optional[List[int]]: """ This takes the lowest energy solution, and for each problematic region it searches other solutions (in random order) for any which may have had better luck for just that part of the overall cluster. It then splices those solutions into the lowest energy one. If random_scaffold, then it selects a random solution as the basis in stead of the lowest energy one. :param local_combinations: List of combinations, where a combination is specified as a tuple holding the combination, total charge, and total energy, and another list of problem children. :param interact: Interaction lookup table for changeables :param random_scaffold: Choose a random starting combination, otherwise start with the first combiation """ global_to_local_index = { c.index: n for n, c in enumerate(self.changeables) } # Try to fix the best solution with pieces of the other solutions report(LOG_EXTRA, ' Creating hybrid solution...') # Sort based on energy local_combinations.sort(key=lambda x: x[0][-1]) fixes = {} neighbours = {} if random_scaffold: iscaffold = random.randint(0, len(local_combinations) - 1) else: iscaffold = 0 for problem_child in local_combinations[iscaffold][1]: # Randomize the order in which solutions are searched randomized_combinations = copy.deepcopy(local_combinations) random.shuffle(randomized_combinations) changeable1 = self.changeables[problem_child] for solution in randomized_combinations: if problem_child in solution[1]: continue combination = solution[0][0] fixes[problem_child] = combination[problem_child] for index2 in interact[changeable1.index]: jchangeable = global_to_local_index.get(index2) if jchangeable is None: continue neighbours[jchangeable] = combination[jchangeable] break fixes.update(neighbours) if len(fixes) != 0: comb = copy.deepcopy(local_combinations[0][0][0]) for (problem_child, istate) in fixes.items(): comb[problem_child] = istate return comb else: report(LOG_EXTRA, ' No valid hybrid found.') return None
[docs] def trim_redundant_combinations(self): nonredund_combs = [] existing_scores = set() for comb in self.combinations: if comb[2] not in existing_scores: nonredund_combs.append(comb) existing_scores.add(comb[2]) self.combinations = nonredund_combs return
[docs] def assign_combination(self, ct, icombination, add_labels, label_pkas, verbose=False): """ Assign a given combination to this cluster :param ct: The structure to operate on :type ct: schrodinger.Structure :param icombination: The index of the combination to assign or if this number is larger then the stored combinations, just keep the current state :type icombinations: integer :param add_labels: Whether to add labels to atoms to be seen in maestro with the current protonation state :type add_labels: bool :param label_pka: Whether to add labels for the pKa of each residue :type label_pka: bool :param verbose: Whether to report additional information to the log file about the combination chosen :type verbose: bool """ atoms_to_delete = [] if icombination < len(self.combinations): for ichangeable in range(len(self.changeables)): state_gaps = self.determine_gap(icombination, ichangeable) atoms = self.changeables[ichangeable].assign_state( ct, self.combinations[icombination][0][ichangeable], add_labels, label_pkas, state_gaps=state_gaps, verbose=verbose) atoms_to_delete.extend(atoms) else: for ichangeable in range(len(self.changeables)): changeable = self.changeables[ichangeable] atoms = changeable.assign_state(ct, changeable.current_state, add_labels, label_pkas) atoms_to_delete.extend(atoms) return atoms_to_delete
[docs] def determine_gap(self, icombination, ichangeable): """ Create a dictionary with the energy gaps to each of the various states. States that differ by only a hydrogen rotation are not considered unique :type icombination: integer :param icombination: the combination to use as the zero point. In most situations this will be the lowest energy combination ( 0 when sorted) :type ichangeable: integer :param ichangeable: The residue number ( or position number) within the cluster which will be analyzed :rparam: dictionary where the key is the name of the state or "Default" when the state is one of the staggers :rtype: dictionary with a key of string and value of a float """ def dedup_state(state): if (re.match(r"Stagger \d+ \d+/\d+", state) or re.match("Stagger", state) or re.match(r"Orientation \d+/\d+", state) or re.match(r"Default \d+ \d+/\d+", state) or re.match(r"Default \d+/\d+", state)): return "Default" return state changeable = self.changeables[ichangeable] state_gaps = {} min_score = self.combinations[icombination][2] for combination in self.combinations: state_name = dedup_state( changeable.state_names[combination[0][ichangeable]]) if state_name not in state_gaps: state_gaps[state_name] = combination[2] - min_score else: state_gaps[state_name] = min(combination[2] - min_score, state_gaps[state_name]) return state_gaps
# ProtAssign class start
[docs] def __init__( self, ct, interactive=False, do_flips=True, asl='', noprot_asl='', atoms=[], # noqa: M511 use_xtal=False, sample_waters=True, sample_acids=True, freeze_existing=False, include_initial=False, max_comb=10000, num_sequential_cycles=30, max_cluster_size=None, # The maximum number of residues seed: Optional[int] = None, # to include in each cluster or None # if no such maximum should be used logging_level=DEFAULT_LOG_LEVEL, quiet_flag=False, debug_flag=False, add_labels=True, label_pkas=False, pH=pH_neutral, use_propka=True, propka_pH=7.0, user_states=[], # noqa: M511 minimize=False, ligand_sts=None, include_epik_states=False, ): """ :param seed: Seed for random number generator :param ligand_sts: Ligand states to consider during optimization. :type ligand_sts: List[Structure] """ if seed is not None: random.seed(seed) report(LOG_DEBUG, f"Set random seed to {seed}") global log_level log_level = logging_level self.include_epik_states = include_epik_states # Alternate (backwards compatible) way of specifying output level. if quiet_flag: log_level = LOG_NONE elif debug_flag: log_level = LOG_DEBUG self.species = [ self.amide_changeable, self.amine_changeable, self.histidine_changeable, self.rotatable_changeable ] self.interactive = interactive self.do_flips = do_flips self.noprot_asl = noprot_asl self.use_xtal = use_xtal self.freeze_existing = freeze_existing self.include_initial = include_initial self.max_comb = max_comb self.num_sequential_cycles = num_sequential_cycles self.max_cluster_size = max_cluster_size self.add_labels = add_labels self.label_pkas = label_pkas self.changeables = [] self.clusters = [] self.interact = {} self.donors = [] self.acceptors = [] self.clashers = [] self.xtal_ct = None self.pH = pH self.use_propka = use_propka self.propka_pH = propka_pH self.propka_failed = False self.pka_predictors = {} # use_propka can be changed to False if PROPKA fails, so always # initialize the rule-based predictors. if USE_PROTASSIGN_PKA_PREDICTOR: # Holds the pH number for the internal pH predictor # pH should be always defined, but otherwise resort to neutral pH self.pH_predictor = PH_STRING_TO_NUMBER.get(self.pH, 7.0) self.pka_predictors = { 'his': HistidinepKaPredictor(), 'asp': AsparticAcidpKaPredictor(), 'glu': GlutamicAcidpKaPredictor(), } self.unsatisfied_donors_by_cluster = defaultdict(list) self.ligand_sts = ligand_sts or [] if self.include_epik_states: self.ligand_sts += extract_epik_states(ct, self.include_initial) self.user_states = user_states self.minimize = minimize if sample_acids: self.species.append(self.carboxyl_changeable) if sample_waters: self.species.append(self.water_changeable) # If using PropKa, don't want to use the coarse pH rules if self.use_propka: self.pH = None # Record it though self.pH_backup = pH # Fix some element types, in case they're wrong. self.fix_elements(ct) # Set up the atoms to process. asl_atoms = [] if asl == '' and len(atoms) == 0: asl = 'all' if asl: asl_atoms = analyze.evaluate_asl(ct, asl) self.target_atoms = set(atoms + asl_atoms) H_atoms = analyze.evaluate_asl(ct, 'atom.ele H') self.target_hydrogens = set(H_atoms).intersection(self.target_atoms) if self.freeze_existing: self.frozen_hydrogens = self.target_hydrogens.copy() self.freeze_existing_hydrogens(ct) else: self.frozen_hydrogens = set() self.setup(ct) self.set_user_states(ct) if not self.interactive: self.optimize(ct) self.cleanup(ct) self.summarize_pkas()
[docs] def fix_elements(self, ct): for atom in ct.atom: if atom.pdbname in [' OD1', ' OE1', ' OD2', ' OE2']: atom.atomic_number = 8 elif atom.pdbname in [' ND1', ' ND2', ' NE2']: atom.atomic_number = 7 elif atom.pdbname in [' CG ', ' CD ', ' CD2', ' CE1']: atom.atomic_number = 6 ct.retype() return
[docs] def freeze_existing_hydrogens(self, ct): atoms_to_remove = set() labile_H_atoms = set( analyze.evaluate_asl( ct, 'atom.ptype \" HD1\" OR atom.ptype \" HE2\" OR atom.ptype \"1HD2\" OR atom.ptype \"2HD2\" OR atom.ptype \"1HE2\" OR atom.ptype \"2HE2\" OR atom.ptype \"HD21\" OR atom.ptype \"HD22\" OR atom.ptype \"HE21\" OR atom.ptype \"HE22\"' )) for hydrogen in self.target_hydrogens.intersection(labile_H_atoms): if hydrogen in atoms_to_remove: continue residue_atoms = ct.getResidueAtoms(hydrogen) for residue_atom in residue_atoms: atom = int(residue_atom) atoms_to_remove.add(atom) for hydrogen in self.target_hydrogens.difference(labile_H_atoms): if hydrogen in atoms_to_remove: continue residue_atoms = ct.getResidueAtoms(hydrogen) for residue_atom in residue_atoms: atom = int(residue_atom) if ct.areBound(hydrogen, atom): atoms_to_remove.add(atom) self.target_atoms.difference_update(atoms_to_remove | self.frozen_hydrogens) return
[docs] def identify_changeables(self, ct): changeables = identify_ligand_changeables(ct, self.ligand_sts) changeables += identify_species(ct, self.species, self.target_atoms) changeables = filter_nonshared_rotatables(ct, changeables) # Give an index to each changeable for bookkeeping for n, changeable in enumerate(changeables): changeable.index = n return changeables
[docs] def setup(self, ct): charge_arginine_sidechains(ct) build.add_hydrogens(ct) self.extend_targeted_to_hyds(ct) self.changeables = self.identify_changeables(ct) if self.use_propka: try: pkas = self.run_propka(self.changeables, ct, use_xtal=self.use_xtal) except PropKaException: report( LOG_BASIC, 'WARNING: PropKa failed on this structure - disabling use of PropKa.' ) self.use_propka = False self.pH = self.pH_backup self.propka_failed = True ct.property['b_pa_PROPKA_failed'] = 1 self.remove_zero_order_bonds(ct) for item in self.changeables: item.pre_treat_1(ct) build.add_hydrogens(ct) for item in self.changeables: item.pre_treat_2(ct) annotate_atom_indices(ct) self.enumerate_changeable_states(ct) self.lock_protonation_states(ct) self.cluster(ct) if self.use_propka: self.apply_pkas(self.changeables, pkas, self.propka_pH) elif USE_PROTASSIGN_PKA_PREDICTOR: # Use new internal predictor if PROPKA is not requested and we are # using simple rules self.empirical_pka_predictor(ct) if self.interactive: # Reset back to initial states. atoms_to_delete = [] for changeable in self.changeables: atoms = changeable.assign_state(ct, changeable.initial_state, add_labels=False, label_pkas=self.label_pkas) if atoms is not None: atoms_to_delete.extend(atoms) self.delete_atoms(ct, atoms_to_delete)
[docs] def empirical_pka_predictor(self, ct): """Predict pKa of histidine and Asp/Glu based on empirical rules""" ct = ct.copy() self.annotate_structure(ct) if self.use_xtal: try: mates = analyze.generate_crystal_mates(ct, radius=4.0) for mate in mates[1:]: ct.extend(mate) except Exception: pass for resname, predictor in self.pka_predictors.items(): report(LOG_BASIC, f"Calculating pKa for residue {resname.upper()}") predictor.predict(ct) for changeable in self.changeables: residue = get_residue_from_changeable(ct, changeable) if residue is None: continue for atom in residue.atom: pka = atom.property.get('r_pka_prediction') if pka is None: continue changeable.pka = pka # For histidine we calculate two pKa for each of the flip states if self.do_flips: # Remove previous annotation for pKa prediction for atom in ct.atom: for key in atom.property: if key.startswith(('b_pka_', 'r_pka_')): del atom.property[key] self.pka_predictors['his'].predict(ct, flip=True) for changeable in self.changeables: if changeable.type != 'HISTIDINE': continue residue = get_residue_from_changeable(ct, changeable) if residue is None: continue for atom in residue.atom: pka = atom.property.get('r_pka_prediction') if pka is None: continue changeable.pka_flip = pka # Adjust penalties for changeable in self.changeables: changeable.change_empirical_pka(self.pH_predictor)
[docs] def remove_zero_order_bonds(self, ct): num_zobs = 0 for item in self.changeables: atoms = item.get_adjustable_atoms() for iatom in atoms: if iatom is None: continue deleting = True while deleting: deleting = False for bond in ct.atom[iatom].bond: if bond.order == 0: bond.atom1.deleteBond(bond.atom2) bond.atom1.property['i_pa_zob%d' % num_zobs] = 1 bond.atom2.property['i_pa_zob%d' % num_zobs] = 1 num_zobs += 1 deleting = True break ct.property['i_pa_num_zobs'] = num_zobs return
[docs] def extend_targeted_to_hyds(self, ct): # If a heavy atom is targets, so are its attached hyds. H_atoms = set(analyze.evaluate_asl( ct, 'atom.ele H')).difference(self.frozen_hydrogens & self.target_atoms) for H_atom in H_atoms: for atom in ct.atom[H_atom].bonded_atoms: if int(atom) in self.target_atoms: self.target_atoms.add(H_atom) return
[docs] def delete_atoms(self, ct, iatoms: List[int]): """Delete atoms and update stored atom indices""" ct.deleteAtoms(iatoms) new_indices = { atom.property['i_pa_atomindex']: atom.index for atom in ct.atom } for changeable in self.changeables: changeable.update_atom_indices(ct, new_indices) # Update atom indices of interactors self.acceptors = [ new_indices[iatom] for iatom in self.acceptors if iatom in new_indices ] self.clashers = [ new_indices[iatom] for iatom in self.clashers if iatom in new_indices ] self.donors = [(new_indices[iatom1], new_indices[iatom2]) for iatom1, iatom2 in self.donors if iatom1 in new_indices] annotate_atom_indices(ct)
[docs] def run_propka(self, changeables, ct, use_xtal=False): # See if we even need to bother. # If there's just a single carboxylate with an # already assigned pKa, it can't have changed. if ((len(changeables) == 1) and (changeables[0].type == 'CARBOXYL') and (changeables[0].pka is not None)): return {} # If there's nothing that depends on pH. proceed = False for changeable in changeables: if (changeable.type in ['HISTIDINE', 'CARBOXYL', 'AMINE'] or (changeable.type == 'ROTATABLE' and changeable.ionizable)): proceed = True break if not proceed: return {} report(LOG_BASIC, ' Running PROPKA to get pKas') if use_xtal: propka_ct = self.generate_mates(ct) else: propka_ct = ct pkas = {} try: calculations = ['pka'] calculator = protein.PropertyCalculator(propka_ct, 'hbondopt_propkajob') results = {} for res, data in calculator.calculateOverResidues(*calculations): if data['pka'] is not None: results[str(res)] = float(data['pka']) except: raise PropKaException('PROPKA failed') for changeable in changeables: # Need to convert to same format as PropKa uses. name = changeable.name.split(':')[0] + ':' + changeable.name.split( ':')[1][4:] name = name.rstrip() if name in results: pkas[name] = (changeable.pka, results[name]) return pkas
[docs] def generate_mates(self, ct): chains = [ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0' ] for chain in ct.chain: if chain.name in chains: chains.remove(chain.name) try: mates = analyze.generate_crystal_mates(ct) except: report( LOG_NONE, 'Failure in generating crystal mates. Please check crystal properties.' ) sys.exit() new_ct = copy.copy(ct) for atom in new_ct.atom: atom.property['i_pa_propka_maincell'] = 1 # Need to renumber, rechain resnum = 1 chain = chains.pop() for i, mate in enumerate(mates): if i == 0: continue for res in mate.residue: res.chain = chain res.resnum = resnum resnum += 1 if resnum > 9999: chain = chains.pop() resnum = 1 new_ct.extend(mate) important_atoms = analyze.evaluate_asl( new_ct, 'fillres within 20 (atom.i_pa_propka_maincell 1)') truncated_new_ct = new_ct.extract(important_atoms) build.add_hydrogens(truncated_new_ct) return truncated_new_ct
[docs] def apply_pkas(self, changeables, changes, propka_pH): """Update pKa's coming from PROPKA""" something_changed = False for changeable in changeables: # Need to convert to same format as PropKa uses. name = changeable.name.split(':')[0] + ':' + changeable.name.split( ':')[1][4:] name = name.rstrip() if name in changes: old_pka = changeable.pka changed = changeable.change_pka(changes[name][1], propka_pH) if changed: something_changed = True if old_pka is not None: report( LOG_BASIC, ' Updating protonation penalties for %s: pKa %.2f' % (changeable.name, changeable.pka)) else: changed = changeable.change_pka(None, propka_pH) return something_changed
[docs] def find_protonation_state_changes(self, ct, clusters='all'): clusters_to_redo = [] pkas = self.run_propka(self.changeables, ct, use_xtal=self.use_xtal) for i, cluster in enumerate(self.clusters): if clusters != 'all' and i not in clusters: continue something_changed = self.apply_pkas(cluster.changeables, pkas, self.propka_pH) if something_changed: clusters_to_redo.append(i) return clusters_to_redo
[docs] def annotate_structure(self, ct): """ Annotate atoms in structure by their interaction class and whether or not they are static Internally updates the acceptors, donors and clashers attributes """ interactors = identify_all_hbonders(ct) self.acceptors = interactors.acceptors self.clashers = interactors.clashers self.donors = interactors.donors annotate_structure_interactors(ct, self.acceptors, self.donors, self.clashers) # Indicate if interactor is static. This is important to know during # generation of states. self.remove_changeables_from_hbonders() atom_iterator = itertools.chain(self.acceptors, self.clashers, *self.donors) for iatom in atom_iterator: ct.atom[iatom].property[STATIC_PROPERTY] = True return ct
[docs] def enumerate_changeable_states(self, ct): """ Enumerate all states for each changeable. Crystal symmetry mates are taken into account if requested. """ working_ct = ct.copy() self.annotate_structure(working_ct) if self.use_xtal: try: mates = analyze.generate_crystal_mates(working_ct, radius=4.0) except: report( LOG_NONE, 'Failure in generating crystal mates. Please check crystal properties.' ) sys.exit() for i, mate in enumerate(mates): for atom in mate.atom: atom.property['i_pa_xtalatom'] = i if i > 0: mates[0].extend(mate) self.xtal_ct = mates[0] # Atoms in primary image which are close to a crystal image (assume atom indices in ct and xtal_ct are the same). surface_atoms = set( analyze.evaluate_asl( self.xtal_ct, "(within 5.0 (atom.i_pa_xtalatom > 0)) and not (atom.i_pa_xtal_atom > 0)" )) for atom in ct.atom: if int(atom) in surface_atoms: atom.property['i_pa_surfaceatom'] = 1 else: atom.property['i_pa_surfaceatom'] = 0 # Get rid of extra atoms. atoms_to_delete = analyze.evaluate_asl( self.xtal_ct, "not (fillres within 4 (atom.i_pa_xtalatom 0 ) )") self.xtal_ct.deleteAtoms(atoms_to_delete) working_ct = self.xtal_ct dcell = create_distance_cell(working_ct, MAX_ORIENT_HYDROGEN_DISTANCE, honor_pbc=False) for changeable in self.changeables: acceptors, donors, _ = changeable.get_close_interactors( working_ct, dcell) changeable.enumerate_states(working_ct, acceptors, donors, self.pH, do_flips=self.do_flips, include_initial=self.include_initial)
[docs] def lock_protonation_states(self, ct): if self.noprot_asl == '': return locked_atoms = analyze.evaluate_asl(ct, self.noprot_asl) for changeable in self.changeables: atoms = changeable.get_heavies() for atom in atoms: if atom in locked_atoms: changeable.lock_protonation() return
[docs] def remove_changeables_from_hbonders(self): heavy_atom_set = set() for changeable in self.changeables: heavy_atom_set |= set(changeable.get_heavies()) self.acceptors = [a for a in self.acceptors if a not in heavy_atom_set] self.donors = [d for d in self.donors if d[0] not in heavy_atom_set] adjustable_atom_set = set() for changeable in self.changeables: adjustable_atom_set |= set(changeable.get_adjustable_atoms()) self.clashers = [ c for c in self.clashers if c not in adjustable_atom_set ]
@staticmethod def _maybe_break_clusters(labels: np.ndarray, nclusters: int, max_size: int) -> int: """ Breaks clusters larger than max_size & returns new nclusters This method will break clusters in place to with the limiting their max size with max_size. It will return the new number of clusters. """ for id in range(nclusters): indices_with_id = np.flatnonzero(labels == id) size = indices_with_id.shape[0] # Skip the first max_size atoms, and start updating from there on. start = max_size while start < size: end = start + max_size # Existing cluster ids will be [0, ..., nclusters - 1]. So we # are using nclusters as the next available cluster id and # incrementing it. labels[indices_with_id[start:end]] = nclusters nclusters += 1 start = end return nclusters @staticmethod def _get_index_clusters( N: int, interaction_matrix: Dict[int, Set[int]], max_cluster_size: Optional[int]) -> Iterator[np.ndarray]: """ Cluster interactors based on an interaction matrix :param N: Number of changeables :param interaction_matrix: Interaction lookup table :param max_cluster_size: Maximum cluster size. None is no maximum. :return: Clustered interactors """ interaction_matrix_sparse = dok_matrix((N, N), dtype=bool) for i, js in interaction_matrix.items(): interaction_matrix_sparse[i, list(js)] = True nclusters, labels = connected_components(interaction_matrix_sparse, directed=False) if max_cluster_size is not None: nclusters = ProtAssign._maybe_break_clusters( labels, nclusters, max_cluster_size) return ( np.flatnonzero(labels == icluster) for icluster in range(nclusters))
[docs] def cluster(self, ct): """Cluster changeables based on their heavies.""" # Extract all heavies from the changeables as these determine whether # they interact with other changeables. Annotate to which changeable a # heavy belongs. heavies = [] for changeable in self.changeables: for heavy in changeable.get_heavies(): ct.atom[heavy].property[ 'i_pa_changeable_index'] = changeable.index heavies.append(heavy) self.interact = calculate_interaction_matrix(ct, heavies, CLUSTERING_DISTANCE, self.use_xtal) # Clean up ct for heavy in set(heavies): del ct.atom[heavy].property['i_pa_changeable_index'] # Create clusters based on interaction N = len(self.changeables) index_clusters = self._get_index_clusters(N, self.interact, self.max_cluster_size) self.clusters = [] # Create the hbond_cluster instances from the index clusters for icluster in index_clusters: new_cluster = self.hbond_cluster() new_cluster.use_xtal = self.use_xtal new_cluster.changeables.extend( self.changeables[index] for index in icluster) # Setup record of which species need xtal symmetry to interact. new_cluster.setup_xtal(ct, self.interact, CLUSTERING_DISTANCE) self.clusters.append(new_cluster) # Report. report(LOG_BASIC) report(LOG_BASIC, 'Clusters to optimize:') for i, cluster in enumerate(self.clusters): report(LOG_BASIC, (" Cluster %d:" % (i + 1))) for item in cluster.changeables: report(LOG_BASIC, (" " + item.name)) report(LOG_BASIC)
[docs] def set_user_states(self, ct): for choice in self.user_states: (user_name, user_state) = choice found_name = False for ichangeable, changeable in enumerate(self.changeables): # Remove the pdbres name = changeable.name[0] + ':' + changeable.name[6:].rstrip() if name == user_name: found_name = True found_state = False for istate, state in enumerate(changeable.state_names): if state == user_state: self.assign_state_of_changeable( ct, ichangeable, istate) self.changeables[ichangeable].locked = True found_state = True if not found_name: print('ERROR: did not find species %s' % user_name) sys.exit() elif not found_state: print('ERROR: did not find state %s for species %s' % (user_state, user_name)) sys.exit() return
[docs] def assign_state_of_changeable(self, ct, ichangeable, istate): atoms_to_delete = self.changeables[ichangeable].assign_state( ct, istate, self.add_labels, self.label_pkas) if atoms_to_delete is not None: self.delete_atoms(ct, atoms_to_delete) return
[docs] def increment_state_of_changeable(self, ct, ichangeable): if (self.changeables[ichangeable].current_state + 1) == self.changeables[ichangeable].nstates: self.assign_state_of_changeable(ct, ichangeable, 0) else: self.assign_state_of_changeable( ct, ichangeable, self.changeables[ichangeable].current_state + 1) return
[docs] def decrement_state_of_changeable(self, ct, ichangeable): if self.changeables[ichangeable].current_state == 0: self.assign_state_of_changeable( ct, ichangeable, self.changeables[ichangeable].nstates - 1) else: self.assign_state_of_changeable( ct, ichangeable, self.changeables[ichangeable].current_state - 1) return
[docs] def assign_best_combinations(self, ct, last_time=False): """ Assign the best combinations to the ct and report output :param ct: The structure to operate on :type ct: schrodinger.Structure :param last_time: Whether or not this is the last time through when we should be extra verbose :type last_time: bool """ atoms_to_delete = [] for cluster in self.clusters: atoms = cluster.assign_combination(ct, 0, self.add_labels, self.label_pkas, verbose=last_time) atoms_to_delete.extend(atoms) self.delete_atoms(ct, atoms_to_delete)
[docs] def assign_cluster_combination(self, ct, icluster, icombination): atoms = self.clusters[icluster].assign_combination( ct, icombination, self.add_labels, self.label_pkas) self.delete_atoms(ct, atoms) return
[docs] def single_point_cluster(self, ct, icluster): atoms = self.clusters[icluster].single_point(ct, self.interact, self.donors, self.acceptors, self.clashers) self.delete_atoms(ct, atoms) atoms = self.clusters[icluster].assign_combination( ct, 1000000000, self.add_labels, self.label_pkas) self.delete_atoms(ct, atoms) return self.clusters[icluster].single_point_score
[docs] def optimize_cluster(self, ct, icluster, assign=True): self.clusters[icluster].optimize(ct, self.interact, self.donors, self.acceptors, self.clashers, self.max_comb, self.num_sequential_cycles, self.use_propka, propka_pH=self.propka_pH) if assign: self.assign_cluster_combination(ct, icluster, 0) return
[docs] def optimize(self, ct): clusters_to_do = [i for i in range(len(self.clusters))] niter = 0 niter_max = 2 if USE_PROTASSIGN_PKA_PREDICTOR and not self.use_propka: niter_max = 20 total_score = 0.0 while (len(clusters_to_do) > 0) and (niter < niter_max): niter += 1 total_score = 0.0 # Generate annotated structure for faster optimization, especially # for Xtal structures. for icluster, cluster in enumerate(self.clusters): if icluster in clusters_to_do: for changeable in cluster.changeables: if not changeable.treated: changeable.pre_treat(ct) annotated_ct = generate_annotated_ct(ct, self.donors, self.acceptors, self.clashers, use_xtal=self.use_xtal) for i, cluster in enumerate(self.clusters): if i not in clusters_to_do: total_score += cluster.combinations[0][-1] continue report(LOG_BASIC, 'Working on cluster #%d...' % (i + 1)) for changeable in cluster.changeables: report( LOG_BASIC, ' ' + changeable.name + ' : ' + str(changeable.nstates) + ' states') cluster.optimize(ct, self.interact, self.donors, self.acceptors, self.clashers, self.max_comb, self.num_sequential_cycles, self.use_propka, propka_pH=self.propka_pH, annotated_ct=annotated_ct) # Just in case something went wrong. try: total_score += cluster.combinations[0][-1] except: pass self.assign_best_combinations(ct) # Now see if any protonation states need to be redone. if self.use_propka and niter < niter_max: clusters_to_do = self.recalculate_propka_pkas(ct) elif USE_PROTASSIGN_PKA_PREDICTOR and niter < niter_max: clusters_to_do = self.recalculate_empirical_pkas(ct, niter) else: clusters_to_do = [] self.assign_best_combinations(ct, last_time=True) if self.minimize: report(LOG_BASIC, 'Minimizing hydrogens...\n') self.minimize_hydrogens(ct) report(LOG_BASIC, 'Total score for structure: ' + str(total_score))
[docs] def recalculate_empirical_pkas(self, ct, iteration): """Only recalculate pKa's of histidines for forced acceptor interactions""" assert iteration > 0 report(LOG_BASIC, "Checking for unsatisfied donors in hydrogen bond networks") working_ct = ct.copy() self.annotate_structure(working_ct) # Check unsatisfied donors before creating crystal mates, as # determining bulk solvent is not yet working with crystal mates # present. unsatisfied_donors = get_unsatisfied_donors(working_ct, include_bsa=False) unsatisfied_donors_indices = {a.index for a in unsatisfied_donors} if self.use_xtal: try: mates = analyze.generate_crystal_mates(working_ct, radius=4.0) for mate in mates[1:]: working_ct.extend(mate) except Exception: pass clusters_to_do = [] for icluster, cluster in enumerate(self.clusters): prev_unsatisfied_donors = None if iteration > 1: prev_unsatisfied_donors = self.unsatisfied_donors_by_cluster[ icluster] # Extract unsatisfied donors that are not bulk solvent accessible # and histidines unsatisfied_donors_in_cluster = [] hips = [] for changeable in cluster.changeables: for iheavy in changeable.get_heavies(): if iheavy in unsatisfied_donors_indices: unsatisfied_donors_in_cluster.append(iheavy) if changeable.type == 'HISTIDINE': hips.append(changeable) self.unsatisfied_donors_by_cluster[ icluster] = unsatisfied_donors_in_cluster if unsatisfied_donors_in_cluster: report(LOG_BASIC, f"Unsatisfied donors found in cluster {icluster}:") for idonor in unsatisfied_donors_in_cluster: atom = working_ct.atom[idonor] report( LOG_BASIC, f" {atom.pdbres} {atom.chain}{atom.resnum}{atom.inscode} {atom.pdbname}" ) # There is nothing to change, since there are no HIP residues or # forced neutral states. if not hips: continue # Penalize the charged states for a histidine. check the current # penalties to determine which histidines have already been # penalized. current_unsatisfied_donors = self.unsatisfied_donors_by_cluster[ icluster] if iteration == 1: hip_to_penalize_index = 0 else: hip_to_penalize_index = None redo_cluster = False for n, hip in enumerate(hips): if hip.force_neutral: redo_cluster = True hip.force_neutral = False hip_to_penalize_index = n + 1 if len(current_unsatisfied_donors) < len( prev_unsatisfied_donors): # Forced accepter interaction found for histidine. pKa # is acutally lowered cause the shift is a negative number. if hip.current_state in (0, 1): hip.pka += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT elif hip.current_state in (3, 4): hip.pka_flip += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT report( LOG_BASIC, f"Found forced acceptor state for {hip.name}. Lowering pKa to {hip.pka:.2f}" ) # This will reset the pH penalties etc. hip.change_empirical_pka(self.pH_predictor) break if hip.force_charged: redo_cluster = True hip.force_charged = False hip_to_penalize_index = n + 1 pkas = [ pka for pka in (hip.pka, hip.pka_flip) if pka is not None ] # Don't apply forced acceptor rule if pKa is already very # low or if we still get the neutral form. if hip.current_state not in [2, 5] or min(pkas) < 4.0: hip.change_empirical_pka(self.pH_predictor) break if len(current_unsatisfied_donors) > len( prev_unsatisfied_donors): if hip.current_state == 2: hip.pka += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT elif hip.current_state == 5: hip.pka_flip += HistidinepKaPredictor.FORCED_ACCEPTOR_PKA_SHIFT report( LOG_BASIC, f"Found forced acceptor state for {hip.name}. Lowering pKa to {hip.pka:.2f}" ) # Reset the number of unsatisfied donors to the lower # number as we had to force this histidine to be # charged and it would be neutral in the non-forced # state. self.unsatisfied_donors_by_cluster[ icluster] = prev_unsatisfied_donors # This will reset the pH penalties etc. hip.change_empirical_pka(self.pH_predictor) break if redo_cluster: clusters_to_do.append(icluster) if hip_to_penalize_index is None or hip_to_penalize_index >= len( hips): continue # Don't force neutral or charged state of histidine if the pKa is # already very low. for i in range(hip_to_penalize_index, len(hips)): hip = hips[i] pkas = [ pka for pka in (hip.pka, hip.pka_flip) if pka is not None ] if min(pkas) > 4.0: break report( LOG_BASIC, f"Skipping forced acceptor state search for {hip.name} due to already low pKa." ) else: # If we reach the end we are done, and can check the next cluster. continue if hip.current_state in [2, 5]: hip.force_neutral = True report( LOG_BASIC, f"Recalculating hydrogen bond network using forced neutral state for {hip.name}" ) elif hip.pka: hip.force_charged = True report( LOG_BASIC, f"Recalculating hydrogen bond network using forced charged state for {hip.name}" ) # This will update the penalties hip.change_empirical_pka(self.pH_predictor) clusters_to_do.append(icluster) return sorted(set(clusters_to_do))
[docs] def recalculate_propka_pkas(self, ct): report(LOG_BASIC, '') report( LOG_BASIC, 'Recalculating pKas to identify changes in protonation penalties...' ) self.assign_best_combinations(ct) try: clusters_to_do = self.find_protonation_state_changes(ct) except PropKaException: report( LOG_NONE, 'WARNING: PropKa failed when looking for changes in protonation state penalties.' ) clusters_to_do = [] if len(clusters_to_do) > 0: report( LOG_BASIC, ' The following clusters have new protonation state penalties, and will be re-run:' ) report(LOG_BASIC, ' ' + ','.join([str(i + 1) for i in clusters_to_do])) else: report(LOG_BASIC, ' No changes in protonation penalties found.') report(LOG_BASIC, '') return clusters_to_do
[docs] def minimize_hydrogens(self, ct): hydrogens = [] for cluster in self.clusters: for changeable in cluster.changeables: if changeable.type in ['ROTATABLE', 'WATER']: changeable_sites = changeable.get_state_sites( ct, changeable.current_state) hydrogens += [i[1] for i in changeable_sites[1]] minimizer = minimize.Minimizer(min_method=minimize.MinCG) minimizer.setStructure(ct) for atom in ct.atom: if int(atom) not in hydrogens: minimizer.addPosFrozen(int(atom)) minimizer.minimize()
[docs] def restore_zobs(self, ct): num_zobs = ct.property.get('i_pa_num_zobs') if num_zobs is None: return for i in range(num_zobs): atoms = analyze.evaluate_asl(ct, '(atom.i_pa_zob%d = 1)' % i) if len(atoms) == 2: ct.addBond(atoms[0], atoms[1], 0) for iatom in atoms: del ct.atom[iatom].property['i_pa_zob%d' % i] del ct.property['i_pa_num_zobs'] return
[docs] def cleanup(self, ct): self.restore_zobs(ct) atoms_to_delete = analyze.evaluate_asl( ct, "withinbonds 1 (atom.i_pa_xtalatom 1)") for atom in ct.atom: for name in [ 's_pa_ligand_id', 'i_pa_ligand_het', 'i_pa_ligand_rotatable' ]: atom.property.pop(name, None) if len(atoms_to_delete) == 0: ct.retype() return ct.deleteAtoms(atoms_to_delete) for atom in ct.atom: if 'i_pa_atomindex' in atom.property: del atom.property['i_pa_atomindex'] if 'i_pa_xtalatom' in atom.property: del atom.property['i_pa_xtalatom'] if 'i_pa_surfaceatom' in atom.property: del atom.property['i_pa_surfaceatom'] ct.retype() return
[docs] def summarize_pkas(self): first = True for changeable in self.changeables: if changeable.pka is None: continue if first: report(LOG_BASIC, '\nFinal pKas:') first = False pka = changeable.pka # Histidine has two pKa's, one for each flip state if changeable.type == 'HISTIDINE' and changeable.current_state > 2: if changeable.pka_flip is not None: pka = changeable.pka_flip report(LOG_BASIC, f' {changeable.name} : {pka:.2f}')
[docs]def annotate_atom_indices(ct): """Add atom index as an atom property to each atom""" for atom in ct.atom: atom.property['i_pa_atomindex'] = atom.index
[docs]def identify_species(ct, species: List[Type], target_atoms) -> List[ProtAssign.changeable]: """ Identify all changeables, i.e. elements in the network that can be changed apart from ligand changeables. """ # Now identify the other changeable types changeables = [] for species in species: iatoms = analyze.evaluate_asl(ct, species.asl) for iatom in iatoms: if iatom not in target_atoms: continue new_changeable = species(ct, iatom) if not new_changeable.valid: continue changeables.append(new_changeable) return changeables
[docs]def identify_ligand_changeables( ct, ligand_sts: List[Structure]) -> List[ProtAssign.ligand_changeable]: """ Identify ligand changeables given a list of structures. Ligand changeables are identified by comparing the residue ID of given ligand structures with residues that are present in the Structure. """ changeables = [] if not ligand_sts: return changeables # Create a lookup table from ligand id to the ligand structure. ligand_id_to_sts = defaultdict(list) for ligand_st in ligand_sts: residues = list(ligand_st.residue) if len(residues) != 1: name = ' - '.join([residue.pdbres.strip() for residue in residues]) report( LOG_BASIC, f"Only ligand consisting of a single residue are currently supported. Skipping ligand ({name})" ) continue residue = list(ligand_st.residue)[0] ligand_id = (residue.chain, residue.resnum, residue.inscode, residue.pdbres) ligand_id_to_sts[ligand_id].append(ligand_st) # Check for each its residue if it's a ligand by generating a ligand-id # and if so, generate a ligand_changeable for residue in ct.residue: residue_id = (residue.chain, residue.resnum, residue.inscode, residue.pdbres) ligand_sts = ligand_id_to_sts[residue_id] if not ligand_sts: continue changeable = ProtAssign.ligand_changeable(ct, residue.atom[1]) for ligand_st in ligand_sts: changeable.add_protonation_state(ligand_st) changeables.append(changeable) return changeables
[docs]def identify_nonshared_rotatable(ct, ligand_changeables: List[ ProtAssign.ligand_changeable], changeable: ProtAssign.changeable) -> bool: """ Check if changeable is part of ligand and whether it can be its own changeable or needs to be sampled within the ligand states. :return: True if changeable is a non-shared rotatable across ligand states, False otherwise """ if not ligand_changeables or changeable.type != 'ROTATABLE': return False atom1 = ct.atom[changeable.parent] ligand_id = atom1.property.get('s_pa_ligand_id') if ligand_id is None: return False # Get the specific ligand changeable this atom is part of. ligand_changeable = None for tmp in ligand_changeables: if tmp.name == ligand_id: ligand_changeable = tmp break if ligand_changeable is None: raise RuntimeError( "Could not identify ligand_changeable corresponding to given changeable." ) # Check if the atom is available in all protonation states. # If so it will be its own changeable, a # rotatable_changeable. If not, the hydrogen position will # be sampled within the ligand changeable. identical_atoms = [] for ligand_st in ligand_changeable.protonation_states: for atom2 in ligand_st.atom: if atom2.atom_type != atom1.atom_type: continue d = max(abs(x1 - x2) for x1, x2 in zip(atom1.xyz, atom2.xyz)) if d > 0.01: continue identical_atoms.append(atom2) break if len(identical_atoms) == len(ligand_changeable.protonation_states): # Register the shared changeable to the ligand_changeable # for bookkeeping. ligand_changeable.shared_changeables.append(changeable) for atom in identical_atoms: atom.property['i_pa_ligand_shared_changeable_index'] = 1 return False return True
[docs]def update_rotatable_changeable_indices( ct, iatoms: List[int], changeables: List[ProtAssign.rotatable_changeable]): """ Update the atom indices of the shared changeables, in this case specifically for rotatables. :param ct: Structure :current_state_indices: Ligand atom indices of current state """ parent = 'parent' attr_names = (parent * 3, parent * 2, parent) for changeable in changeables: for attr in attr_names: iatom1 = getattr(changeable, attr) xyz = ct.atom[iatom1] for iatom2 in iatoms: d = ct.measure(iatom1, iatom2) if d < 0.02: setattr(changeable, attr, iatom2) # Update hydrogen which can have moved parent = changeable.parent for ba in ct.atom[parent].bonded_atoms: if ba.atomic_number == 1: changeable.hydrogen = ba.index
[docs]def filter_nonshared_rotatables( ct, changeables: List[ProtAssign.changeable] ) -> List[ProtAssign.changeable]: """ Given a list of changeables, filter rotatable_changeable instances if they are not shared across all ligand states, as these nonshared rotatables will be dealed with by the ligand_changeable itself. """ ligand_changeables = [ changeable for changeable in changeables if changeable.type == 'LIGAND' ] return [ changeable for changeable in changeables if not identify_nonshared_rotatable(ct, ligand_changeables, changeable) ]
[docs]def annotate_ligand_atoms(ligand_sts: List[Structure], ligand_id: str): """Annotate atoms of ligand structures with an ID and state ID""" for n, ligand_st in enumerate(ligand_sts): for atom in ligand_st.atom: atom.property['s_pa_ligand_id'] = ligand_id atom.property['i_pa_ligand_het'] = n
[docs]def update_ligand_rotatables_annotation(ct, iatoms: List[int]): """ Update atom properties of rotatable hydrogens in the ligand reindexing them """ n = 0 for iatom in iatoms: atom = ct.atom[iatom] if atom.property.get('i_pa_ligand_rotatable') is not None: atom.property['i_pa_ligand_rotatable'] = n n += 1
[docs]def add_ligand_sts(ct, ligand_sts: List[Structure]): """ Extend structure with ligand states Checks if a ligand state is already part of the structure to determine whether to add based on specific atom properties. """ # Check if certain ligand protonation states are already part of # the structure ligand_id = ligand_sts[0].atom[1].property['s_pa_ligand_id'] used_ligand_hets = set() for atom in ct.atom: if atom.property.get('s_pa_ligand_id') != ligand_id: continue ligand_het = atom.property.get('i_pa_ligand_het') if ligand_het is not None: used_ligand_hets.add(ligand_het) for n, ligand_st in enumerate(ligand_sts): if n not in used_ligand_hets: ct.extend(ligand_st)
[docs]def get_ligand_atom_indices(ct, ligand_id: str) -> List[int]: """Return atom indices annotated by ligand_id""" return analyze.evaluate_asl(ct, f"atom.s_pa_ligand_id '{ligand_id}'")
[docs]def further_annotate_ligand_atoms(ct, iatoms: List[int]): """ Further annotate ligand atoms notably hydrogen Assumes ligand atoms are annotated already with s_pa_ligand_id and i_pa_ligand_het """ for iatom in iatoms: atom = ct.atom[iatom] if atom.atomic_number == 1: continue atom.property['i_pa_atomindex'] = iatom # Annotate the hydrogens too, as they might have been added # since last annotating the ligand. for neighbor in atom.bonded_atoms: if neighbor.atomic_number != 1: continue neighbor.property.update({ 's_pa_ligand_id': atom.property['s_pa_ligand_id'], 'i_pa_ligand_het': atom.property['i_pa_ligand_het'], 'i_pa_atomindex': neighbor.index, })
[docs]def group_ligand_het_states(ct, iatoms: List[int]) -> Dict[int, List[int]]: """Return ligand atom indices grouped by het""" # Group atoms by their het-state iatoms_by_het = defaultdict(list) for iatom in iatoms: het = ct.atom[iatom].property['i_pa_ligand_het'] iatoms_by_het[het].append(iatom) return iatoms_by_het
[docs]def get_ligand_interactors(ct, iatoms: List[int]): """Get ligand interactors while excluding shared acceptors/donors""" sub_st = ct.extract(iatoms, copy_props=True) mapper = { atom.index: atom.property.get('i_pa_atomindex') for atom in sub_st.atom } interactors = identify_all_hbonders(sub_st) interactors.remap(mapper) # Filter out shared changeable donors as these interactions are accounted # for by that specific shared changeable. interactors.donors = [ (a1, a2) for a1, a2 in interactors.donors if 'i_pa_ligand_shared_changeable_index' not in ct.atom[a1].property ] # Alcohols can also accept donors, so add oxygen to acceptor list. for iheavy, ihydrogen in interactors.donors: if ct.atom[iheavy].atomic_number == 8: interactors.acceptors.append(iheavy) return interactors
[docs]def annotate_ligand_rotatable_hydrogens(ct, ligand_iatoms: List[int]): """ Annotate rotatable hydrogens of the ligand. These are either hydrogens that are shared with a rotatable_changeable or rotatable in a subset of ligand states """ for iatom in ligand_iatoms: atom = ct.atom[iatom] # Check if we are dealing with an alcohol/hydroxyl group. If so obtain # the hydrogen. if atom.atomic_number != 8: continue hydrogens = [ba for ba in atom.bonded_atoms if ba.atomic_number == 1] if len(hydrogens) != 1: continue # Now check whether the hydrogen is part of an independent # rotatable_changeable. hydrogen = hydrogens[0] changeable_index = atom.property.get( 'i_pa_ligand_shared_changeable_index') if changeable_index is None: try: dihedral_atoms = get_dihedral_atoms(ct, hydrogen) if len(dihedral_atoms) == 4: hydrogen.property['i_pa_ligand_rotatable'] = 1 except Exception: pass else: hydrogen.property[ 'i_pa_ligand_shared_changeable_index'] = changeable_index
[docs]def extract_epik_states(ct, include_initial=False) -> List[Structure]: """ Extract Epik embedded states Epik states are embedded into a structure property during the PPW pipeline. States are extracted and returned. """ ligand_sts = [] # Returns a list if states cannot be deserialised, and a dict # otherwise. het_states = prepare.deserialize_het_states(ct) if not het_states: return ligand_sts ct_copy = ct.copy() for hetnum, serialized_states in het_states.items(): if len(serialized_states) == 1 and not include_initial: continue for serialized_state in serialized_states: hetstate = prepare.HetState.initFromSerializedState( hetnum, serialized_state) hetstate.applyState(ct_copy) iatoms = hetstate.findHetAtoms(ct_copy) ligand_st = ct_copy.extract(iatoms) ligand_st.property['r_pa_ligand_penalty'] = hetstate.penalty or 0.0 report( LOG_DEBUG, f"Set ligand penalty to {ligand_st.property['r_pa_ligand_penalty']} for ligand {ligand_st.atom[1].pdbres}" ) ligand_sts.append(ligand_st) return ligand_sts
[docs]def identify_all_hbonders(ct): """Identify all acceptor, donors and clashers in a structure""" donors = [] donor_Hs = analyze.evaluate_asl(ct, "(atom.ele H AND not /C0-H0/)") donor_Hs += analyze.evaluate_asl( ct, "(res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \") AND (atom.ptype \" HD2\" or atom.ptype \" HE1\")" ) for H in donor_Hs: H_atom = ct.atom[H] if H_atom.bond_total == 1: for atom in H_atom.bonded_atoms: donors.append((int(atom), H)) acceptors = analyze.evaluate_asl(ct, "not atom.ele H AND not atom.ele C") # Move metal ions from acceptor list to donor list (ion is both heavy atom and "hydrogen"). atoms_to_remove = [] for acceptor in acceptors: atom = ct.atom[acceptor] if atom.formal_charge > 0 and atom.atomic_number not in [7, 8]: donors.append((acceptor, acceptor)) atoms_to_remove.append(acceptor) for atom in atoms_to_remove: acceptors.remove(atom) # Remove any atoms accounted for in donors from list of acceptors. for donor in donors: if donor[0] in acceptors: acceptors.remove(donor[0]) # All of the non-polar hydrogens clashers = analyze.evaluate_asl(ct, "atom.ele H AND /C0-H0/") return _Interactors(acceptors=acceptors, donors=donors, clashers=clashers)
[docs]def get_dihedral_atoms(ct: Structure, h: int) -> List[int]: """ Get atoms to define a dihedral angle given a hydrogen atom index Return list is smaller than 4 elements if dihedral cannot be determined. """ ret_list = [] # The first atom will be H itself: ret_list.append(h) # Now find the O or S attached to the H. Note that bonds (like everything # associated with structures) are indexed from "1" so we are actually # looking for the first bond: Xatom = ct.atom[h].bond[1].atom2 ret_list.append(int(Xatom)) # Now find a suitable non-H atom bonded to the X: C1atom = -1 for b in Xatom.bond: conn_atom = b.atom2 # This is probably the easiest way to find a non-H atom: if not conn_atom.atomic_number == 1: C1atom = int(conn_atom) ret_list.append(C1atom) # Leave the loop: break # Check to see that we did find a C1 atom: if C1atom < 0: return ret_list # Now look for the final atom we need: for b in ct.atom[C1atom].bond: conn_atom = b.atom2 if int(conn_atom) != ret_list[1]: # This is probably the easiest way to find a non-H atom: C2atom = int(conn_atom) ret_list.append(C2atom) # Leave the loop: break return ret_list
if __name__ == '__main__': if len(sys.argv) != 3: print("Please specify 2 arguments: input and output file names.") sys.stdout.flush() sys.exit(1) inputfile_name = sys.argv[1] outputfile_name = sys.argv[2] try: structure_ct = Structure.read(inputfile_name) except: print("ERROR: unable to open file " + inputfile_name) sys.stdout.flush() sys.exit(1) print("Structure extracted from file: " + inputfile_name) print("Output structure will be written to: " + outputfile_name) sys.stdout.flush() instance = ProtAssign( structure_ct, interactive=False, asl='', noprot_asl='', use_xtal=False, do_flips=True, freeze_existing=False, include_initial=False, max_comb=500000, num_sequential_cycles=30, max_cluster_size=None, add_labels=True, label_pkas=True, pH=pH_neutral, use_propka=True, propka_pH=7.0, user_states=[], minimize=False, ) structure_ct.write(outputfile_name) print("\nComplete. Output file: %s" % outputfile_name) sys.stdout.flush()