Source code for schrodinger.application.prepwizard2.prepare

"""
Shared functionality between PrepWizard GUI and command-line PrepWizard.

Copyright Schrodinger, LLC. All rights reserved.
"""
# Note: this file must not import any gui-dependent modules (QtGui, QtWidgets,
# maestro_ui) so the tasks can run without display dependency

import base64
import enum
import itertools
import json
import os
import sys
import zlib

import schrodinger
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.models import parameters
from schrodinger.protein import biounit
from schrodinger.protein import findhets
from schrodinger.protein import getpdb
from schrodinger.protein import annotation
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils.interactions import hbond
from schrodinger.ui.qt.appframework2 import application
from schrodinger.utils import mmutil

from . import tasks as pwtasks

maestro = schrodinger.get_maestro()


[docs]class HetType(enum.Enum): METAL_OR_ION = enum.auto() DETECTED_LIGAND = enum.auto() NON_WATER_SOLVENT = enum.auto() OTHER = enum.auto()
# List of PDB residue names to NOT run Epik on: EPIK_EXCLUSION_LIST = ['HEM', 'HEC'] STATE_PANELTY_PROP = 'r_epik_State_Penalty' METAL_PENALTY_PROP = 'r_epik_Metal_State_Penalty' ORIG_ANUM_PROP = 'i_ppw_anum' HET_NUM_PROP = 'i_ppw_het' HET_STATES_PROP = 's_ppw_het_states' # Default weight of each hydrogen bond in the state score: HBOND_ENERGY = -1.0 # kcal/mol/h-bond # Penalize het states where neutral amide is interacting with a metal. This # number must be large enough to overpower any favorable differences in Epik # state penalty that the neutral (or positive) charged state may have. METAL_AMIDE_CLASH_PENALTY = 20.0 # Consider NH...O as twice stronger than other H-bonds: # https://doi.org/10.1021/jp103470e p.9534: # "... Nevertheless, the hydrogen bond fit energies of all fit # functions showed the general expected order4 of: ENH ··· O ≈ # -6 kJ/mol > ENH ··· N ≈ ECH ··· O ≈ -3 kJ/mol. ..." HBOND_ENERGY_NHO = -2.0 # kcal/mol/h-bond RANK_NH_O_HIGHER = mmutil.feature_flag_is_enabled( mmutil.RANK_NH_O_HIGHER_IN_PPW) PDBNAME_ELEMENT_DICT = {} for pdbname in [' OD1', ' OE1', ' OD2', ' OE2']: PDBNAME_ELEMENT_DICT[pdbname] = "O" for pdbname in [' ND1', ' ND2', ' NE2']: PDBNAME_ELEMENT_DICT[pdbname] = "N" for pdbname in [' CG ', ' CD ', ' CD2', ' CE1']: PDBNAME_ELEMENT_DICT[pdbname] = "C"
[docs]def retrieve_and_read_pdb(pdbid): """ Retrieve the given PDB and return the Structure object. On error, raises RuntimeError. """ try: # First attempt to retrieve from third-party database, then # attempt download: pdb_file = getpdb.get_pdb(pdbid) if not os.path.isfile(pdb_file): raise Exception("getpdb module did not write out the file %s" % pdb_file) except Exception as err: msg = "Failed to retrieve or download PDB file for ID: %s" % pdbid raise RuntimeError(msg) # NOTE: For PDBReader, all_occ is already True by default. The # InfraReader that is used to read CIF files does not have this option. try: st = next(structure.StructureReader(pdb_file)) except Exception as err: raise RuntimeError(str(err)) return st
# Dictionary that defines possible ionization states for metals: # Key is the atomic number. METAL_AND_ION_CHARGES = { 11: [1], # NA Sodium +1 12: [2], # MG Magnesium +2 19: [1], # K Potassium +1 20: [2], # CA Calcium +2 25: [2, 3, 4, 5, 6, 7], # MN Manganese +2, +3, +4, +5, +6, +7 26: [2, 3], # FE Iron +2 +3 27: [2, 3], # CO Cobalt +2 +3 28: [2, 3], # NI Nickel +2 +3 29: [1, 2, 3], # CU Copper +1 +2 +3 30: [2], # ZN Zinc +2 # 48:[2], # CD Cadmium +2 No MacroModel parameters # 80:[1, 2] HG Mercury +1 +2 No MacroModel parameters } epik_exe = os.path.join(os.environ['SCHRODINGER'], 'epik') impref_exe = os.path.join(os.environ['SCHRODINGER'], 'utilities', 'impref') protassign_exe = os.path.join(os.environ['SCHRODINGER'], 'utilities', 'protassign') if sys.platform == 'win32': epik_exe += ".exe" impref_exe += ".exe" protassign_exe += ".exe" def _count_bonds(a): """ Count number of bonds to the given StructureAtom (double bonds = 2 bonds), zero-order bonds are not counted. """ total_orders = 0 for bond in a.bond: order = bond.order total_orders += order return total_orders
[docs]def serialize_het_states(states_by_het, complex_st): """ Set JSON string for all given states as a CT-level property. """ # Keys are het nums (as str, as required by JSON module), values are # lists of hets. Het number is included so that restoring is possible # even if some hets were later removed by the user (i_ppw_het # property would remain unchanged). states_by_het = { str(hetnum): [s.serialize() for s in states ] for hetnum, states in states_by_het.items() } json_str = json.dumps(states_by_het) compressed_str = zlib.compress(bytes(json_str, 'utf-8')) encoded_str = base64.b64encode(compressed_str).decode('utf-8') complex_st.property[HET_STATES_PROP] = encoded_str
[docs]def deserialize_het_states(complex_st): """ Attempt to read the het states from the CT using the s_ppw_het_states property, if present. """ encoded_str = complex_st.property.get(HET_STATES_PROP) if not encoded_str: # No states to load return [] compressed_str = base64.b64decode(encoded_str) json_str = zlib.decompress(compressed_str) states_dict = json.loads(json_str) # In JSON, all keys are str, to cast to int: all_hets_and_states = {int(hetnum): v for hetnum, v in states_dict.items()} return all_hets_and_states
[docs]class HetState: """ Class representing an ionization/tautomeric state of a het (e.g. ligand). """
[docs] def __init__(self, hetnum): self.hetnum = hetnum # Het number (value of i_ppw_het properties) self.charges = None # items=(original anum, formal charge) self.orders = None # items=(orig anum1, orig anum2, order) self.penalty = None # Epik state penalty; 0.0 for metals self.metal_penalties = None # keys=orig anum, values=metal penalty self.score = None # Score of the het state, in context of protein self.info_str = None # Information string explaining state details,
# including num hbonds to receptor.
[docs] @classmethod def initFromEpikOutput(cls, hetnum, state_st): """ Return a HetState instance for the given Epik output structure. """ self = cls(hetnum) self.charges, self.orders = self.extractStateChargesAndOrders(state_st) state_penalty, metal_penalties = get_state_penalties(state_st) self.penalty = state_penalty # Will be 0.0 for metals self.metal_penalties = metal_penalties # NOTE: score and info_str will be calculated when state is applied # to the complex CT return self
[docs] @classmethod def initFromSerializedState(cls, hetnum, serialized_state): """ Return a HetState instance for the given serialized state. """ self = cls(hetnum) (self.charges, self.orders, self.penalty, self.metal_penalties, self.score, self.info_str) = serialized_state # Convert atom number keys back to ints. This is needed because all # keys are loaded as strings when reading JSON: self.metal_penalties = { int(anum): penalty for anum, penalty in self.metal_penalties.items() } return self
[docs] def serialize(self): # JSON requires keys to be strings: return (self.charges, self.orders, self.penalty, self.metal_penalties, self.score, self.info_str)
[docs] def extractStateChargesAndOrders(self, state_st): hetnum = self.hetnum # Dict of charge information for each atom in the output state. # Keys are original/input atom indices: charge_data = [] for atom in state_st.atom: # Iterate over all atoms in this Epik output CT: anum = atom.property.get(ORIG_ANUM_PROP) # Skip atoms that were not in the original het. This includes the # protein atoms covalently bound to the het, and hydrogens that # were added by Epik. if atom.property.get(HET_NUM_PROP) != hetnum: continue # If we got here then we are processing a heavy atom that is part # of the het - so it must have i_ppw_anum set, as Epik doesn't # touch heavy atoms. assert anum, "ERROR: No i_ppw_anum property for atom" charge_data.append((anum, atom.formal_charge)) order_data = [] for bond in state_st.bond: atom1 = bond.atom1 atom2 = bond.atom2 if atom1.property.get(HET_NUM_PROP) != hetnum or atom2.property.get( HET_NUM_PROP) != hetnum: # Hydrogen or covalently bound protein atom: continue anum1 = atom1.property.get(ORIG_ANUM_PROP) anum2 = atom2.property.get(ORIG_ANUM_PROP) assert anum1 is not None and anum2 is not None order_data.append((anum1, anum2, bond.order)) return charge_data, order_data
[docs] def applyState(self, st): """ Apply this het state to the specified complex structure: modify the bond orders and formal charges to apply the currently selected het state. Also calculates the score and info_str properties Raises RuntimeError if any of the het atoms are no longer in the structure, or are not numbered correctly. """ # Delete hydrogen neighbors before adding them according to next state: het_atom_indices = self.findHetAtoms(st) hydrogens = hydrogen_neighbors(st, het_atom_indices) st.deleteAtoms(hydrogens) # Need to find het atoms again, as it's possible that removing # hydrogens has altered atom ordering: het_atom_indices = self.findHetAtoms(st) # Keys are original atom indices (prior to pre-processing), values are # atom objects, for hets that are still present in the structure. atoms_by_anum = {} for anum in het_atom_indices: atom = st.atom[anum] # Iterate over het heavy atoms in the Workspace CT: # This atom's atom number in input (unprepared) complex CT: anum = atom.property.get(ORIG_ANUM_PROP) assert anum is not None atoms_by_anum[anum] = atom expecting_anums = {anum for (anum, _) in self.charges} for anum1, anum2, _ in self.orders: expecting_anums.add(anum1) expecting_anums.add(anum2) missing_atoms = expecting_anums.difference(atoms_by_anum.keys()) if missing_atoms: raise RuntimeError( 'Could not apply het state (%i atoms missing or renumbered)' % len(missing_atoms)) # Change the bond orders and charges as necessary: for anum, charge in self.charges: atom = atoms_by_anum[anum] atom.formal_charge = charge if charge > 0: charge_str = '+%i' % charge elif charge < 0: charge_str = '%i' % charge else: charge_str = '' atom.label_user_text = str(charge_str) atom.label_format = "%UT" atom.label_color = 13 for anum1, anum2, order in self.orders: atom1 = atoms_by_anum[anum1] atom2 = atoms_by_anum[anum2] bond = st.getBond(atom1, atom2) bond.order = order num_atoms_pre_htreat = st.atom_total # Add hydrogens according to the new bond orders and formal charges: st.retype() build.add_hydrogens(st, atom_list=het_atom_indices) # Hide the new hydrogens (so that they are not flickered when selecting # state): for anum in range(num_atoms_pre_htreat + 1, st.atom_total): st.atom[anum].visible = False # Calculate the total score for this state. The score may have changed # since the states were generated by Epik due to user's changes - for # example removing an interacting metal or water molecule. self.score, self.info_str = calc_state_score(st, het_atom_indices, self.penalty, self.metal_penalties)
[docs] def findHetAtoms(self, complex_st): """ Return a list of het heavy atoms in the given complex structure. """ asl = "(atom.%s %i)" % (HET_NUM_PROP, self.hetnum) return analyze.evaluate_asl(complex_st, asl)
class _CommonProblemsFixer: """ Class for fixing common structure problems. """ def _hasZobs(self, atom): """ Return True if any of the bonds of this atom are zero-order. """ for bond in atom.bond: if bond.order == 0: return True return False def _setFormalCharge(self, atom, new_charge): """ Set the formal charge for the given atom. Logs the change in the self._corrections_list. """ prev_formal_charge = atom.formal_charge if prev_formal_charge == new_charge: return atom.formal_charge = new_charge msg = "Changed atom %i (%s) charge from %i to %i" % ( atom.index, atom.element, prev_formal_charge, new_charge) self._corrections_list.append(msg) def _deprotCarbAcid(self, carbon): """ De-protonate the specified carboxylic acid. :type carbon: `structure._StructureAtom` object :param carbon: The carbon of the carboxylic acid. """ # NOTE: This is no longer needed due to CONV-855/CONV-860 fix oxygen_double = None oxygen_single = None for bond in carbon.bond: atom2 = bond.atom2 if atom2.element == "O": if bond.order == 2: oxygen_double = atom2 else: oxygen_single = atom2 if not oxygen_double or not oxygen_single: # If this ever happens, then this is not a carboxylic acid, or the # residue has missing atoms. Skip this group. return self._setFormalCharge(oxygen_single, -1) # Delete the hydrogen bound to the oxygen (if any): for other_atom in oxygen_single.bonded_atoms: if other_atom.element == "H": self._atoms_to_delete.append(other_atom.index) correction = ( "Deleted hydrogen %i" % other_atom.index + " because it was interfering with a zero-order bond") self._corrections_list.append(correction) def _fixElement(self, atom): """ If an atom's element (Whether it's an oxygen, nitrogen, or carbon) does not correspond to its PDB name, it gets reset. (Ev:69207) """ expected_element = PDBNAME_ELEMENT_DICT.get(atom.pdbname) if expected_element: prev_element = atom.element if prev_element != expected_element: correction = "Changed atom %i (%s) element from %s to %s" % ( atom.index, atom.pdbname, prev_element, expected_element) atom.element = expected_element self._corrections_list.append(correction) def _processSulfur(self, atom): """ Ev:105338 Fix formal charges on sulfurs """ all_neighbors_are_carbons = True total_orders = 0 for bond in atom.bond: order = bond.order total_orders += order if bond.atom2.element != "C": all_neighbors_are_carbons = False if total_orders == 3 and all_neighbors_are_carbons: prev_formal_charge = atom.formal_charge if prev_formal_charge != 1: self._setFormalCharge(atom, +1) def _processBackboneOxygen(self, atom): """ PPREP-875 Remove extra bonds when residue oxygens are bound to covalent ligands """ # R-O=R will become R-O-R if _count_bonds(atom) >= 3 and atom.formal_charge <= 0: # This oxygen is making one too many bonds. Remove one. # We ignore R-O(+)=R, which is valid. if len(atom.bond) != 2: return # For now we handle only the case where one bond is single # and one bond is double. # Find the double bond and make it single: for bond in atom.bond: if bond.order >= 2: bond.order = 1 res = str(bond.atom1.getResidue()) correction = "Modified bond %i-%i (%s): Set order to 1" % \ (bond.atom1, bond.atom2, res) self._corrections_list.append(correction) break def run(self, st): """ Fix common structure problems in the given Structure. :rtype: list of strings :return: A list of texts describing what changes were made. """ self._atoms_to_delete = [] self._corrections_list = [] for atom in st.atom: self._fixElement(atom) if atom.element == "S": self._processSulfur(atom) # Ev:108985 elif atom.element == "B": if _count_bonds(atom) == 4 and atom.formal_charge == 0: self._setFormalCharge(atom, -1) # Ev:120900 Delete hydrogens with more than one non-ZOB bond (PDB error) elif atom.element == "H": num_bonds = 0 for bond in atom.bond: if bond.order > 0: num_bonds += 1 if num_bonds > 1: self._atoms_to_delete.append(atom.index) correction = "Deleted atom %i: This hydrogen had invalid number of bonds (%i)" % ( atom.index, num_bonds) self._corrections_list.append(correction) elif atom.pdbname == " O ": self._processBackboneOxygen(atom) elif atom.pdbname in (" OD1", " OD2"): # PPREP-1092 Find carboxylic acids with ZOBs # Use pdbnames instead of elements as an optimization (since # fewer atoms will have these names, while there are many Os). # NOTE: This is no longer needed due to CONV-855/CONV-860 fix if self._hasZobs(atom): for bond in atom.bond: if bond.order != 0 and bond.atom2.element == "C": self._deprotCarbAcid(bond.atom2) # PPREP-943 Fix formal charges on some irons: # (FIXME perhaps make this correction more broad?) elif atom.element == "Fe": if atom.pdbres == "SRM " and atom.pdbname == "FE ": self._setFormalCharge(atom, +2) if self._atoms_to_delete: st.deleteAtoms(self._atoms_to_delete) st.retype() return self._corrections_list
[docs]def fix_common_structure_mistakes(st): """ Fixes common problems in structures and returns a list of correction strings that are to be reported to the user. """ return _CommonProblemsFixer().run(st)
[docs]def atomsasl(atoms): """ Generates an ASL expression for the specified atoms. """ if len(atoms) == 0: raise Exception("ERROR: empty atom list passed to atomsasl()!") asl = "(atom.num %s)" % ', '.join(map(str, atoms)) return asl
def _create_bonds_by_smarts(st, bond_type, smarts1, smarts2, dist, verbose=False): """ General function to create bonds between the first atom in two different smarts patterns if they are closer then a distance. The smarts matching is performed on a per residue basis. :param st: Structure to modify. :type st: Schrodinger.structure :param bond_type: Bond type to be used for reporting :type bond_type: String :param smarts1: The first smarts pattern to consider, by convention this should be the protein if that applies :type bond_type: String :param smarts2: The second smarts pattern to consider, by convention this should be the non-protein part if that applies :type bond_type: String :param dist: Atoms must be at least this close to consider for for glycosilation :type dist: float :param verbose: Whether to print formed bonds to stdout :type verbose: boolean :rparam: Pairs of atom ( in the output structure ) where bonds were added :rtype: list of schrodinger atom objects """ output = [] attach_list = [[], []] for res in st.residue: res_anums = res.getAtomIndices() res_ct = res.extractStructure() for smarts, attach in zip([smarts1, smarts2], attach_list): match = analyze.evaluate_smarts_canvas(res_ct, smarts) # Map residue atom ids back to main structure for a_match in match: anum_by_st = res_anums[a_match[0] - 1] attach.append(anum_by_st) smarts1_attach, smarts2_attach = attach_list if not smarts1_attach or not smarts2_attach: return output atoms_to_delete = [] atoms_to_add_hyd = [] used_atoms = set() atoms = smarts1_attach + smarts2_attach for iatom1, iatom2 in measure.get_close_atoms(st, dist, atoms=atoms): atom1 = st.atom[iatom1] atom2 = st.atom[iatom2] # Don't reuse atoms if atom1.index in used_atoms or atom2.index in used_atoms: continue # Identify close atoms that are not bound if st.areBound(atom1, atom2): continue # Check both ways for s1_atom, s2_atom in itertools.permutations([atom1, atom2]): if s1_atom.index not in smarts1_attach: continue if s2_atom.index not in smarts2_attach: continue st.addBond(s1_atom, s2_atom, 1) output.append((s1_atom, s2_atom)) used_atoms.add(s1_atom.index) used_atoms.add(s2_atom.index) # Keep track of the hydrogens to delete on each side # and which atoms to add hydrogens to. This will all be # done at the end to avoid messing up indexes for atom in s1_atom, s2_atom: for ba in atom.bonded_atoms: if ba.element == "H": atoms_to_delete.append(ba.index) atoms_to_add_hyd.append(atom.index) if verbose: print(' Created a %s bond between %s and %s' % (bond_type, "%s:%d%s(%s):%s" % (s1_atom.chain, s1_atom.resnum, s1_atom.inscode, s1_atom.pdbres, s1_atom.pdbname), "%s:%d%s(%s):%s" % (s2_atom.chain, s2_atom.resnum, s2_atom.inscode, s2_atom.pdbres, s2_atom.pdbname))) # Note that since we are using structure Atom objects for output, # the indexes autoamtically update as the underlying CT is changed if len(atoms_to_delete) > 0: rmap = st.deleteAtoms(atoms_to_delete, renumber_map=True) atoms_to_add_hyd = [rmap[i] for i in atoms_to_add_hyd] if len(atoms_to_add_hyd) > 0: build.add_hydrogens(st, atom_list=atoms_to_add_hyd) return output
[docs]def create_glycosylation_bonds(st, dist=1.8, verbose=True): """ Create glycosylation bonds for N-linked or O-linked glycosilation events Identfies neutral O or N with implicit or explicit hydrogens and forms bonds to sugars ( ring with 5 aliphatic carbons and one oxygen ) at locations adjacent to the oxygen. :param st: Structure to modify. :type st: Schrodinger.structure :param dist: Atoms must be at least this close to consider for for glycosilation :type dist: float :param verbose: Whether to print formed bonds to stdout :type verbose: boolean :rparam: Pairs of atom ( in the output structure ) where bonds were added :rtype: list of schrodinger atom objects """ # ( note that only NH1 is checked for for ARG as NH2 is positively # charged. return _create_bonds_by_smarts( st, "glycosylation", "[N,O;+0;h,H1,H2]", "[C;r6;H2,H1,h][O;r6][C;r6][C;r6][C;r6][C;r6]", dist, verbose=verbose)
[docs]def create_palmitoylation_bonds(st, dist=1.8, verbose=True): """ Create palmitoylation bonds. Identfies neutral cysteine S with implicit or explicit hydrogens and palmitoyl groups or palmitoyl groups with the OH of the acid replace by a hydrogen :param st: Structure to modify. :type st: Schrodinger.structure :param dist: Atoms must be at least this close to consider for for pamitoylation :type dist: float :param verbose: Whether to print formed bonds to stdout :type verbose: boolean :rparam: Pairs of atom ( in the output structure ) where bonds were added :rtype: list of schrodinger atom objects """ return _create_bonds_by_smarts(st, "palmitoylation", "[S,O;+0;h,H1,H2]", "[C;+0;h,H1](=[O;+0])", dist, verbose=verbose)
[docs]def create_disulfide_bonds(st, dist=3.2, verbose=False): """ Create bonds between proximal Sulfurs, deleting any hydrogens on them. If verbose is True, prints log info to the termnal. Returns a list of (atom1, atom2) for ever added bond. """ cys_sulfurs = analyze.evaluate_asl( st, '((res.ptype "CYS ") OR (res.ptype "CYX ")) AND ((atom.ptype " SG "))') added_bonds = [] # Ev:92646 atoms_to_delete = [] for isulfur_index in cys_sulfurs: for jsulfur_index in cys_sulfurs: if jsulfur_index >= isulfur_index: continue isulfur = st.atom[isulfur_index] jsulfur = st.atom[jsulfur_index] # Ev:129699 Skip this pair if at least one is already bound to # something (perhaps from a previous iteration of this loop) pair_hydrogens = [] skip = False for a in [isulfur, jsulfur]: num_heavy = 0 for bond in a.bond: if bond.order > 0: # Ev:131161 Ignore zero-order bonds n = bond.atom2 if n.element == 'H': pair_hydrogens.append(n.index) else: num_heavy += 1 if num_heavy > 1: skip = True if skip: continue if st.measure(isulfur, jsulfur) <= dist: isulfur.addBond(jsulfur, 1) added_bonds.append((isulfur.index, jsulfur.index)) isulfur.formal_charge = 0 jsulfur.formal_charge = 0 atoms_to_delete += pair_hydrogens ires = isulfur.getResidue() jres = jsulfur.getResidue() # Ev:88748 Change the resname of the combined residue to CYX: if ires.pdbres != "CYX " or jres.pdbres != "CYX ": ires.pdbres = "CYX " jres.pdbres = "CYX " if verbose: print( ' Created a bond between %s and %s; changed residue names to "CYX "' % (ires, jres)) else: if verbose: print(' Created a bond between %s and %s' % (ires, jres)) # Must delete atoms at the end, in order to preserve atom numbers: st.deleteAtoms(atoms_to_delete) st.retype() return added_bonds
[docs]def convert_selenomethionines(st): """ Convert MSE residues to METs. Returns a list of residue strings that were converted. """ converted_residues = [] for res in st.residue: if res.pdbres == 'MSE ': # Convert this Selenomethionine to a Methionine: res_str = str(res) converted_residues.append(res_str) for atom in res.atom: if atom.pdbname == 'SE ': atom.element = 'S' atom.pdbname = ' SD ' atom.color = 'yellow' res.pdbres = 'MET' st.retype() return converted_residues
def _set_oxygen_orders(st, mainatom, double, single, charge): """ Sets the orders of phosphate-oxygen or sulfate-oxygen bonds. mainatom - atom number of the phosphate or sulfur double - list of atom numbers of the oxygens to double-bond to single - list of atom numbers of oxygens to single-bond to, and set charge to <charge>. """ st = st.copy() for anum in double: bond = st.getBond(mainatom, anum) bond.order = 2 st.atom[anum].formal_charge = 0 for anum in single: bond = st.getBond(mainatom, anum) bond.order = 1 st.atom[anum].formal_charge = charge return st def _generate_phosphate_states(st, atoms): """ Returns a list of structures (expanded states) """ # TODO move this code into the GenerateStatesTask class phosphate = atoms[0] double_oxygen = atoms[1] # Make a list of negative hydrogens and hydrogens with hydrogens: negative_oxygens = [] hydrogen_oxygens = [] # Iterate over all oxygens with single bond to Phosphate: for anum in atoms[2:]: if st.atom[anum].bond_total == 1: if st.atom[anum].formal_charge == -1: negative_oxygens.append(anum) else: hydrogen_oxygens.append(anum) # FIXME For now, do not deal with this Phosphate if it has any hydrogen # oxygens: if hydrogen_oxygens: return [st] if not negative_oxygens: # No states can be generated from this Phosphate return [st] # Alternate double bond between the oxygens: if len(negative_oxygens) == 1: st2 = _set_oxygen_orders(st, phosphate, [negative_oxygens[0]], [double_oxygen], -1) return [st, st2] elif len(negative_oxygens) == 2: st2 = _set_oxygen_orders(st, phosphate, [negative_oxygens[0]], [double_oxygen, negative_oxygens[1]], -1) st3 = _set_oxygen_orders(st, phosphate, [negative_oxygens[1]], [double_oxygen, negative_oxygens[0]], -1) return [st, st2, st3] else: print('ERROR: wrong number of negative oxygens:', len(negative_oxygens)) return [st] def _generate_sulfate_states(st, atoms): """ Returns a list of structures (expanded states) """ # TODO move this code into the GenerateStatesTask class sulfur = atoms[0] double_oxygen1 = atoms[1] double_oxygen2 = atoms[2] # Make a list of negative hydrogens and hydrogens with hydrogens: negative_oxygens = [] hydrogen_oxygens = [] # Iterate over all oxygens with single bond to Sulfur: for anum in atoms[3:]: if st.atom[anum].bond_total == 1: if st.atom[anum].formal_charge == -1: negative_oxygens.append(anum) else: hydrogen_oxygens.append(anum) # Alternate double bond between the oxygens: if len(negative_oxygens) == 1 and len(hydrogen_oxygens) == 0: # st1 is equivalent of: [double_oxygen1, double_oxygen2], negative_oxygens[0] st2 = _set_oxygen_orders(st, sulfur, [double_oxygen1, negative_oxygens[0]], [double_oxygen2], -1) st3 = _set_oxygen_orders(st, sulfur, [double_oxygen2, negative_oxygens[0]], [double_oxygen1], -1) return [st, st2, st3] elif len(hydrogen_oxygens) == 1 and len(negative_oxygens) == 0: # st1 is equivalent of: [double_oxygen1, double_oxygen2], hydrogen_oxygens[0] st2 = _set_oxygen_orders(st, sulfur, [double_oxygen1, hydrogen_oxygens[0]], [double_oxygen2], 0) st3 = _set_oxygen_orders(st, sulfur, [double_oxygen2, hydrogen_oxygens[0]], [double_oxygen1], 0) return [st, st2, st3] else: # No states can be generated from this Sulfate return [st]
[docs]def count_phosphates_and_sulfurs(st): # Find any phosphates: p_smarts = "P(=O)(O)(O)O" s_smarts = "S(=O)(=O)(O)O" p_matches = analyze.evaluate_smarts_canvas(st, p_smarts) s_matches = analyze.evaluate_smarts_canvas(st, s_smarts) main_atoms = set() for atoms in p_matches + s_matches: main_atoms.add(atoms[0]) return len(main_atoms)
[docs]def extend_phosphate_states(st): """ For specified structure, generates phosphate states, and returns list of output structures. Ev:78688 NOTE: Output structure has no hydrogens. """ # TODO move this code into the GenerateStatesTask class hydrogens = [] for a in st.atom: if a.atomic_number == 1: hydrogens.append(a) st.deleteAtoms(hydrogens) # Find any phosphates: p_smarts = "P(=O)(O)(O)O" matches = analyze.evaluate_smarts_canvas(st, p_smarts) # Remove duplicates: matches_no_dups = [] sorted_matches = [] for match in matches: sorted_match = sorted(match) if sorted_match not in sorted_matches: sorted_matches.append(sorted_match) matches_no_dups.append(match) sts = [st] for atoms in matches_no_dups: all_extended_sts = [] for st in sts: extended_sts = _generate_phosphate_states(st, atoms) all_extended_sts.extend(extended_sts) sts = all_extended_sts return sts
[docs]def extend_sulfate_states(st): """ For specified structure, generates sulfate states, and returns list of output structures. Ev:82634 """ # TODO move this code into the GenerateStatesTask class hydrogens = [] for a in st.atom: if a.atomic_number == 1: hydrogens.append(a) st.deleteAtoms(hydrogens) # Find any sulfates in the structure: s_smarts = "S(=O)(=O)(O)O" matches = analyze.evaluate_smarts_canvas(st, s_smarts) # Remove duplicates: matches_no_dups = [] sorted_matches = [] for match in matches: sorted_match = sorted(match) if sorted_match not in sorted_matches: sorted_matches.append(sorted_match) matches_no_dups.append(match) sts = [st] for i, atoms in enumerate(matches_no_dups): # For this sulfate, extend the states: all_extended_sts = [] for j, st in enumerate(sts): extended_sts = _generate_sulfate_states(st, atoms) all_extended_sts.extend(extended_sts) sts = all_extended_sts # Add hydrogens, because hydrogens were added to some atoms: for st in sts: build.add_hydrogens(st) return sts
[docs]def get_chain_sequences(st, remove_tails=True): """ Will read the PDB sequences from the sequence block, and will return a dictionary (keys: chain names; values: sequence strings). If remove_tails is True, will chop off tails that are not existent in the CT, but will leave in the missing loop sections. Will raise RuntimeError on an error, or mmerror on mmct failure. """ st = st.copy() # Make a list of residues from the chain sequence that are in the CT: used_chain_indexes = {} # key: chain; value: list of ints for chain in st.chain: chain_used_indexes = [] for res in chain.residue: index = res.atom[1].property.get('i_pdb_seqres_index') if index is not None: chain_used_indexes.append(index) used_chain_indexes[chain.name] = chain_used_indexes # Get the PDB sequence strings for each chain: try: data_handle = mm.mmct_ct_m2io_get_unrequested_handle(st) except mm.MmException: data_handle = mm.mmct_ct_get_or_open_additional_data(st, True) chain_seq_dict = {} # Ev:124587 Do not attempt to read the sequence if it's not present: # will be 1 in sequence data exists: num_seqres_blocks = mm.m2io_get_number_blocks(data_handle, "m_PDB_SEQRES") if num_seqres_blocks == 0: raise RuntimeError("Input structure had no sequence block data") mm.m2io_goto_block(data_handle, "m_PDB_SEQRES", 1) num_rows = mm.m2io_get_index_dimension(data_handle) for i in range(1, num_rows + 1): chain_strings = mm.m2io_get_string_indexed(data_handle, i, ["s_pdb_chain_id"]) seqres_strings = mm.m2io_get_string_indexed(data_handle, i, ["s_pdb_SEQRES"]) chain = chain_strings[0] seqres = seqres_strings[0] chain_seq_dict[chain] = seqres chain_strings = list(chain_seq_dict) used_chains = list(used_chain_indexes) for chain in used_chains: if chain not in chain_strings: msg = 'WARNING: Chain "%s" did not have a sequence record' % chain print(msg) # Modify the sequence strings by chopping off tails: output_sequences = {} # key: chain name; value: sequence string mm.mmseq_initialize(mm.error_handler) for chain, seqres in chain_seq_dict.items(): if chain not in used_chains: # Ev:102107 Possibly the user has deleted this chain continue # Figure out what residues in this sequence are missing in the CT: used_indexes = used_chain_indexes[chain] # used_indexes is 1-indexed # list of tuples: index=res index; value: (one letter code, whether it # is used) sequence_list = [] numres = len(seqres) // 4 for i in range(numres): beg = i * 4 end = i * 4 + 4 resname = seqres[beg:end] onelett = mm.mmseq_get_one_letter_res_name(resname) # Ev:86135 i is zero-indexed; while the list is 1-indexed: used = bool(i + 1 in used_indexes) sequence_list.append((onelett, used)) in_sequence = "" for onelett, used in sequence_list: in_sequence += onelett if remove_tails: # Chop off the leading tail: while sequence_list: onelett, used = sequence_list[0] if used: break else: sequence_list.pop(0) # Chop off the trailing tail: while sequence_list: onelett, used = sequence_list[-1] if used: break else: sequence_list.pop(-1) if not sequence_list: raise RuntimeError("All of sequence for chain %s was missing" % chain) # Convert to a sequence string (excluding tails): out_sequence = "" for onelett, used in sequence_list: out_sequence += onelett output_sequences[chain] = out_sequence mm.mmseq_terminate() return output_sequences
[docs]def write_sequences_to_fasta(pdbid, sequences_dict, fastafile): pdbid = pdbid.upper() fh = open(fastafile, 'w') for chain, sequence in sequences_dict.items(): numres = len(sequence) line1 = ">%s_%s, %i bases,\n" % (pdbid, chain, numres) line2 = sequence + '\n' fh.write(line1) fh.write(line2) fh.close()
[docs]def does_res_have_missing_side_chains(residue): """ Given a _Residue object, returns True if the residue is missing side-chain atoms. If at least one backbone atom is (also) missing, False is returned. Basically only residues for which Prime missing-side-chains job can be run will return True. """ # Ev:96128 Use a new mechanism to determine which residues have missing # atoms. This method does NOT rely on the i_m_pdb_convert_problem # property, which may be out-of-date: # DPB names for the residue backbone atoms: backbone_names = {" C ", " CA ", " N ", " O "} # Do not show DNA/RNA residues, because we can't fill missing side-chains # on them: pdbres = residue.pdbres if pdbres in [ ' A ', ' C ', ' G ', ' U ', ' DA ', ' DC ', ' DG ', ' TD ' ]: return False if not residue.hasMissingAtoms(): return False heavy_elements = [] heavy_atom_names = set() for atom in residue.atom: if atom.atomic_number > 1: heavy_elements.append(atom.element) heavy_atom_names.add(atom.pdbname) # Ev:116225 Ignore residues that have been modified to -NH2 caps: if heavy_elements == ["N"]: return False # Ev:126307 Ignore residues that have been modified to -NH-CH3 caps: if sorted(heavy_elements) == ["C", "N"]: return False # PPREP-794 Ignore residues that are missing any backbone atoms: # (Note that this makes the two tests above unnecessary) if not heavy_atom_names.issuperset(backbone_names): return False # If got here, the reisude has missing side-chain atoms: return True
[docs]def do_any_residues_have_missing_side_chains(st): """ Returns True if at least one of the residue in the given structure has missing side-chain atoms (backbone atoms are ignored). """ for res in st.residue: if does_res_have_missing_side_chains(res): return True return False
[docs]def fix_sulfur_charges(st): """ Post process by fixing the charge on zero-order-bonded Sulfurs Gives -1 or -2 charge to Sulfurs as appropriate. Deletes a hydrogen from Sulfurs coordinating with metals (Ev:61622) """ hydrogens_to_delete = [] for sulfur in st.atom: if sulfur.element != 'S': continue bonded_to_iron = False # Bonds for non-metal neighbors to this Sulfur (excluding hydrogens) reg_bonds = 0 zob_bonds = 0 # How many bonds to metals hydrogens = [] # Hydrogen atoms bonded to this Sulfur for bond in sulfur.bond: bo = bond.order n = bond.atom2 if n.element == 'Fe': # Set charge of Iron in FeS clusters to +2 n.formal_charge = +2 bonded_to_iron = True if bo == 0: zob_bonds += 1 elif n.element == 'H': hydrogens.append(n) else: # regular bond to non-hydrogen atom reg_bonds += bo # Number of hydrogens depends on number of regular bonds + number of zobs: if reg_bonds + zob_bonds == 0: max_hydrogens = 2 elif reg_bonds + zob_bonds == 1: max_hydrogens = 1 elif reg_bonds + zob_bonds == 2: max_hydrogens = 0 else: # This is an unexpected hybridization state. Do not delete any # hydrogens (PPREP-694). Unless this sulfur is bond to an iron, # in which case we will still delete all hydrogens from it, and will # adjust the formal charge (PPREP-944). if bonded_to_iron: max_hydrogens = 0 else: # Unknown hybridization state and NOT bound to an iron; skip: continue # Delete all hydrogens that are above the number allowed: while len(hydrogens) > max_hydrogens: hydrogens_to_delete.append(hydrogens.pop(0)) kept_hydrogens = len(hydrogens) # The charge depends on the number of regular bonds (zobs excluded): if kept_hydrogens + reg_bonds == 0: sulfur.formal_charge = -2 elif kept_hydrogens + reg_bonds == 1: sulfur.formal_charge = -1 elif kept_hydrogens + reg_bonds == 2: sulfur.formal_charge = 0 if hydrogens_to_delete: st.deleteAtoms(hydrogens_to_delete) st.retype() # Correct the MacroModel atom types.
[docs]def extract_het_from_complex(complex_st, het_atoms): """ Extract the het defined by the given ASL from the complex structure, and include receptor atoms withing 2 bonds (for covalently bound ligands). :param complex_st: Receptor-ligand complex structure :type complex_st: structure.Structure :param het_atoms: List of atom indices for the het :type het_atoms: list(int) :return: Het structure, Het structure plus some receptor atoms (covalent ligands only), and het type. :rtype: (structure.Structure, structure.Structure, HetType) """ # Get Structure for het atoms: het_asl = atomsasl(het_atoms) het_atoms = analyze.evaluate_asl(complex_st, het_asl) hetnum = complex_st.atom[het_atoms[0]].property[HET_NUM_PROP] het_only_st = complex_st.extract(het_atoms) # Get Structure for het atoms and nearby atoms bonded to it: near_het_asl = "(withinbonds 2 (%s))" % het_asl near_heavyatoms = analyze.evaluate_asl(complex_st, near_het_asl) near_heavyatoms_st = complex_st.extract(near_heavyatoms) near_heavyatoms_st.property['i_ppw_hetnum'] = hetnum # For records # Add hydrogens to the near-het atoms, so that Epik does not # produce errors regarding their lewis structure: extra_atoms_asl = "not (%s) and (not atom.ele H)" % het_asl extra_atoms_list = analyze.evaluate_asl(near_heavyatoms_st, extra_atoms_asl) build.add_hydrogens(near_heavyatoms_st, atom_list=extra_atoms_list) if het_only_st.atom_total == 1 and het_only_st.atom[ 1].atomic_number in METAL_AND_ION_CHARGES: het_type = HetType.METAL_OR_ION elif analyze.evaluate_asl(het_only_st, 'ligand'): het_type = HetType.DETECTED_LIGAND elif analyze.evaluate_asl(het_only_st, 'solvent'): het_type = HetType.NON_WATER_SOLVENT else: het_type = HetType.OTHER return het_only_st, near_heavyatoms_st, het_type
[docs]def extract_hets_from_complex(complex_st, het_atoms_lists): """ Detect hets in the given complex structure, and extract them into separate substructures. For covalent ligands, receptor atoms within 2 bonds are also retained, so that Epik can account for them. :param complex_st: Receptor-ligand complex structure :type complex_st: structure.Structure :param het_atoms_lists: List of atoms for all hets :type het_atoms_lists: list(list(int)) :return: Yields tuples of: Het structure, Het structure plus some receptor atoms (covalent ligands only), and het type. :rtype: generator(structure.Structure, structure.Structure, HetType) """ # Delete any zero-order-bonds (Ev:79718). Needed in order for Epik # to generate states correctly for zero-order-bonded ligands. This # deletion # is only done to the temporary copy of the complex structure. complex_st = complex_st.copy() for atom in complex_st.atom: num_bonds = len(atom.bond) # Reverse order so that bond deletion does not affect the loop: for i in range(num_bonds, 0, -1): bond = atom.bond[i] if bond.order == 0: complex_st.deleteBond(atom, bond.atom2) for het_atoms in het_atoms_lists: het_st, state_st, het_type = extract_het_from_complex( complex_st, het_atoms) yield het_st, state_st, het_type
[docs]def prepare_for_epik(complex_st, types_to_process): """ Extract hets from the given complex structure, (except het types that user requested to skip), and separate retained hets into 2 lists: Those that Epik should be run on, those that Epik doesn't produce any states for. Atoms in the original structure will be marked with i_ppw_anum property, for matching Epik output to atoms in the input structure. :param complex_st: Receptor-ligand complex structure :type complex_st: structure.Structure :param types_to_process: List of het types should be processed :type types_to_process: [HetType] :return: List of structures to run Epik on, and list of structures that should be processed, but without Epik. :rtype: [structure.Structure], [structure.Structure] """ # NOTE: Whether states are skipped here or not had no affect on # whether they are shown in the GUI's Substructures tab - it will # always show all hets, even if they have no states generated. # Set the hetero atom number property on the hetero atoms in the stucture, # so Epik results can be matched to the atoms in the complex structure. het_atoms_lists = findhets.find_hets(complex_st) if not het_atoms_lists: return [], [] print("Processing %i het groups..." % len(het_atoms_lists)) # Annotate the structure with properties, so that Epik results can # be mapped back to the input structure: for atom in complex_st.atom: atom.property[ORIG_ANUM_PROP] = atom.index for hetnum, het_atoms in enumerate(het_atoms_lists, start=1): # This is required by prepare_for_epik() function now: for atomnum in het_atoms: complex_st.atom[atomnum].property[HET_NUM_PROP] = hetnum sts_to_run_epik_on = [] sts_to_not_run_epik_on = [] for het_st, state_st, het_type in extract_hets_from_complex( complex_st, het_atoms_lists): # NOTE: For non-covalent ligands, het_st is same as state_st. # Hets that don't match user's criteria are skipped completely if not het_type in types_to_process: continue # Epik will be run on some groups, but is not run on groups that # have valence violations, metals, ions, or excluded groups. skip_epik = (het_type == HetType.METAL_OR_ION or any( a.pdbres.strip() in EPIK_EXCLUSION_LIST for a in state_st.atom)) if skip_epik: sts_to_not_run_epik_on.append(state_st) else: sts_to_run_epik_on.append(state_st) return sts_to_run_epik_on, sts_to_not_run_epik_on
[docs]def filter_undesired_states(orig_st, state_sts): """ Returns a subset of state structures, which excludes metal-binding states for hets that are not within 5A of a metal. :param orig_st: Original complex structure (receptor, ligands, metals) :type orig_st: `structure.Structure` :param state_st: Epik output states to filter. :type state_st: Iterable of `structure.Structure` :return: List of filtered structures. :rtype: List of `structure.Structure` """ filtered_states = [] # For each het, whether it's within 5A of a metal: is_near_metal_dict = {} for state_st in state_sts: # Determine whether this is a metal-only state that should be skipped: # NOTE: If this property is not present, then Epik failed to generate # states for this structure - which means the output state is # effectively NOT "Metal only", and should be kept. if state_st.property.get('b_epik_Metal_Only'): hetnum = state_st.property['i_ppw_hetnum'] if hetnum not in is_near_metal_dict: asl = "(atom.%s %i) AND (within 5 metals)" % (HET_NUM_PROP, hetnum) matches = analyze.evaluate_asl(orig_st, asl) is_near_metal_dict[hetnum] = bool(matches) if not is_near_metal_dict[hetnum]: continue filtered_states.append(state_st) return filtered_states
[docs]def find_ppw_atom(st, anum): """ Find the atom in the given structure whose i_ppw_anum property is set to the given value. ValueError exception is raised if such atom is not found. """ matches = analyze.evaluate_asl(st, "(atom.%s %i)" % (ORIG_ANUM_PROP, anum)) if not matches: raise ValueError("No atom with i_ppw_anum=%s was found" % anum) return st.atom[matches[0]]
[docs]def apply_state(complex_st, state_st): """ Modify the het in complex_st complex (protein/ligand) structure such that its ionization state matches the output that we got from Epik (state_st). :param complex_st: Original complex structure (receptor + het) :type complex_st: `structure.Structure` :param state_st: Epik output for the het group. May include some atoms from the receptor if the ligand is covalently-bound. :type state_st: `structure.Structure` :return: List of atom indices in complex_st that are part of the het. :rtype: (dict, list) """ heavy_atoms = [] for state_atom in state_st.atom: if state_atom.element == 'H': continue # Skip hydrogens state_anum = state_atom.property.get(ORIG_ANUM_PROP) if state_anum is None: # Skip protein atoms for covalently bound ligands continue complex_atom = find_ppw_atom(complex_st, state_anum) ahet = state_atom.property.get(HET_NUM_PROP) heavy_atoms.append(complex_atom.index) # Iterate over all bonds that this atom makes, and adjust them to # match Epik output: for bond in state_atom.bond: state_n_atom = bond.atom2 if state_n_atom.element == 'H': continue # Skip hydrogens neighbor_anum = state_n_atom.property.get(ORIG_ANUM_PROP) if neighbor_anum is None: # Skip protein atoms for covalently bound ligands continue nhet = state_n_atom.property.get(HET_NUM_PROP) # Skip this bond if the neighbor is NOT from the same het (it's # part of the protein to which the ligand is covalently bound - we # include some of the protein atoms in the Epik input). if nhet != ahet: continue # Modify the original full receptor structure: # Ev:96171 Determine which 2 atoms in <complex_st> correspond to this bond: neighbor_atom = find_ppw_atom(complex_st, neighbor_anum) complex_st.getBond(complex_atom, neighbor_atom).order = bond.order # Skip the formal charge if the atom is NOT part of the het (it's # part of the receptor to which the ligand is covalently bound - we # include some of the receptor atoms in the Epik input). if ahet is not None: complex_atom.formal_charge = state_atom.formal_charge # Need to re-type after changing bond orders and formal charges: complex_st.retype() # Delete and re-add het hydrogens in the complex structure: hydrogens_to_delete = hydrogen_neighbors(complex_st, heavy_atoms) renum_map = complex_st.deleteAtoms(hydrogens_to_delete, renumber_map=True) heavy_atoms = [renum_map[anum] for anum in heavy_atoms] natoms = complex_st.atom_total build.add_hydrogens(complex_st, atom_list=heavy_atoms) het_atoms = heavy_atoms + list(range(natoms + 1, complex_st.atom_total + 1)) return het_atoms
[docs]def apply_state_and_calc_score(complex_st, state_st): """ Apply the state <state_st> to <complex_st>, and return the score for the state in the context of the protein complex. :param complex_st: Original complex structure (receptor + het) :type complex_st: `structure.Structure` :param state_st: Epik output for the het group. May include some atoms from the receptor if the ligand is covalently-bound. :type state_st: `structure.Structure` :return: Tuple of state score, Epik penalty, and information string. Epik penalty will be None for metal states. :rtype: (float, float/None, str) """ # TODO Move this to be an instance of HetState in the future het_atoms = apply_state(complex_st, state_st) state_penalty, metal_penalties = get_state_penalties(state_st) state_score, info_str = calc_state_score(complex_st, het_atoms, state_penalty, metal_penalties) return state_score, state_penalty, info_str
[docs]def get_state_penalties(state_st): """ Return the Epik state penalty for this Epik output structure, as well as metal penalties for each atom that has the r_epik_Metal_State_Penalty property set. """ # Metal states don't go through Epik, so don't have a penalty. # Setting it to zero here will effectively prevent sorting of # states for metal hets. state_penalty = state_st.property.get(STATE_PANELTY_PROP, 0.0) metal_penalties = {} for state_atom in state_st.atom: state_anum = state_atom.property.get(ORIG_ANUM_PROP) if state_anum is None: # This as an added hydrogen continue metal_penalty = state_atom.property.get(METAL_PENALTY_PROP) if metal_penalty is not None: metal_penalties[state_anum] = metal_penalty return state_penalty, metal_penalties
[docs]def get_smallest_metal_penalty(complex_st, metal_penalties): """ Return the lowest metal penalty of all het atoms, considering only atoms that are within 3A of a receptor metal. """ all_metals = analyze.evaluate_asl(complex_st, 'metals') smallest_metal_penalty = None for anum, metal_penalty in metal_penalties.items(): try: complex_atom = find_ppw_atom(complex_st, anum) except ValueError: # This hydrogen was added by Epik and is not in the input structure. continue # Check if any metal within 3A exists AND this atom has a lower # metal penalty than previously seen atoms close to metals. if smallest_metal_penalty is None or metal_penalty < \ smallest_metal_penalty: for metal_anum in all_metals: if complex_st.measure(metal_anum, complex_atom) <= 3: # 3A smallest_metal_penalty = metal_penalty break return smallest_metal_penalty
[docs]def calc_hbond_energy(hydrogen, acceptor): """ Calculate H-bond energy. Overly simple, to address PLDB-3337 and similar. """ if RANK_NH_O_HIGHER and hydrogen.element == 'H': donor = next(hydrogen.bonded_atoms) if donor.element == 'N' and acceptor.element == 'O': return HBOND_ENERGY_NHO return HBOND_ENERGY
[docs]def calc_state_score(complex_st, het_atoms, state_penalty, metal_penalties): """ Calculate the score for the current het state (must already been applies to the complex structure). :het_atoms: List of atoms (complex indexed) that are part of the het. """ smallest_metal_penalty = get_smallest_metal_penalty(complex_st, metal_penalties) # Total score (higher is better): score = 0.0 # Calculate the H-Bonds contribution: hbonds = hbond.get_hydrogen_bonds(complex_st, het_atoms) hbond_count = len(hbonds) for bond in hbonds: score -= calc_hbond_energy(*bond) if state_penalty is not None: info_str = "State penalty: %.2f kcal/mol" % state_penalty score -= state_penalty else: # No state penalty; this is a metal state. Score will be 0.0. info_str = "State penalty: NA" if smallest_metal_penalty is not None: # Reward all states with metal interactions by up to 10 points. # Originally it was planned to replace the state penalty with the # smallest metal state penalty, but that caused non-metal interacting # states with better state penalties to be selected. # Metal state penalties are small positive values - lower is better. score += (10.0 - smallest_metal_penalty) info_str = "Metal state penalty: %.2f kcal/mol" % smallest_metal_penalty # Penalize amides that have an extra hydrogen that can clash # with a metal. Currently only terminal amides are handled - in # the future, consider making this algorithm more generic. num_metal_clashes = get_num_clashes_with_metals(complex_st, het_atoms) score -= num_metal_clashes * METAL_AMIDE_CLASH_PENALTY info_str += "; H-Bond count: %i" % hbond_count total_charge = 0 for anum in het_atoms: total_charge += complex_st.atom[anum].formal_charge if total_charge > 0: charge_str = 'Q: +%i' % total_charge else: charge_str = 'Q: %i' % total_charge # The info string shows the Epik state penalty and the number of # hydrogen bonds between this state and the receptor. info_str += "; %s" % charge_str return score, info_str
def _generate_metal_and_ion_states(st): """ Ionize the metal or ion atom in the specified state structure (only one, if exists) and return a list of ionized sts. """ out_states = [] # Ionize the metal: for atom in st.atom: charges = METAL_AND_ION_CHARGES.get(atom.atomic_number) if charges: # The original state should be the first state (Ev:65897): out_states.append(st) for ch in charges: if ch != atom.formal_charge: stc = st.copy() stc.atom[int(atom)].formal_charge = ch stc.retype() out_states.append(stc) if not out_states: # No metal in <st>, return the original: out_states.append(st) return out_states
[docs]def generate_phosphate_and_sulfur_states(sts): """ For each ligand state, expand states for the phosphate and sulfate groups. """ expanded_sts = [] for st in sts: # For each input state, generate metal and phosphate states # Ev:78688 if count_phosphates_and_sulfurs(st) > 4: expanded_sts.append(st) continue for st2 in extend_phosphate_states(st): for st3 in extend_sulfate_states(st2): expanded_sts.append(st3) return expanded_sts
[docs]def generate_metal_and_ion_states(sts): # If Epik is not run, the following is done only to the original het: expanded_sts = [] for st in sts: for st2 in _generate_metal_and_ion_states(st): expanded_sts.append(st2) return expanded_sts
def _find_waters(st): """ Return a list of water atoms. Each item in the list is a tuple of 3 water atom indices. Waters that don't have two hydrogens are skipped. """ water_asl = 'water and (atom.ele O)' water_atoms = analyze.evaluate_asl(st, water_asl) waters_list = [] for ai in water_atoms: a = st.atom[ai] # Make a list of the atoms of this water: curr_water_atoms = [ai] for h in a.bonded_atoms: if h.atomic_number == 1: curr_water_atoms.append(h.index) if len(curr_water_atoms) != 3: print(' WARNING: skipping water since it has %i hydrogens' % (len(curr_water_atoms) - 1)) continue curr_water_atoms = tuple(curr_water_atoms) waters_list.append(curr_water_atoms) return waters_list
[docs]def get_bridging_waters(st, min_hbonds=3): """ Return a list of all waters in the specified structure that make at least <min_hbonds> number of H-bonds (H-bonds to other waters excluded). The list contains both oxygen and hydrogen atoms (if present). """ waters_list = _find_waters(st) all_water_atoms = set() for water_mol_atoms in waters_list: all_water_atoms.update(water_mol_atoms) def _get_num_nonwater_hbonds(water_mol_atoms): count = 0 for (atom1, atom2) in hbond.get_hydrogen_bonds(st, water_mol_atoms): anum1 = int(atom1) anum2 = int(atom2) if anum1 in water_mol_atoms: other_atom = anum2 else: other_atom = anum1 if other_atom not in all_water_atoms: count += 1 return count bridging_water_atoms = [] for water_atoms in waters_list: num_hbonds = _get_num_nonwater_hbonds(water_atoms) if num_hbonds >= min_hbonds: bridging_water_atoms += water_atoms return bridging_water_atoms
[docs]def hydrogen_neighbors(st, atoms): """ Returns the list of neighbor (bonded) atoms that are hydrogens. :param st: Structure where atoms are from. :type st: `structure.Structure` :param atoms: List of atom indices for the heavy atoms. :type atoms: list of ints :return: List of hydrogen atom indices :rtype: list of ints. """ hydrogens = [] for heavy_atom in atoms: for bonded_atom in st.atom[heavy_atom].bonded_atoms: if bonded_atom.index not in hydrogens and bonded_atom.atomic_number == 1: hydrogens.append(bonded_atom.index) return hydrogens
[docs]def get_num_clashes_with_metals(st, het_anums): """ Returns the number of times there is a hydrogen that is clashing with a metal. For now, only hydrogens on aliphatic nitrogens that are zero- order bonded to metals are considered. Geometry is not considered, because amides are rotatable groups. :param st: Structure where atoms are from. :type st: structure.Structure :param het_anumst of atom indices for the heavy atoms. :type het_anumst of ints :return: List of nitrogen atoms. :rtype: list(structure._StructureAtom) """ num_clashes = 0 for anum in het_anums: atom = st.atom[anum] if atom.element != 'N': continue zob_present = False h_present = False r_groups = 0 valence = 0 for bond in atom.bond: if bond.order == 0: zob_present = True else: valence += bond.order if bond.atom2.element == 'H': h_present = True else: r_groups += 1 if zob_present and h_present and r_groups == 1 and valence == 4: num_clashes += 1 return num_clashes
[docs]def get_pdb_id(st): """ Returns the PDB ID of the given structure. If the property does not exist, returns the string "unknown". """ pdbid = st.property.get("s_pdb_PDB_ID") if not pdbid: return "unknown" return pdbid.lower()
[docs]def get_het_name(st, het_atoms, include_chain=False): """ Return the het "name" to display for the user for the given het. Assumes all residues have the same chain name. Example outputs: "HEM 123a" "HEM A:123a" "GLY-VAL-PRO 110-111-112" "GLY-VAL-PRO A:110-111-112" :param st: Structure containing the het :type st: `structure.Structure` :param het_atoms: Atoms from this het. :type het_atoms: list of ints :param include_chain: Whether the chain name should be included. :type include_chain: bool """ residues = [] for ai in het_atoms: res = st.atom[ai].getResidue() if not res in residues: residues.append(res) name_str = '-'.join([res.pdbres.strip() for res in residues]) + ' ' if include_chain: name_str += st.atom[het_atoms[0]].chain + ':' name_str += '-'.join( ['%i%s' % (res.resnum, res.inscode.strip()) for res in residues]) return name_str
[docs]def add_prepared_props(st): """ Adds "prepared" and "prepared with version" properties to the given structure. """ st.property['b_ppw_prepared'] = True st.property['s_ppw_prepared_with_version'] = schrodinger.get_release_name()
[docs]def generate_bio_units(st): """ If the given structure has Biounit data, generate conformations for each biounit. If multiple bio units are defined, multiple structures are returned - oner per bio unit. If no biounit data is present, original structure is returned (in list). """ biounits = biounit.biounits_from_structure(st) if not biounits: return [st] sts = [biounit.apply_biounit(st, bu) for bu in biounits] return sts
[docs]def apply_het_states(orig_st, state_sts, logger): """ For each het state, generate a complex structure with that state applied, and return a list of (state score, complex structure), one for each state. :param orig_st: Input protein complex structure. :type orig_st: structure.Structure :param state_sts: List of ligand states to apply. :type state_sts: List[structure.Structure] :param logger: Logger to report output to. :type logger: logging.Loggger :return: List of state score and complex structure tuples :rtype: List[Tuple[float, structure.Structure]] """ # For each state for this het: hetname = get_het_name(state_sts[0], state_sts[0].getAtomIndices(), include_chain=True) logger.debug("Processing het %s: " % hetname) # TODO Convert this to a list of HetState objects in the future: applied_states = [] # For each het/lig structure, modify the input complex structure so that # its het has the same ionization state: for state_st in state_sts: complex_st = orig_st.copy() # Apply the state and calculate the score: state_score, epik_penalty, info_str = apply_state_and_calc_score( complex_st, state_st) applied_states.append((state_score, complex_st)) logger.debug(' number of states applied: %i' % len(applied_states)) return applied_states
[docs]def delete_far_waters(st, angstroms=5): het_atom_numbers = get_het_atom_numbers(st) if het_atom_numbers: # hets found hets_asl = atomsasl(het_atom_numbers) far_waters_asl = "water and not fillres within %s (%s)" % (angstroms, hets_asl) else: far_waters_asl = "water" far_water_atoms = analyze.evaluate_asl(st, far_waters_asl) st.deleteAtoms(far_water_atoms)
[docs]def delete_non_bridging_waters(st, delwater_hbond_cutoff): """ Delete waters that do not make at least this number of hydrogen bonds to non-waters. """ # Make a list of all water atoms: water_atoms = analyze.evaluate_asl(st, "water") # Make a list of bridging water atoms: bridging_water_atoms = get_bridging_waters(st, delwater_hbond_cutoff) # Make a list of water atom that need to be deleted: del_atoms = [a for a in water_atoms if a not in bridging_water_atoms] del_waters = [ str(st.atom[a].getResidue()) for a in del_atoms if st.atom[a].element == "O" ] if del_waters: st.deleteAtoms(del_atoms) return del_waters
[docs]def idealize_hydrogen_temp_factor(st): """ Set temperature factor for hydrogens without temperature factors to that of the bonded heavy atom (if available). This is important for X-ray refinement to prevent R-factor collapse. :param st: Input Structure :type st: Schrodinger.structure :return: None, but alters hydrogen temperature factors in place. """ for atom in st.atom: if atom.atomic_number != 1 or atom.temperature_factor: continue for heavy_atom in atom.bonded_atoms: temperature_factor = heavy_atom.temperature_factor if temperature_factor is not None: atom.temperature_factor = temperature_factor if atom.alt_xyz is None: continue if heavy_atom.alt_xyz is None: temperature_factor = heavy_atom.temperature_factor else: temperature_factor = heavy_atom.property.get( 'r_m_alt_pdb_tfactor') if temperature_factor is not None: atom.property['r_m_alt_pdb_tfactor'] = temperature_factor
[docs]def get_het_atom_numbers(st): """ Get the list of het groups in a structure :param st: Input Structure :type st: schrodinger.structure.Structure :return: atom numbers in hetero atom fields. Each list is the atoms within a given hetero group :rtype: list of list of integers """ het_atom_numbers = [] # List of atoms number for ALL hets for het_atoms in findhets.find_hets(st): for ai in het_atoms: het_atom_numbers.append(ai) return het_atom_numbers
[docs]class PrepWizardSettings(parameters.CompoundParam): jobname: str = 'prepwizard' force_field: str = '2005' preprocess: bool = True assign_bond_orders: bool = True use_ccd: bool = True add_hydrogens: bool = True readd_hydrogens: bool = False idealize_hydrogen_tf: bool = True add_terminal_oxygens: bool = False cap_termini: bool = False preprocess_delete_far_waters: bool = False preprocess_watdist: float = None delete_far_waters: bool = True watdist: float = 5.0 treat_metals: bool = True treat_disulfides: bool = False treat_glycosylation: bool = False treat_palmitoylation: bool = False selenomethionines: bool = False antibody_cdr_scheme: annotation.AntibodyCDRScheme = annotation.DEFAULT_ANTIBODY_SCHEME renumber_ab_residues: bool = False fillsidechains: bool = False fillloops: bool = False custom_fasta_file: bool = None run_epik: bool = True use_epikx: bool = False max_states: int = 1 run_protassign: bool = True samplewater: bool = False include_epik_states: bool = False xtal: bool = False pH: float = 7.0 epik_pH: float = 7.0 epik_pHt: float = 2.0 use_propka: bool = True propka_pH: float = 7.0 label_pkas: bool = False force_list: list = [] minimize_adj_h: bool = False protassign_number_sequential_cycles: int = None protassign_max_cluster_size: int = None delwater_hbond_cutoff: int = 0 delete_nonbridging_waters: bool = False run_impref: bool = True rmsd: float = 0.3 fixheavy: bool = False reference_struct: structure.Structure = None use_PDB_pH: bool = False preserve_st_titles: bool = False
[docs]def update_task_from_settings(task, settings): """ Update the given PPWorkflowTask with these given settings. """ input = task.input input.do_preprocess = settings.preprocess input.do_hbond = settings.run_protassign input.do_cleanup = settings.run_impref or settings.delete_far_waters or settings.delete_nonbridging_waters # PPWorkflowTask has a do_cleanup parameters, while PrepWizardSettings # does not (the task combines # 3 operations into one). pwtasks.update_params(input.preprocess, settings) pwtasks.update_params(input.preprocess.generate_states_settings, settings) pwtasks.update_params(input.hbond, settings) pwtasks.update_params(input.cleanup, settings)
@application.require_application(create=True, use_qtcore_app=True) def prepare_structure(st, options, logger=None): """ Prepare the given protein structure using PrepWizard. :param st: Protein structure to prepare. :type st: structure.Structure :param options: PrepWizard options. :type options: PrepWizardSettings :param logger: Optional logger to use. :type logger: log.logging.Logger :return: Prepared structures. Depending on the number of hets in the protein, up to options.max_states number of structures will be produced; by default 1. :rtype: List[structure.Structure] """ task = pwtasks.PPWorkflowTask() update_task_from_settings(task, options) task.specifyTaskDir(os.getcwd()) task.name = options.jobname task.logger = logger task.input.struct = st # Run the backend method, without adding a job control layer. This also # causes all output to go to stdout instead of log files. task.backendMain() expanded_sts = task.output.structs if logger: logger.info(' Number of states generated: %i' % len(expanded_sts)) return expanded_sts
[docs]def tag_st_het_num_prop(sts): """ Set the hetero atom number property to the sequence number in `sts` on the structures :param sts: the structures to tag :type sts: List[structure.Structure] """ for idx, st in enumerate(sts, start=1): st.property[HET_NUM_PROP] = idx