Source code for schrodinger.application.bioluminate.protein

"""
Module to gather residue property data for proteins.

Copyright (c) Schrodinger, LLC. All rights reserved
"""

# Contributors: Joshua Williams

import errno
import itertools
import os
import re
from collections import OrderedDict
from past.utils import old_div
from shutil import rmtree

import numpy

from schrodinger import structure
from schrodinger.application.bioluminate import surfcomp
from schrodinger.application.prime import input as prime_input
from schrodinger.infra import mm
from schrodinger.infra.mm import MmException
from schrodinger.job import jobcontrol
from schrodinger.job.util import hunt
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import minimize
from schrodinger.utils import fileutils

_version = '$Revision: 0.0 $'

#- Imports -------------------------------------------------------------------

try:
    from schrodinger.application.prime.packages.multiloopbuilder import \
        MultiLoopBuilder
except:
    MultiLoopBuilder = None

try:
    from . import propka_parse
    from . import run_propka
except:
    propka_parse = None
    run_propka = None

#- Globals -------------------------------------------------------------------

# Used to define whether an atom is nonpolar (see atom_is_nonpolar function)
NONPOLAR_HEAVY_ELEMENTS = ['C', 'S']

ALL_RESIDUES = [
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'HIP', 'HID',
    'HIE', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR',
    'VAL'
]

# NOTE: Why are ARN, ASH, GLH, LYN not in the above list, while HIP/HID/HIE is?

SUPPORTED_RESIDUES = ALL_RESIDUES + ['ARN', 'ASH', 'GLH', 'LYN']

AA_3_LETTER_MAP = {
    resname: structure.RESIDUE_MAP_3_TO_1_LETTER[resname]
    for resname in SUPPORTED_RESIDUES
}

# Mapping of residue names from 1-letter to the "most standard" 3-letter name:
# This dict is needed to support 1-letter names in the user's input files.
AA_1_LETTER_MAP = structure.RESIDUE_MAP_1_TO_3_LETTER

# Removed unless we support: 'ARN', 'ASH', 'GLH', 'LYN'
POLAR_RESIDUES = [
    'ARG', 'ASP', 'GLU', 'HIS', 'ASN', 'GLN', 'LYS', 'SER', 'THR', 'TYR', 'HID',
    'HIE', 'HIP'
]

# Removed unless we support: 'CYX'
NONPOLAR_RESIDUES = [
    'PHE', 'LEU', 'ILE', 'TRP', 'VAL', 'MET', 'PRO', 'CYS', 'ALA'
]

# Separate list that includes non-polar residues which are not always
# applicable/meaningful in UI context (used only for SASA at the moment).
SASA_NONPOLAR_RESIDUES = NONPOLAR_RESIDUES + ['CYT']

# Removed unless we support: 'TYO'
AROMATIC_RESIDUES = ['PHE', 'TYR', 'TRP', 'HIS', 'HIE', 'HIP']

BASIC_RESIDUES = ['ARG', 'LYS', 'HIS', 'HIP']
ACIDIC_RESIDUES = ['ASP', 'GLU']
CHARGED_RESIDUES = BASIC_RESIDUES + ACIDIC_RESIDUES

# Removed unless we support: 'TYO', 'ASH', 'GLH', 'HIE'
NEUTRAL_RESIDUES = [
    'ALA', 'ASN', 'CYS', 'GLN', 'GLY', 'HID', 'HIE', 'ILE', 'LEU', 'MET', 'PHE',
    'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'
]

#TODO: Push to external JSON file
# Taken from component_interactions.py (Shawn Watts)
RESIDUE_ROTOR_COUNT = {
    'ALA': 0,
    'ARG': 4,  # Excludes C_epsilon N_zeta NH3 rotor.
    'ARN': 4,
    'ASN': 2,
    'ASH': 2,
    'ASP': 2,
    'CYS': 2,
    'GLN': 3,
    'GLU': 3,
    'GLH': 3,
    'GLY': 0,
    'HIS': 2,
    'HID': 2,
    'HIP': 2,
    'HIE': 2,
    'ILE': 2,  # Excludes C_gamma and C_delta trivial rotors.
    'LEU': 2,  # Excludes C_delta trivial rotor.
    'LYN': 4,  # Excludes C-NH3 rotor.
    'LYS': 4,  # Excludes C-NH3 rotor.
    'MET': 3,  # Excludes S-CH3 rotor.
    'PHE': 2,
    'PRO': 0,
    'SER': 2,  # Includes C-OH rotor.
    'THR': 2,  # Includes C-OH rotor, excludes C_gamma trivial rotor.
    'TRP': 2,
    'TYR': 3,  # Includes Phenol OH rotor.
    'VAL': 1,  # Excludes C_gamma trivial rotors.
}

#TODO: Push to external JSON file
# Taken from: J Mol Biol 157:105, 1982.
HYDROPATHY_SCALE = {
    'ALA': 1.8,
    'ARG': -4.5,
    'ARN': -4.5,  # Neutral-Arginine (Maestro-specific)
    'ASH': -3.5,  # Protonated Aspartic (Maestro-specific)
    'ASN': -3.5,  # Asparagine
    'ASP': -3.5,  # Aspartic acid
    'CYS': 2.5,
    'CYX': 2.5,  # Cysteine in sulphide linkage
    'CYT': 2.5,  # Ionised Cysteine (Maestro-specific)
    'GLH': -3.5,  # (Maestro-specific)
    'GLN': -3.5,
    'GLU': -3.5,
    'GLY': -0.4,
    'HIS': -3.2,
    'HID': -3.2,  # Histidine (Maestro-specific)
    'HIE': -3.2,  # Histidine (Maestro-specific)
    'HIP': -3.2,  # Histidine (Maestro-specific)
    'ILE': 4.5,
    'LEU': 3.8,
    'LYN': -3.9,  # Protonated Lysine (Maestro-specific)
    'LYS': -3.9,
    'MET': 1.9,
    'PHE': 2.8,
    'PRO': -1.6,
    'SER': -0.8,
    'SRO': -0.8,  # Ionized Serine (Maestro-specific)
    'SEP': -0.8,  # Phosphorylated serine (Maestro-specific)
    'THR': -0.7,
    'THO': -0.7,  # Ionized Threonine (Maestro-specific)
    'TPO': -0.7,  # Phosporylated Threonine (Maestro-specific)
    'TRP': -0.9,
    'TYR': -1.3,
    'TYO': -1.3,  # Ionized Tyrosine (Maestro-specific)
    'PTR': -1.3,  # Phosphorylated tyrosine (Maestro-specific)
    'VAL': 4.2,
}

CHAIN_NAME_REGEX = '(?P<chain>[a-zA-Z0-9_]{1})'

MUTATION_1_LETTER_RE = re.compile(
    CHAIN_NAME_REGEX + r"""
    :
    (?P<resnum>-?\d+)
    (?P<inscode>[a-zA-Z]{1})?  # optional
    \s?                         # optional
    (?P<mutations>[%s]+)?       # optional
    """ % (''.join(list(AA_1_LETTER_MAP))), re.VERBOSE)
MUTATION_3_LETTER_RE_BASE = CHAIN_NAME_REGEX + r"""
    :
    (?P<resnum>-?\d+)
    (?P<inscode>[a-zA-Z]{1})?  # optional
    \s?                        # optional
    (?P<mutations>(%s)(,%s)*)? # optional
    """
MUTATION_3_LETTER_RE = re.compile(
    MUTATION_3_LETTER_RE_BASE %
    ('|'.join(SUPPORTED_RESIDUES), '|,'.join(SUPPORTED_RESIDUES)), re.VERBOSE)

MUTATION_RE = re.compile(
    CHAIN_NAME_REGEX + r"""
    :
    (?P<resnum>-?\d+)
    (?P<inscode>[a-zA-Z]{1})?  # optional
    ->
    (?P<new_resname>(%s))
    """ % ('|'.join(SUPPORTED_RESIDUES)), re.VERBOSE)

PROP_CONVERSIONS = {
    'e_pot': 'Potential_energy',
    'e_internal': 'Internal_energy',
    'e_interaction': 'Interaction_energy',
    'prime_energy': 'Prime_Energy',
    'prime_coulomb': 'Prime_Coulomb',
    'prime_cov': 'Prime_Covalent',
    'prime_vdw': 'Prime_vdW',
    'prime_gb': 'Prime_Solv_GB',
    'prime_sa': 'Prime_Solv_SA',
    'prime_lipo': 'Prime_Lipo',
    'prime_hbond': 'Prime_Hbond',
    'prime_packing': 'Prime_Packing',
    'prime_selfcont': 'Prime_SelfCont',
    'affinity_coulomb': 'Affinity_Coulomb',
    'affinity_cov': 'Affinity_Covalent',
    'affinity_vdw': 'Affinity_vdW',
    'affinity_gb': 'Affinity_Solv_GB',
    'affinity_sa': 'Affinity_Solv_SA',
    'affinity_lipo': 'Affinity_Lipo',
    'affinity_hbond': 'Affinity_Hbond',
    'affinity_packing': 'Affinity_Packing',
    'affinity_selfcont': 'Affinity_SelfCont',
    'stability_coulomb': 'Stability_Coulomb',
    'stability_cov': 'Stability_Covalent',
    'stability_vdw': 'Stability_vdW',
    'stability_gb': 'Stability_Solv_GB',
    'stability_sa': 'Stability_Solv_SA',
    'stability_lipo': 'Stability_Lipo',
    'stability_hbond': 'Stability_Hbond',
    'stability_packing': 'Stability_Packing',
    'stability_selfcont': 'Stability_SelfCont',
    'stability_reference': 'Stability_Reference',
    'pka': 'pKa',
    'atomic_sasa_polar': 'Atomic_SASA_(polar)',
    'atomic_sasa_nonpolar': 'Atomic_SASA_(nonpolar)',
    'sasa_polar': 'SASA_(polar)',
    'sasa_nonpolar': 'SASA_(nonpolar)',
    'sasa_total': 'SASA_(total)',
    'hydropathy': 'Hydropathy',
    'rotatable': 'Total_rotatable_bonds',
    'vdw_surf_comp': 'vdW_Surface_Complementarity',
    # The following are queued in for inclusion but no backends have been added
    #    'complementarity'      : 'Complentarity',
    #    'solubility'           : 'Solubility',
    #    'aggregation'          : 'Aggregation',
}

#- Functions -----------------------------------------------------------------


[docs]def find_residue_atom(st, chain, resnum, inscode): # Find an atom from this residue in the CT: for a in st.atom: if a.chain == chain and a.resnum == resnum and a.inscode == inscode: return a raise ValueError("Failed to find residue: %s:%i%s" % (chain, resnum, inscode))
[docs]def get_residue_asl(residue, ca=False): """ Creates an ASL based on a residue's chain, residue number and inscode. The ASL can optionally only include the alpha carbon of the residue. :param residue: The residue to create an ASL for :type residue: `schrodinger.structure._Residue` :raise RuntimeError: If the passed in residue is incorrect type :return: ASL expression for residue :rtype: str """ if not isinstance(residue, structure._Residue): err = 'Residues must be a structure._Residue object' raise RuntimeError(err) # We need the extra "(" and ")". If you want to link these together # you will want to join with OR asl = '((chain.name "%s")' % residue.chain asl += ' AND (res.num %d)' % residue.resnum asl += ' AND (res.ins "%s")' % residue.inscode if ca: asl += ' AND (atom.ptype " CA "))' else: asl += ')' return asl
[docs]def get_residues_asl(residues, ca=False): """ Creates an ASL based on a list of residue's chains, residue numbers and inscodes. The ASL can optionally only include the alpha carbon of the residue. :param residue: The residues to create an ASL for :type residue: list or tuple of `schrodinger.structure._Residue` :raise RuntimeError: If residues are not a list or tuple :raise RuntimeError: If any passed in residues are incorrect type :return: ASL expression for all residues :rtype: str """ residues_asls = [] if not isinstance(residues, (list, tuple)): err = 'A list or tuple is required to be passed in to ' err += '"get_residues_asl" function.' raise RuntimeError(err) # We are using _Residue objects for res in residues: residues_asls.append(get_residue_asl(res, ca=ca)) # Join all the residues with an OR residues_asl = ' OR '.join(residues_asls) return residues_asl
[docs]def valid_asl(st, asl): """ Returns True/False depending on whether the asl is a valid expression or not. """ try: analyze.evaluate_asl(st, asl) return True except MmException: return False
[docs]def get_residues_within(st, residues, within=0.0, ca=False): """ Returns a list of residues for `st` that are within `within` angstroms of each residue. If the `ca` keyword is True the within calculation will only look for alpha carbon in `residues`. This means that if `within` is set to 5.5 angstroms and there is only a single atom that belongs to a residue at that cutoff, the residue that the atom belongs to will be refined. :param st: Structure to evaluate and which all `residues` correspond :type st: `schrodinger.structure.Structure` :param residues: All residues targeted for refinement :type residues: list or tuple of `schrodinger.structure._Residue` :param within: Distance (angstroms) of residues to include in refinement :type within: float :param ca: Use only alpha carbons to find residues within :type ca: bool :returns: List of `schrodinger.structure._Residue` objects :rtype: list """ residue_asls = get_residues_asl(residues, ca=ca) within_asl = 'within %.2f (%s)' % (within, residue_asls) atom_indices = analyze.evaluate_asl(st, within_asl) # Make a list (dict's values) of residues to refine retaining the order # passed in residue_map = {} for idx in atom_indices: atom = st.atom[idx] chain = atom.chain.strip() or '_' key = '%s:%s' % (chain, atom.resnum) # We create a key to make sure we only have one residue # since there is no way to do: if atom.getResidue() in list residue_map[key] = atom.getResidue() return list(residue_map.values())
[docs]def residue_is_polar(residue): """ Tests whether a residue is polar :param residue: Residue to test :type residue: `structure._Residue` :rtype: bool """ pdb_name = residue.pdbres.strip() if pdb_name in POLAR_RESIDUES: return True else: return False
[docs]def residue_is_nonpolar(residue): """ Tests whether a residue is nonpolar (for SASA) :param residue: Residue to test :type residue: `structure._Residue` :rtype: bool """ pdb_name = residue.pdbres.strip() if pdb_name in SASA_NONPOLAR_RESIDUES: return True else: return False
[docs]def atom_is_nonpolar(atom): """ Returns true if the atom is considered non-polar. Here are the rules for non-polar atoms: - The atom's element is a C or S - The atom's element is a H and one bonded atom's element is C or S :type atom: `schrodinger.structure._StructureAtom` """ if atom.element in NONPOLAR_HEAVY_ELEMENTS: return True if atom.element == 'H': for a in atom.bonded_atoms: if a.element in NONPOLAR_HEAVY_ELEMENTS: return True return False
#- Classes -------------------------------------------------------------------
[docs]class PrimeConfig(prime_input.Prime): """ Class containing the methods to write Prime input files. NOTE THAT THIS ALWAYS USES OPLS2005 """ ALL_RESIDUES = 'all'
[docs] def __init__(self, st_filename, set_defaults=True, **kwargs): self.st_filename = st_filename if set_defaults: self.defaults = { 'STRUCT_FILE': st_filename, 'JOB_TYPE': 'REFINE', 'USE_CRYSTAL_SYMMETRY': 'no', 'USE_RANDOM_SEED': 'no', 'SEED': 0, 'INT_DIEL': 1.00, 'EXT_DIEL': 80.00, 'SGB_MOD': 'vsgb2.0', 'OPLS_VERSION': '2005', 'USE_MEMBRANE': 'no' } for k, v in kwargs.items(): if k in self.defaults: self.defaults[k] = v else: self.defaults = {} super(PrimeConfig, self).__init__(kw=self.defaults)
[docs] def addResidues(self, residues=None): """ Adds residues to consider for refinement. The passed in argument can take the form of: - ASL expression - List of `schrodinger.structure._Residue` objects - 'all' - None """ if residues is None: self['SELECT'] = 'all' else: self['SELECT'] = 'pick' for i, res in enumerate(residues): self['RESIDUE_' + str(i)] = res
[docs] def prepEnergy(self): self['PRIME_TYPE'] = 'ENERGY'
[docs] def prepMinimize(self, residues=None): self['PRIME_TYPE'] = 'REAL_MIN' self['ENERGY_BREAKDOWN'] = 'yes' self.addResidues(residues)
[docs] def prepResidue(self, residues=None): self['PRIME_TYPE'] = 'SIDE_PRED' self['NUM_SC_OUTPUT_STRUCT'] = 1 self['RESIDUE_BACKMIN'] = 'yes' self.addResidues(residues)
[docs] def prepSidechain(self, residues=None): self['PRIME_TYPE'] = 'SIDE_PRED' self['NUM_SC_OUTPUT_STRUCT'] = 1 self.addResidues(residues)
[docs] def prepSidechainCBeta(self, residues=None): self.prepSidechain(residues=residues) self['SAMPLE_CBETA'] = 'yes' self['MAXCONEANG'] = 30.0
[docs] def prepSidechainBB(self, residues=None): self.prepSidechain(residues=residues) self['SAMPLE_BACKBONE'] = 'yes' self['BACKBONELEN'] = 3
[docs] def prepActive(self, lig_id, residues=None): self['PRIME_TYPE'] = 'SITE_OPT' self['LIGAND'] = lig_id self['NPASSES'] = 1 self.addResidues(residues)
[docs] def prepLoop(self, start_res=None, end_res=None, res_sphere=7.5, maxcalpha=None, protocol='LOOP_BLD', loop2=None, max_jobs=0, residues=None): """ :type start_res: string :param start_res: loop start residue, e.g. A:15 :type end_res: string :param end_res: loop start residue, e.g. A:20 :type res_sphere: float :param res_sphere: radius of nearby residue refinement :type maxcalpha: float :param maxcalpha: CA atom movement constraint :type protocol: string :param protocol: loop refinement protocol :type loop2: list :param loop2: the definition of the second loop, e.g. ['A:4','A:6'] :type residues: None :param residues: Unused, kept for API compatibility :type max_jobs: int :param max_jobs: how many processes will be run simultaneously """ self['PRIME_TYPE'] = protocol self['LOOP_0_RES_0'] = start_res self['LOOP_0_RES_1'] = end_res self['RES_SPHERE'] = res_sphere if maxcalpha is not None: self['MAX_CA_MOVEMENT'] = maxcalpha self['MIN_OVERLAP'] = 0.70 self['NPASSES'] = 1 if protocol == 'LOOP_PAIR': # cooperative loop sampling if loop2 is None: raise ValueError('A second loop must be defined') self['LOOP_1_RES_0'] = loop2[0] self['LOOP_1_RES_1'] = loop2[1] if max_jobs != 0: self['MAX_JOBS'] = max_jobs
[docs] def prepAntibodyLoop(self, start_res=None, end_res=None, cpus=1, residues=None): self['PRIME_TYPE'] = 4 self['LOOP_0_RES_0'] = start_res self['LOOP_0_RES_1'] = end_res self['RES_SPHERE'] = 5.0 self['NUM_FIXED_STAGE'] = 5 self['MAX_CA_REF1'] = 6.0 self['MAX_CA_REF2'] = 3.0 self['SGB_MOD'] = 'vsgb2.0' self['THREADS'] = 10 self['NUM_OUTPUT_STRUCT'] = 10 self['MAX_JOBS'] = cpus end_resnum = int(end_res.split(':')[-1]) start_resnum = int(start_res.split(':')[-1]) total_residues = (start_resnum - end_resnum) + 1 if total_residues >= 12: self['MIN_OVERLAP'] = 0.50 self['OFAC_INIT_STAGE'] = '0.40/0.45/0.5/0.55/0.60' else: self['MIN_OVERLAP'] = 0.60 self['OFAC_INIT_STAGE'] = '0.50/0.55/0.60/0.65/0.70' del self['INT_DIEL']
[docs] def prepBldStruct(self, jobname, dirname): self['JOB_TYPE'] = 'MODEL' self['QUERY_OFFSET'] = 0 self['TEMPLATE_NAME'] = jobname self['%s_NUMBER' % jobname] = 1 self['%s_STRUCT_FILE' % jobname] = '%s.ent' % jobname self['%s_ALIGN_FILE' % jobname] = '%s.aln' % jobname self['DIR_NAME'] = dirname self['TAILS'] = 0 self['SIDE_OPT'] = 'true' self['MINIMIZE'] = 'true' self['BUILD_INSERTIONS'] = 'false' self['MAX_INSERTION_SIZE'] = 20 self['BUILD_TRANSITIONS'] = 'true' self['BUILD_DELETIONS'] = 'true' self['KEEP_ROTAMERS'] = 'true' self['KNOWLEDGE_BASED'] = 'true' self['NUM_OUTPUT_STRUCT'] = 10
[docs]class PrimeStructure:
[docs] def __init__(self, jobname): self.jobname = jobname
[docs] def createTemplateFile(self, template_seq, filename=None): """ Writes a template PDB file as .ent """ if not filename: filename = "%s.ent" % self.jobname with open(filename, "w") as fh: # The Residue.structure list keeps structure information # in PDB format, so we just need to write out these lines. for res in template_seq.residues: fh.writelines(res.structure) fh.writelines(["TER\n"]) return filename
[docs] def createAlignFile(self, reference_seq, template_seq, filename=None): """ Writes an alignment file for the template. If no filename is supplied the file will be named <jobname>.aln. :param reference_seq: The reference sequence :type reference_seq: `sequence<Sequence>` :param template_seq: The template sequence :type template_seq: `sequence<Sequence>` """ if not filename: filename = "%s.aln" % self.jobname # Make a reference and template sequence line. Replace gaps # symbols with dots. reference_txt = "ProbeAA: " reference_txt += reference_seq.text().replace("~", ".").replace("-", ".") reference_txt += '\n' template_txt = "Fold AA: " template_txt += template_seq.text().replace("~", ".").replace("-", ".") template_txt += '\n' with open(filename, 'w') as fh: fh.writelines(reference_txt) fh.writelines(template_txt) return filename
[docs]class PropkaError(Exception): """ A custom exception for any propka failures """
[docs]class OrderedResidueDict(OrderedDict): """ Creates an ordered dictionary for residues in a structure """
[docs] def __init__(self, residues, default_value=None): super(OrderedResidueDict, self).__init__() for res in residues: self[res] = default_value
[docs]class PropertyCalculator: """ Class for calculating properties of proteins and protein residues. Here is an example of how to calculate properties for a protein:: from schrodinger import structure from schrodinger.application.bioluminate import protein # Get the input structure st = structure.Structure.read('receptor.maegz') # Define the properties to calculate calculations = [ 'e_pot', 'e_internal', 'e_interaction', 'prime_energy', 'pka', 'sasa_polar', 'sasa_nonpolar', 'sasa_total'] # Create the calculator calculator = protein.PropertyCalculator(st, "my_calculator_jobname") # Calculate the properties properties = calculator.calculate(*calculations) In the example above the `properties` output would look something like this:: properties = { 'e_pot' : 1573.4, 'e_internal' : 624.7, 'e_interaction' : 994.8, 'prime_energy' : 744.2, 'pka' : 124.1, 'sasa_polar', : 3122.3, 'sasa_nonpolar' : 271.1, 'sasa_total' : 3393.4 } """ # All the aggregate calculations available AGGREGATE_CALCULATIONS = [ 'e_pot', # FIXME: These 2 need to have methods for getting totals Ev:133399 # 'e_internal', # 'e_interaction', 'prime_energy', 'pka', 'sasa_polar', 'sasa_nonpolar', 'sasa_total', 'hydropathy', 'rotatable', 'vdw_surf_comp', # 'complementarity', # 'solubility', # 'aggregation' ] # All the calculations available that iterate over each residue RESIDUE_CALCULATIONS = [ 'e_pot', 'e_internal', 'e_interaction', 'pka', 'sasa_polar', 'sasa_nonpolar', 'sasa_total', 'hydropathy', 'rotatable', 'vdw_surf_comp', # 'complementarity', # 'solubility', # 'aggregation' ]
[docs] def __init__(self, struct, jobname, cleanup=True, nbcutoff=14.0, residues=None, lig_asl=None): """ Construct a `ProteinCalculator` class from a structure file and a jobname. :param struct: The protein structure or protein/ligand structures :type struct: `schrodinger.structure.Structure` object :param jobname: The jobname that will be used for all calculations that require output files. :param residues: An iterable of _Residue objects to analyze. If not specified, all residues in the structure are considered. :type residues: Iterable of `schrodinger.structure._Residue` objects. :param lig_asl: The ASL for the ligand substructure. Used for calculating the vdW surface complementarity. :type lig_asl: str """ self.struct = struct self.jobname = jobname self._cleanup = cleanup self._min_nbcutoff = nbcutoff self._lig_asl = lig_asl self._minimizer = None """ Private variable that stores the minimizer. We don't create it here since some applications that use this class will not need it. """ if residues is None: # Make a list of the residues and use these to iterate over. self.residues = [res for res in self.struct.residue] else: self.residues = list(residues) # self.struct._proxy # Properties that hold the calculated data. Each of these will be # an OrderedDict with the keys being the structure's residue objects. # We store these as ordered dicts so scripts using this can access the # calculated data in the same order as the residues in the protein. A # normal dict would work if the structure._ResidueIterable returned # the same residue object but it doesn't. self.e_pot_data = OrderedResidueDict(self.residues) self.e_internal_data = OrderedResidueDict(self.residues) self.e_interaction_data = OrderedResidueDict(self.residues) self.atomic_nonpolar_sasa_data = OrderedResidueDict(self.residues) self.atomic_polar_sasa_data = OrderedResidueDict(self.residues) self.sasa_data = OrderedResidueDict(self.residues) self.hydropathy_data = OrderedResidueDict(self.residues) self.rotatable_data = OrderedResidueDict(self.residues) self.vdw_surf_comp_data = OrderedResidueDict(self.residues) self.pka_data = OrderedResidueDict(self.residues) # These calculated properties only exist in aggregate form, there # is no value for individual residues. self.prime_energy_total = None self.e_pot_total = None self.sasa_total = None self.sasa_polar_total = None self.sasa_nonpolar_total = None self.hydropathy_total = None self.rotatable_total = None self.vdw_surf_comp_total = None self.pka_total = None # This is used for all SASA calculations self._atomic_sasa_list = None self._vdw_surf_comp_by_atom = None self.progress = (0, 0) """ Variable that can be used to get the progress of calculations. This variable is only set in `self.calculateOverResidues`. Since that method returns a generator, each step can query `self.progress` to get a description of the progress. This variable is a tuple with the form ( step, total steps ). """
@property def minimizer(self): """ The minimizer used in energy calculations. """ if not self._minimizer: self._minimizer = minimize.Minimizer( struct=self.struct, nonbonded_cutoff=self._min_nbcutoff, ffld_version=minimize.OPLS_2005, cleanup=False) # SHARED-5335, BIOLUM-3602 # Auto fixing of lewis causes charged cysteines # turning cleanup=False to restore old behavior return self._minimizer def _prepInscode(self): """ Private method to renumber residues with inscodes so that propka can handle them. The returned st is renumbered and the renumbering map is also returned and used in `self.setpKaData` """ new_ct = self.struct.copy() problem_res = [] used_resnums = set() for residue in new_ct.residue: if residue.inscode != ' ': problem_res.append(residue) used_resnums.add(residue.chain + ':' + str(residue.resnum)) temp_resnums = {} for res in problem_res: old_name = res.chain + ':' + str(res.resnum) + res.inscode for proposed_resnum in range(9999, -9999, -1): proposed_name = res.chain + ':' + str(proposed_resnum) if proposed_name not in used_resnums: for atom in res.atom: atom.resnum = proposed_resnum atom.inscode = ' ' temp_resnums[old_name] = proposed_name used_resnums.add(proposed_name) break return (new_ct, temp_resnums)
[docs] def runpKa(self): """ Runs PROPKA to get the pKa of all residues in the `self.struct`, then sets `self.pka_data`. """ # PROPKA can get stuck in an infinite loop when there are severe # clashes. This clashes can be introduced in unforesoon ways, such # as multiconformer ligands where each conformer has a different # atom constituency, i.e. microheterogeneity for ligands. overlapping_atom_pairs = analyze.find_overlapping_atoms( self.struct, ignore_hydrogens=True, ignore_waters=True, dist_threshold=0.5) if len(overlapping_atom_pairs) > 5: raise PropkaError( "Multiple severe heavy clashes found. Aborting PROPKA calculation." ) st, renum_map = self._prepInscode() # Find a jobname for PROPKA that is not in use yet: i = 0 while True: i += 1 propka_jobname = os.path.abspath("%s_propka-%i" % (self.jobname, i)) ipdb = propka_jobname + ".pdb" if not os.path.isfile(ipdb): break # This jobname is not in use yet st.write(ipdb) ifile = propka_jobname + ".propka_input" ofile = propka_jobname + ".pka" logfile = propka_jobname + ".log" # So that these files get cleaned up when the job completes: temp_files = [ipdb, ifile, ofile, logfile] try: stdout, stderr = run_propka.run_propka(ipdb) except Exception as err: raise PropkaError('Error running PROPKA:\n%s' % err) # Save output to a log file (for debugging): with open(logfile, "w") as fh: fh.write(str(stdout)) fh.write(str(stderr)) if stderr: raise PropkaError( f'Error running PROPKA. Log file: {logfile}.\n Stderr: {stderr}' ) if not os.path.isfile(ofile): raise PropkaError('PROPKA output file not found. Log file: %s' % logfile) # We are only interested in: # 'resname', 'resnum', 'chain', 'pKa' headers = propka_parse.SUMMARY_HEADER[:4] header, summary = propka_parse.get_summary(ofile, headers) self.setpKaData(summary, renum_map=renum_map) # Clean up the output files if self._cleanup: for f in temp_files: if os.path.isfile(f): fileutils.force_remove(f)
[docs] def getResiduepKa(self, residue): """ Returns the pKa for specified residue :param residue: Residue to get internal energy for :type residue: `structure._Residue` :rtype: float """ if not any(self.pka_data.values()): self.runpKa() return self.pka_data.get(residue)
[docs] def getTotalpKa(self): """ Gets the sum of the pKa values for the protein. :rtype: float """ if self.pka_total is not None: return self.pka_total if not any(self.pka_data.values()): self.runpKa() values = [float(v) for v in self.pka_data.values() if v is not None] self.pka_total = sum(values) return self.pka_total
[docs] def setpKaData(self, summary, renum_map=None): """ Compares residues from the PROPKA summary with the residues in `self.residues` and when matches are found the summary's pKa is set for that residue in `self.pka_data` """ for res in self.residues: maestro_resid = '%s:%s' % (res.chain, res.resnum) if renum_map: name = res.chain + ':' + str(res.resnum) + res.inscode match = renum_map.get(name) if match: maestro_resid = match # PROPKA will assign a chain "_" if none is found maestro_resid = maestro_resid.replace(' ', '_') for idx, row in enumerate(summary): # Skip terminals resname = row[0] if resname == 'C-': # or resname == 'N+': # NOTE: We need to include the N+ terminal to fix # PPREP-925. This is because PROPKA actually marks # the first residue in the chain as "N+", no matter # what type it is. It's different for "C-" residues. continue resnum = row[1] chain = row[2] pKa = row[3] propka_resid = '%s:%s' % (chain, resnum) if propka_resid == maestro_resid: self.pka_data[res] = pKa del summary[idx] break
[docs] def getTotalPrimeEnergy(self): """ Run Prime Minimization on `self.struct`. This will launch a job using job control. After the job completes the total energy will be taken from the first CT using the "r_psp_Prime_Energy" property. :returns: Prime energy of protein :rtype: float """ if self.prime_energy_total is not None: return self.prime_energy_total refiner = Refiner(self.struct) output_st = refiner.runPrimeMinimization(self.jobname) self.prime_energy_total = output_st.property.get('r_psp_Prime_Energy') if self._cleanup: refiner.clean() return self.prime_energy_total
[docs] def getPrimeEnergyByResidues(self, residues): """ Run Prime Minimization on `self.struct` only minimizing the residues in `residues`. This will launch a job using job control. After the job completes the total energy will be taken from the first CT using the "r_psp_Prime_Energy" property. :param residues: Residues to minimize :type residues: list of `residues<structure._Residue>` :returns: Prime energy of protein :rtype: float """ internal_residues = set() for res in residues: resnum = res.resnum residx = int(resnum) - 1 internal_residues.add(self.residues[residx]) refiner = Refiner(self.struct, residues=internal_residues) output_st = refiner.runPrimeMinimization(self.jobname) prime_energy_total = output_st.property.get('r_psp_Prime_Energy') if self._cleanup: refiner.clean() return prime_energy_total
[docs] def getResiduePotentialEnergy(self, residue): """ Return the potential energy for a residue. :param residue: Residue to get potential energy for :type residue: `structure._Residue` :rtype: float """ e_pot = self.e_pot_data.get(residue) if e_pot is None: self._calculateEnergiesByResidue(residue) e_pot = self.e_pot_data[residue] return e_pot
[docs] def getPotentialEnergyGenerator(self): """ Return a generator that iterates over each residue in `self.struct` yielding the `schrodinger.structure._Residue` object and it's potential energy. :rtype: generator :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy` :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy` """ for res in self.residues: e_pot = self.getResiduePotentialEnergy(res) yield (res, e_pot)
[docs] def getTotalPotentialEnergy(self): """ Get the potential energy of `self.struct` which is calculated using `schrodinger.structutils.minimize.Minimizer`. The potential energy is the sum of the internal energies and the interaction energies. :return: Total potential energy of all the residues :rtype: float :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy` :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy` """ if self.e_pot_total is None: self.e_pot_total = self.minimizer.getEnergy() return self.e_pot_total
[docs] def getResidueInternalEnergy(self, residue): """ Return the residue's internal energy. :param residue: Residue to get internal energy for :type residue: `structure._Residue` :rtype: float :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy` """ e_internal = self.e_internal_data.get(residue) if e_internal is None: self._calculateEnergiesByResidue(residue) e_internal = self.e_internal_data[residue] return e_internal
[docs] def getInternalEnergyGenerator(self): """ Return a generator that iterates over each residue in `self.struct`. This yields the `schrodinger.structure._Residue` object and it's internal energy. :rtype: generator :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy` """ for res in self.residues: e_internal = self.getResidueInternalEnergy(res) yield (res, e_internal)
def _calculateEnergiesByResidue(self, res): #DEBUG print "_calculateEnergiesByResidue() called" min = self.minimizer atom_subset = res.getAtomIndices() e_bonded = sum(min.getBondedEnergies(atom_subset)) (intra_nb_energy, inter_nb_energy) = min.getNonBondedEnergies(atom_subset) e_internal = e_bonded + intra_nb_energy e_interact = inter_nb_energy self.e_interaction_data[res] = e_interact self.e_internal_data[res] = e_internal self.e_pot_data[res] = e_interact + e_internal
[docs] def getResidueInteractionEnergy(self, residue): """ Return the residue's interaction energy. :param residue: Residue to get interaction energy for :type residue: `structure._Residue` :rtype: float :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy` """ e_interact = self.e_interaction_data.get(residue) if e_interact is None: self._calculateEnergiesByResidue(residue) e_interact = self.e_interaction_data[residue] return e_interact
[docs] def getInteractionEnergyGenerator(self): """ Return a generator that iterates over each residue in `self.struct`. This yields the `schrodinger.structure._Residue` object and it's interaction energy. :rtype: generator :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy` """ for res in self.residues: e_interact = self.getResidueInteractionEnergy(res) yield (res, e_interact)
def _calculateAtomicSasaList(self): """ Calculate SASA for all atoms in the structure, and save them. """ self._atomic_sasa_list = analyze.calculate_sasa_by_atom(self.struct) def _sumSasaForAtoms(self, atoms=None): """ Sum up the pre-calculated SASAs for the given atoms. If `atoms` is `None`, the sum of all atoms will be returned. """ # Make sure we have the data we need. if self._atomic_sasa_list is None: self._calculateAtomicSasaList() if atoms is None: return sum(self._atomic_sasa_list) sasa = 0.0 for anum in atoms: sasa += self._atomic_sasa_list[anum - 1] return sasa
[docs] def getResidueAtomicPolarSASA(self, residue, sidechain=False): """ Returns SASA for all polar atoms in residue :param residue: Residue to get atomic polar SASA contribution for :type residue: `structure._Residue` :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ # If we already have it calculated return it sasa = self.atomic_polar_sasa_data.get(residue) if sasa is not None: return sasa # Get the polar atom indices for the residue atom_idx = [ str(a.index) for a in residue.atom if not atom_is_nonpolar(a) ] if not atom_idx: return 0.0 # Create the asl expressions asl = '(atom.num %s)' % ','.join(atom_idx) if sidechain: asl += ' and (sidechain)' # Evaluate the asl expressions to get the atoms to calculate the # SASA for polar_indices = analyze.evaluate_asl(self.struct, asl) sasa = self._sumSasaForAtoms(polar_indices) self.atomic_polar_sasa_data[residue] = sasa return sasa
[docs] def getAtomicPolarSASAGenerator(self, sidechain=False): """ Returns a generator that yields the `schrodinger.structure._Residue` object and its calculated SASA for only the polar atoms in each residue in `self.struct`. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: generator """ for res in self.residues: sasa = self.getResidueAtomicPolarSASA(res, sidechain=sidechain) yield (res, sasa)
[docs] def getResidueAtomicNonPolarSASA(self, residue, sidechain=False): """ Returns SASA for only the nonpolar atoms in residue :param residue: Residue to get atomic nonpolar SASA contribution for :type residue: `structure._Residue` :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ # If we already have it calculated return it sasa = self.atomic_nonpolar_sasa_data.get(residue) if sasa is not None: return sasa # Get the polar atom indices for the residue atom_idx = [str(a.index) for a in residue.atom if atom_is_nonpolar(a)] if not atom_idx: return 0.0 # Create the asl expressions asl = '(atom.num %s)' % ','.join(atom_idx) if sidechain: asl += ' and (sidechain)' # Evaluate the asl expressions to get the atoms to calculate the # SASA for nonpolar_indices = analyze.evaluate_asl(self.struct, asl) sasa = self._sumSasaForAtoms(nonpolar_indices) self.atomic_nonpolar_sasa_data[residue] = sasa return sasa
[docs] def getAtomicNonPolarSASAGenerator(self, sidechain=False): """ Returns a generator that yields the `schrodinger.structure._Residue` object and its calculated SASA for only the nonpolar atoms in each residue in `self.struct`. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: generator """ for res in self.residues: # Get the polar atom indices for the residue nonpolar_res_atoms = [ str(a.index) for a in res.atom if atom_is_nonpolar(a) ] if nonpolar_res_atoms: # If this residue has non-polar atoms # Create the asl expressions asl = '(atom.num %s)' % ','.join(nonpolar_res_atoms) if sidechain: asl += ' and (sidechain)' # Evaluate the asl expressions to get the atoms to calculate the # SASA for nonpolar_indices = analyze.evaluate_asl(self.struct, asl) nonpolar_sasa = self._sumSasaForAtoms(nonpolar_indices) else: nonpolar_sasa = 0.0 self.atomic_nonpolar_sasa_data[res] = nonpolar_sasa yield (res, nonpolar_sasa)
[docs] def getResidueSASA(self, residue, sidechain=False): """ Returns the SASA for residue. :param residue: Residue to get SASA for :type residue: `structure._Residue` :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ # If we already have it calculated return it sasa = self.sasa_data.get(residue) if sasa is not None: return sasa res_atom_indices = [str(a.index) for a in residue.atom] if not res_atom_indices: return 0.0 asl = '(atom.num %s)' % ','.join(res_atom_indices) if sidechain: asl += ' and (sidechain)' atom_indices = analyze.evaluate_asl(self.struct, asl) sasa = self._sumSasaForAtoms(atom_indices) self.sasa_data[residue] = sasa return sasa
[docs] def getSASAPolarGenerator(self, sidechain=False): """ Returns a generator that yields the `schrodinger.structure._Residue` object and its calculated SASA for each polar residue in `self.struct`. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: generator """ for res in self.residues: if not residue_is_polar(res): continue sasa = self.getResidueSASA(res, sidechain=sidechain) yield (res, sasa)
[docs] def getTotalSASAPolar(self, sidechain=False): """ Returns the total approximate solvent accessible surface area for all polar residues. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ # If we already have it calculated return it if self.sasa_polar_total is None: asl = '(res. %s)' % ','.join(POLAR_RESIDUES) if sidechain: asl += ' and (sidechain)' atom_indices = analyze.evaluate_asl(self.struct, asl) if not atom_indices: return 0.0 self.sasa_polar_total = self._sumSasaForAtoms(atom_indices) return self.sasa_polar_total
[docs] def getSASANonPolarGenerator(self, sidechain=False): """ Returns a generator that yields the `schrodinger.structure._Residue` object and its calculated SASA for each nonpolar residue in `self.struct`. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: generator """ for res in self.residues: if not residue_is_nonpolar(res): continue sasa = self.getResidueSASA(res, sidechain=sidechain) yield (res, sasa)
[docs] def getTotalSASANonPolar(self, sidechain=False): """ Returns the total approximate solvent accessible surface area for all non-polar residues. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ # If we already have it calculated return it if self.sasa_nonpolar_total is None: asl = '(res. %s)' % ','.join(SASA_NONPOLAR_RESIDUES) if sidechain: asl += ' and (sidechain)' atom_indices = analyze.evaluate_asl(self.struct, asl) if not atom_indices: return 0.0 self.sasa_nonpolar_total = self._sumSasaForAtoms(atom_indices) return self.sasa_nonpolar_total
[docs] def getSASAGenerator(self, sidechain=False): """ Returns a generator that yields the `schrodinger.structure._Residue` object and its calculated SASA for each residue in `self.struct`. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: generator """ for res in self.residues: sasa = self.getResidueSASA(res, sidechain=sidechain) yield (res, sasa)
[docs] def getTotalSASA(self, sidechain=False): """ Returns the total approximate solvent accessible surface area for all residues. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ # If we already have it calculated return it if self.sasa_total is None: if sidechain: asl = '(sidechain)' # NOTE: everything that matches "sidechain" is already part of a "protein". else: asl = "(protein)" # See BIOLUM-1219 atom_indices = analyze.evaluate_asl(self.struct, asl) self.sasa_total = self._sumSasaForAtoms(atom_indices) return self.sasa_total
[docs] def getResidueHydropathy(self, residue, sidechain=False): """ Returns hydropathy value for residue :param residue: Residue to get hydropathy value for :type residue: `structure._Residue` :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ # Make sure we have the SASA data for the residue, it is needed for # hydropathy value res_sasa = self.sasa_data.get(residue) if res_sasa is None: res_sasa = self.getResidueSASA(residue, sidechain=sidechain) # Get a structure object contianing only the residue atoms res_st = residue.extractStructure() # Get the SASA for the residue without any neighboring atoms solo_res_sasa = sum(analyze.calculate_sasa_by_atom(res_st)) pdb_name = residue.pdbres.strip() hval = HYDROPATHY_SCALE.get(pdb_name, 0.0) calculated_hval = (old_div(res_sasa, solo_res_sasa)) * hval self.hydropathy_data[residue] = calculated_hval return calculated_hval
[docs] def getHydropathyGenerator(self, sidechain=False): """ Returns a generator that yields the `schrodinger.structure._Residue` object and its calculated hydropathy for each residue in `self.struct`. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: generator """ for res in self.residues: calculated_hval = self.getResidueHydropathy(res, sidechain=sidechain) yield (res, calculated_hval)
[docs] def getTotalHydropathy(self, sidechain=False): """ Returns the total calculated hydropathy value for all residues. :param sidechain: Only consider sidechain atoms when calculating SASA :type sidechain: bool :rtype: float """ if self.hydropathy_total is None: self.hydropathy_total = 0.0 for res, value in self.getHydropathyGenerator(sidechain=sidechain): self.hydropathy_total += value return self.hydropathy_total
[docs] def getResidueRotatableBonds(self, residue): """ Return the number of rotors for a residue. :param residue: Residue to get rotor count for :type residue: `structure._Residue` :rtype: int """ # If we already have it calculated return it count = self.rotatable_data.get(residue) if count is not None: return count resname = residue.pdbres.strip() count = RESIDUE_ROTOR_COUNT.get(resname, 0) self.rotatable_data[residue] = count return count
[docs] def getRotatableBondsGenerator(self): """ Returns a generator that yields the `schrodinger.structure._Residue` object and its number of rotors for each residue in `self.struct`. :rtype: generator """ for res in self.residues: count = self.getResidueRotatableBonds(res) yield (res, count)
[docs] def getTotalRotatableBonds(self): """ :return: Sum of rotors for all residues. :rtype: float """ if self.rotatable_total is None: self.rotatable_total = 0.0 for res, count in self.getRotatableBondsGenerator(): self.rotatable_total += count return self.rotatable_total
def _calculateVDWSurfComp(self): if self._lig_asl is None: self._vdw_surf_comp_by_atom = {} return ligand_atoms = analyze.evaluate_asl(self.struct, self._lig_asl) if not ligand_atoms or len(ligand_atoms) == self.struct.atom_total: self._vdw_surf_comp_by_atom = {} else: self._vdw_surf_comp_by_atom = surfcomp.calc_complementarity_by_atom( self.struct, ligand_atoms, grid_spacing=0.6)
[docs] def getTotalSurfComp(self): """ :return: Median of vdW surface complementarity values for all surface points for all residues. :rtype: float """ if self.vdw_surf_comp_total is None: self._calculateVDWSurfComp() sc_values = [] for values in self._vdw_surf_comp_by_atom.values(): sc_values += values if sc_values: self.vdw_surf_comp_total = numpy.median(sc_values) else: self.vdw_surf_comp_total = 0.0 return self.vdw_surf_comp_total
[docs] def getResidueSurfComp(self, residue): """ :return: Median of vdW surface complementarity values for all accounted points on the surface of this residue. :rtype: float :param residue: Residue to get the value for :type residue: `structure._Residue` """ if self._vdw_surf_comp_by_atom is None: self._calculateVDWSurfComp() # If we already have it calculated return it sc = self.vdw_surf_comp_data.get(residue) if sc is not None: return sc sc_values = [] for atom in residue.atom: sc_values += self._vdw_surf_comp_by_atom[atom.index] if sc_values: sc = numpy.median(sc_values) else: sc = 0.0 self.vdw_surf_comp_data[residue] = sc return sc
[docs] def calculateOverResidues(self, *properties): """ Helper method that returns a generator which will calculate multiple properties for `self.struct`. All results will be returned in a tuple with the form ( `structure._Residue`, calc dict ). Here is a list of valid properties to calculate: - e_pot - e_internal - e_interaction - pka - sasa_polar - sasa_nonpolar - sasa_total - hydropathy - rotatable - vdw_surf_comp :param properties: Properties to calculate :type properties: str (see PropertyCalculator.RESIDUE_CALCULATIONS) :raise KeyError: If a property passed in is invalid :return: Generator that yields `structure._Residue` and dict where keys are properties passed in and values are the total value of the property for the protein. e.g (_Residue, {'e_pot':1324.3}) :rtype: generator """ prop_map = { 'e_pot': self.getResiduePotentialEnergy, 'e_internal': self.getResidueInternalEnergy, 'e_interaction': self.getResidueInteractionEnergy, 'pka': self.getResiduepKa, 'sasa_polar': self.getResidueAtomicPolarSASA, 'sasa_nonpolar': self.getResidueAtomicNonPolarSASA, 'sasa_total': self.getResidueSASA, 'hydropathy': self.getResidueHydropathy, 'rotatable': self.getResidueRotatableBonds, 'vdw_surf_comp': self.getResidueSurfComp, } # Make sure the passed in properties are valid for prop in properties: if prop not in self.RESIDUE_CALCULATIONS: props_str = ", ".join(self.RESIDUE_CALCULATIONS) msg = 'Invalid property %s. Valid properties: %s' % (prop, props_str) raise KeyError(msg) # Set the total number of calculations that will take place total_steps = len(properties) * len(self.residues) step = 0 for res in self.residues: #DEBUG print 'Calculating %s for %s' % (', '.join(properties), res) calculations = {} for prop in properties: if prop == 'vdw_surf_comp' and self._lig_asl is None: raise KeyError( 'Ligand ASL must be defined to calculate vdw_surf_comp') else: calculations[prop] = prop_map.get(prop)(res) # Set the current progress step += 1 self.progress = (step, total_steps) yield (res, calculations)
[docs] def calculate(self, *properties): """ Helper method to calculate multiple properties for `self.struct`. All results will be returned in a dict where the keys are each of the properties in `properties`, and their values are the values returned from their corresponding method. Here is a list of valid properties to calculate: - e_pot - sasa_polar - sasa_nonpolar - sasa_total - prime_energy - pka - hydropathy - rotatable - vdw_surf_comp :param properties: Properties to calculate :type properties: str (see PropertyCalculator.AGGREGATE_CALCULATIONS) :raise KeyError: If a property passed in is invalid :return: Dict where keys are properties passed in and values are the total value of the property for the protein. e.g {'e_pot': 1324.3, 'sasa_total': 1846.9} :rtype: dict """ prop_map = { 'e_pot': self.getTotalPotentialEnergy, 'prime_energy': self.getTotalPrimeEnergy, 'pka': self.getTotalpKa, 'sasa_polar': self.getTotalSASAPolar, 'sasa_nonpolar': self.getTotalSASANonPolar, 'sasa_total': self.getTotalSASA, 'hydropathy': self.getTotalHydropathy, 'rotatable': self.getTotalRotatableBonds, 'vdw_surf_comp': self.getTotalSurfComp, } # Make sure the passed in properties are valid valid = all((p in self.AGGREGATE_CALCULATIONS) for p in properties) if not valid: props_str = ", ".join(self.AGGREGATE_CALCULATIONS) raise KeyError('Invalid properties supplied. Valid properties: ' '%s.' % props_str) data = {} for prop in properties: #DEBUG print 'Calculating %s...' % prop data[prop] = prop_map.get(prop)() return data
[docs] def getTotalAggregation(self): return
[docs] def getTotalSolubility(self): return
[docs] def getTotalComplementarity(self): return
[docs]class Mutation: """ Helper class for `Mutator`. This will store a mutated structure and the resides that were mutated. """
[docs] def __init__(self, ref_struct, struct, residue_map): """ :param ref_struct: Reference structure (potentially "idealized") :type ref_struct: `schrodinger.structure.Structure` :param struct: Mutated structure :type struct: `schrodinger.structure.Structure` :param residue_map: The mapping of reference residues to their mutated residues :type residue_map: dict instance of reference and mutated `residues<schrodinger.structure._Residue>` """ self.ref_struct = ref_struct self.struct = struct self.residue_map = residue_map
[docs]class Mutator: """ Mutates a set of residues in a protein structure allowing concurrent mutations as well as the option to limit concurrent mutations to sequential residues only. Here is an example of a mutation of a Ser residue to: Asp, Glu, Asn, & Gln (one-letter codes are D, E, N, & Q respectively). The Ser residue is in chain A and has a residue number of 22. This example will write a file named 'mutated_structures.maegz' that has the reference structure as the first CT and each mutation CT after that. Five total structures will be in the output file:: from schrodinger import structure from schrodinger.application.bioluminate import protein # Get the input structure reference_st = structure.Structure.read('receptor.maegz') # Create the writer for the output file and append the reference writer = structure.StructureWriter('mutated_structures.maegz') writer.append(reference_st) # Define the residues and mutations residues = ['A:22'] muts = 'DENQ' # Get a compatible list of mutations. The above turns into # [('A', 22, 'DENQ')] mutations = protein.Mutator.convert_residue_list(residues, muts) # Construct the mutator mutator = protein.Mutator(st, mutations) # Loop over each mutation for mutation in mutator.generate(): # mutated_structure = mutation.struct residue_map = mutation.residue_map res_str = ", ".join(str(res) for res in residue_map.values()) print 'Residues affected by this mutation: %s' % res_str # Do something with the mutated structure (refine maybe) writer.append(mutated_structure) @todo: Add logging """ MUTATIONS_PROPERTY = "s_bioluminate_Mutations" UNFOLDED_PROPERTY = "r_bioluminate_Unfolded_Contribution" UNFOLDED_PROPERTY_PRIME = "r_bioluminate_Unfolded_Contribution_Prime" # Lookup for unfolded residue contributions where: # unfolded_contribution = mut_residue - wt_residue # # ***** # The unfolded state is represented by a Boltzmann-weighted energy of a # tripeptide conformational ensemble. For each tripeptide Gly-X-Gly (where X # is the wild type or mutant residue), Confgen was run with OPLS_2005 and # "water" MM-GB/SA implicit solvent to generate conformers. # ***** # Units are kcal/mol. # The energies here should be negative # # In this the "Minimizer" list. To generate it, each conformation # was minimized with the Minimizer, and the endpoint energy was calculated. # Then the Boltzmann weighted average Prime energy for the ensemble of # structures was calculated, using an effective temperature of 25 degrees C. # These values were added as requested in BIOLUM-1317 # GXG_DATA = { "ALA": -100.736, "ARG": -118.478, "ARN": -118.478, # ARG +H "ASN": -137.153, "ASP": -149.255, "ASH": -149.255, # ASP +H "CYS": -100.845, "GLN": -136.183, "GLU": -143.536, "GLH": -143.536, # GLU +H "GLY": -105.658, "HIS": -104.977, "HIE": -104.977, "HID": -104.977, "HIP": -104.977, "ILE": -84.130, "LEU": -92.400, "LYS": -110.759, "LYN": -110.759, # LYS -H "MET": -99.708, "PHE": -96.483, "PRO": -66.763, "SER": -96.365, "THR": -98.156, "TRP": -105.114, "TYR": -101.858, "VAL": -93.493, } # # In this the "Prime" list. To generate it, each conformation # was minimized with Prime, and the endpoint energy was calculated. # Then the Boltzmann weighted average Prime energy for the ensemble of # structures was calculated, using an effective temperature of 25 degrees C. # These values were added as requested in BIOLUM-1317 # GXG_DATA_PRIME = { "ALA": -112.635, "ARG": -142.467, "ARN": -142.467, # ARG +H "ASN": -156.375, "ASP": -154.559, "ASH": -154.559, # ASP +H "CYS": -113.747, "GLN": -152.611, "GLU": -141.673, "GLH": -141.673, # GLU +H "GLY": -116.263, "HIS": -121.600, "HIE": -121.600, "HID": -121.600, "HIP": -121.600, "ILE": -97.541, "LEU": -107.106, "LYS": -123.751, "LYN": -123.751, # LYS -H "MET": -112.643, "PHE": -113.719, "PRO": -81.734, "SER": -112.693, "THR": -116.048, "TRP": -122.407, "TYR": -123.497, "VAL": -109.017, } # These are the supported residues in build.mutate SUPPORTED_BUILD_RESIDUES = [ 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'HIP', 'HIE', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL' ] # FIXME add 'ARN', 'ASH', and 'GLH' to this list.
[docs] def __init__(self, ref_struct, mutations, concurrent=1, sequential=False, idealize=True): """ :param ref_struct: The reference (starting) structure :type ref_struct: `schrodinger.structure.Structure` instance :param mutations: A list of the mutations to carry out on the `ref_struct`. Each element of the list is a tuple of ("res num.", ["pdbnames"]) where "res num." is the residue number being altered and "pdbnames" is a list of the standard PDB residue names to mutate it to. :type mutations: List of tuples :param concurrent: Maximum concurrent mutations :type concurrent: int :param sequential: Limit concurrent mutations to being sequential :type sequential: bool :param idealize: Whether to idealize the reference structure by self-mutating the affected residues before calculating properties. :type idealize: bool :raise RuntimeError: If concurrent is not between 1 and 99. :see: For easy creation of mutations variable `Mutator.convert_residue_list` """ if concurrent < 1 or concurrent > 99: err = 'Invalid concurrent value: %s' % concurrent raise RuntimeError(err) # NOTE: This is the input structure without any modifications. Any # self-mutations and minimizations will be performed later. self.ref_struct = ref_struct # IMPORTANT: This class should never be modifying this structure self._mutations = None self.mutations = mutations self.concurrent = concurrent self.sequential = sequential self._idealize_reference = idealize # Make sure sequential is False if there is no concurrent # mutations being made if self.concurrent == 1: self.sequential = False self._validate_mutations_structure()
def _validate_mutations_structure(self): """ *Instance* method for validating mutations using checks relevant to actual reference structure. This is run after generating mutations at initialization of the Mutator instance. Other validation methods below DO NOT check or have access to the reference structure since they are used as static methods Checks: 1. Only C terminal residues with OXT should be allowed """ accepted_mutations = [] for mutation in self.mutations: chain, resnum, inscode, resname = mutation # C terminal residues must have 'OXT', if capped, NMA will be terminal and shouldnt be mutated c_term_res = [ res for res in self.ref_struct.residue if res.chain == chain.replace('_', ' ') ][-1] c_term_num = c_term_res.resnum c_term_inscode = c_term_res.inscode is_c_term = c_term_num == resnum and c_term_inscode == inscode if is_c_term: has_oxt = ' OXT' in [a.pdbname for a in c_term_res.atom] if not has_oxt: print( f"\nWARNING: Refusing to mutate uncharged C terminal residue {c_term_res}. Consider leaving charged or adding a capping group.\n" ) continue accepted_mutations.append(mutation) self.mutations = accepted_mutations
[docs] @staticmethod def validate_mutated_residues(residues): """ Method for validating the residues used in mutations passed in to the `MutateProtein` class. :raise ValueError: If the 3-letter residue name is not supported by the `build,mutate` method. """ # TODO: Convert the return to raise a custom MutateProteinError. This # will help in letting front-end know why it fails. # TODO: Add validation for assuring chain and resnum are in self.struct # Catch residues not supported by the build.mutate method invalid = [] for res in residues: if res not in Mutator.SUPPORTED_BUILD_RESIDUES: invalid.append(res) if invalid: msg = 'Unsupported residues: %s\n\n' % ', '.join(invalid) msg += 'The resnames for mutations must be one of: %s' %\ ','.join(Mutator.SUPPORTED_BUILD_RESIDUES) raise ValueError(msg)
[docs] @staticmethod def validate_mutations(mutations): """ Private method for validating the mutations passed in to the `MutateProtein` class. :raise ValueError: If the `mutations` passed in is not a list, if each item in the list is not a tuple, if the tuple is not of length 4 (chain, resnum idx, inscode, mutation resnames), if the resnum is not an integer, or any of the 3-letter residue names in "mutation resnames" is not supported by the `build,mutate` method. """ # TODO: Convert the return to raise a custom MutateProteinError. # This will help in letting front-end know why it fails. # TODO: Add validation for assuring chain and resnum are in self.struct if not type(mutations) == list: raise ValueError('Mutations must be a list') for mutation in mutations: if not type(mutation) == tuple: raise ValueError('Each mutations must be a tuple') if not len(mutation) == 4: raise ValueError('Each mutation must define chain, resnum, ' 'inscode, and resname') chain, idx, inscode, resname = mutation try: idx = int(idx) except ValueError: raise ValueError('The resnum must be an integer')
# Catch residues not supported by the build.mutate method # This has to be disabled to support unnatural amino acids # Mutator.validate_mutated_residues( [resname] ) # Dev: Todo, validate_mutated_residues should support unnaturals @property def mutations(self): """ The list of mutations that will be carried out """ return self._mutations @mutations.setter def mutations(self, mutations): """ The setter method for `self.mutations`. :raise RuntimeError: If the mutations have an invalid syntax """ Mutator.validate_mutations(mutations) self._mutations = mutations @property def total_mutations(self): """ Total number of mutations that will be generated """ mutations_list = self.calculateMutationsList() return len(mutations_list)
[docs] @classmethod def convert_res_file(cls, filename, regex=MUTATION_3_LETTER_RE): """ Converts lines in filename into a list of mutations to use. Returns a list of tuples where each tuple is ( "chain", "resnum", "inscode", "three-letter resnames for mutation"). Each line could be multiple mutations (one residue to multiple mutation states) :param filename: Name of file containing the list of mutations. :type filename: str :param regex: Regular expression for matching residues :type regex: regular expression object :raise RuntimeError: If any of chain, resnum or mutation is missing :returns: List of mutations with valid syntax for the class :rtype: list of tuples """ mutations = [] with open(filename) as fileobj: for i, line in enumerate(fileobj, start=1): # Skip blank lines if not line.strip(): continue # Skip commented lines if line.startswith('#'): continue new_muts = cls.convert_res_to_muts(line, regex, validate=False) if new_muts is None: raise RuntimeError( 'Invalid syntax for mutation in %s: %s (line %s)' % (filename, line, i)) mutations.extend(new_muts) Mutator.validate_mutations(mutations) return mutations
[docs] @classmethod def convert_res_list(cls, reslist, regex=MUTATION_3_LETTER_RE, validate=True): """ Converts list of residues into a list of mutations to use. Returns a list of tuples where each tuple is ( "chain", "resnum", "inscode", "three-letter resnames for mutation"). Each residue string could be multiple mutations (one residue to multiple mutation states) :param reslist: List of residues to convert to mutations :type reslist: list of str :param regex: Regular expression for matching residues :type regex: regular expression object :param validate: Whether to validate the potential mutations :type validate: bool :return: List of mutations with valid syntax for the class or None if any item in the list is not valid :rtype: list of tuples or None """ mutations = [] for res_str in reslist: new_muts = cls.convert_res_to_muts(res_str, regex, validate=False) if new_muts is None: return None mutations.extend(new_muts) if validate: try: Mutator.validate_mutations(mutations) except ValueError: return None return mutations
[docs] @staticmethod def convert_res_to_muts(res_str, regex=MUTATION_3_LETTER_RE, validate=True): """ Converts a residue string into a list of mutations to use. Returns a list of tuples of ("chain", "resnum", "inscode", "three-letter resnames for mutation"). Will return None if any item in the list is not a valid residue string. A residue string could be multiple mutations (one residue to multiple mutation states) :param res_str: Residue string to convert to mutations :type res_str: str :param regex: Regular expression for matching residues :type regex: regular expression object :param validate: Whether to run validation on the mutation :type validate: bool :return: List of mutations with valid syntax for the class or None if the res_str is not valid. :rtype: list tuples or None """ mutations = [] match = regex.search(res_str) if not match: return None chain = match.group('chain') resnum = match.group('resnum') inscode = match.group('inscode') or ' ' mut_types = match.group('mutations') #DEBUG print 'Getting mutations from file', chain, resnum, mut_types # If this is a 1 letter mutation we change the 1 letter to 3 if regex == MUTATION_1_LETTER_RE: if mut_types: mut_types = [AA_1_LETTER_MAP[m] for m in mut_types] else: # split them by comma if mut_types: mut_types = [m for m in mut_types.split(',')] # resnum could be 0, converted to integer later. BIOLUM-2963 if not chain or not resnum or not inscode or not mut_types: return None for resname in mut_types: mutation = (chain, int(resnum), inscode, resname) mutations.append(mutation) if validate: # Validate the parsed mutations now Mutator.validate_mutations(mutations) return mutations
[docs] @staticmethod def convert_muts_file(muts_file, regex=MUTATION_RE): """ Converts lines in filename into a list of mutations to use. Returns a list of tuples where each tuple is ( "chain", "resnum", "inscode", "three-letter resnames for mutation"). Also supports loop insertion and deletion. Each line is one mutation (could be multiple residues) """ mutations_list = [] with open(muts_file) as fh: for line in fh: change_strings = line.strip().split() if len(change_strings) == 0: continue # Skip commented lines if line.startswith('#'): continue changes = [] for change_str in change_strings: match = regex.search(change_str) if match: chain = match.group('chain') resnum = match.group('resnum') inscode = match.group('inscode') or ' ' new_resname = match.group('new_resname') # convert to 3-letter name if it is 1-letter code if len(new_resname) == 1: if new_resname in AA_1_LETTER_MAP: new_resname = AA_1_LETTER_MAP[new_resname] else: raise RuntimeError("Invalid mutation: %s" % change_str) changes.append( (chain, int(resnum), inscode, new_resname)) else: raise RuntimeError("Invalid mutation: %s" % change_str) mutations_list.append(changes) if not mutations_list: raise RuntimeError("No mutations specified in the file") return mutations_list
[docs] @staticmethod def convert_residue_list(residues, mutations, regex=MUTATION_3_LETTER_RE): """ Convert a list of residues and mutations to a standard list of mutations. Returns a list of tuples where each tuple is ( "chain", "resnum", "inscode", "three-letter resnames for mutation"). :param residues: Residues that will be mutated. :type residues: list of strings (Syntax: <chain>:<resnum> if no chain use ``"_"``) :param mutations: The three-letter names for the residues that will be used in mutation. :raise RuntimeError: If any of chain, resnum or mutation is missing or if there is an invalid residue name :returns: List of mutations with valid syntax for the class :rtype: list of tuples """ mutations_list = [] for res in residues: chain = None resnum = None match = regex.search(res) if match: chain = match.group('chain') resnum = match.group('resnum') inscode = match.group('inscode') or ' ' #DEBUG print 'Getting mutations from list', chain, resnum for resname in mutations: mutation = (chain, int(resnum), inscode, resname) mutations_list.append(mutation) else: # Raise an error for non-blank lines that don't match regex raise RuntimeError('Invalid syntax for residue: %s' % res) # Validate the parsed mutations now Mutator.validate_mutations(mutations_list) return mutations_list
[docs] def calculateMutationsList(self): """ Calculate the mutations that will be performed, based on the input residues and their mutations, and the "concurrent" and "sequential" settings. """ def filter_out_duplicate_residues(res_combos): # Set of (chain, resnum, inscode) seen_residues = set() filtered_res_combos = [] for combo in res_combos: chain, resnum, inscode, res_type = combo res_id = (chain, resnum, inscode) if res_id not in seen_residues: filtered_res_combos.append(combo) seen_residues.add(res_id) return filtered_res_combos def are_sequential(mutations): """ Determine if given mutation residues are sequential. Assumes that residues are given in sequence order. """ for i in range(len(mutations) - 1): mut1 = mutations[i] mut2 = mutations[i + 1] atom1 = find_residue_atom(self.ref_struct, mut1[0], mut1[1], mut1[2]) atom2 = find_residue_atom(self.ref_struct, mut2[0], mut2[1], mut2[2]) if not mm.mmct_res_connected(self.ref_struct.handle, atom1.index, atom2.index): # These 2 residues are not sequential return False # All residues are sequential return True # Loop over all combinations of mutations of length self.concurrent all_combinations = [] for num_concurrent_muts in range(1, self.concurrent + 1): # The above range loop is necessary to fix BIOLUM-122 for res_combos in itertools.combinations(self.mutations, num_concurrent_muts): # BIOLUM-1450 Filter out duplicate per-residue mutatons: filtered_res_combos = filter_out_duplicate_residues(res_combos) if len(filtered_res_combos) != len(res_combos): # We must be doing this mutation already continue # Make sure all mutations are concurrent for combination if self.sequential: altered_residues = [c[:-1] for c in res_combos] if not are_sequential(altered_residues): continue all_combinations.append(res_combos) mutations_list = [] # Iterate over all possible mutation combinations: for res_combos in all_combinations: # Sort the alteration list (for uniqueness comparison to work): mut_alterations = sorted(res_combos) if mut_alterations not in mutations_list: # Make sure we are not adding any duplicates (really needed?) mutations_list.append(mut_alterations) return mutations_list
[docs] def generate(self): """ Used to loop over all mutations. Each mutation consists of the mutated structure and a residue mapping dict. The structure is raw, that is, unrefined in any way. :returns: Generator for all mutations defined in `self.mutations` Each step of generator yields a `mutation<Mutation>`. :rtype: generator """ # Make a list of mutations: mutations_list = self.calculateMutationsList() # For each final mutation (can be one or more residue change): for i, changes in enumerate(mutations_list, start=1): # Each iteration of this loop represents a MUTATION. Each mutation # can consist or one or more residue changes. yield self.getMutationFromChanges(changes)
[docs] def getMutationFromChanges(self, changes): mutation_map = OrderedDict() mutation_strings = [] idealized_ref_struct = self.ref_struct.copy() # Idealize the reference structure (mutate every changed residue to itself): if self._idealize_reference: for chain, resnum, inscode, new_resname in changes: # Get the residue that is going to be mutated residue = idealized_ref_struct.findResidue('%s:%s' % (chain, resnum)) # FIXME no inscode here???? curr_resname = residue.pdbres.strip() if curr_resname in Mutator.SUPPORTED_BUILD_RESIDUES: res_atom = residue.getAtomIndices()[0] # Mutate the residue to itself build.mutate(idealized_ref_struct, res_atom, curr_resname) else: print( "WARNING: Residue %s (%s) could not be idealized, because it's a non-standard residue" % (str(residue), curr_resname)) mutated_st = self.ref_struct.copy() unfolded_contribution_gas = 0.0 unfolded_contribution_prime = 0.0 # Apply each residue change: for chain, resnum, inscode, new_resname in changes: # Find an atom from this residue in the CT: residue_atom = find_residue_atom(mutated_st, chain, resnum, inscode) orig_resname = residue_atom.pdbres.strip().upper() res_str = "%s:%i%s" % (chain, resnum, inscode) # Do the actual mutation if new_resname.startswith('+') or new_resname.startswith('-'): (mutated_st, changed_seq) = self.getLoopMutation(mutated_st, res_str, new_resname) else: renum_map = build.mutate(mutated_st, residue_atom.index, new_resname) # Get the reference residue that we are mutating reference_atom = find_residue_atom(idealized_ref_struct, chain, resnum, inscode) reference_residue = reference_atom.getResidue() # Get the mutated residue mutated_residue = residue_atom.getResidue() # Map the reference residue to the mutated residue mutation_map[reference_residue] = mutated_residue # NOTE: This string is being parsed by residue_scanning_viewer_gui.py: mut_str = "%s(%s->%s)" % (str(mutated_residue), orig_resname, new_resname) mutation_strings.append(mut_str) # Update the unfolded contribution for this mutant # TODO: for now set 0.0 for nonstandard residues # BIOLUM-2554, loop ins/del needs correction for peptide bond PEP_POT = -106.5 PEP_PRIME = -30.1 if new_resname.startswith('+'): dG1 = 0.0 dG2 = 0.0 for i, c in enumerate(changed_seq): # FIXME: Ideally need need to get the 3-letter code of the # actual residue - as multiple residues types can have the # same 1-letter code. resname = AA_1_LETTER_MAP[c] dG1 += (self.GXG_DATA[resname] - self.GXG_DATA['GLY'] + PEP_POT) dG2 += (self.GXG_DATA_PRIME[resname] - self.GXG_DATA_PRIME['GLY'] + PEP_PRIME) elif new_resname.startswith('-'): dG1 = 0.0 dG2 = 0.0 for i, c in enumerate(changed_seq): # FIXME: Ideally need need to get the 3-letter code of the # actual residue - as multiple residues types can have the # same 1-letter code. resname = AA_1_LETTER_MAP[c] dG1 -= (self.GXG_DATA[resname] - self.GXG_DATA['GLY'] + PEP_POT) dG2 -= (self.GXG_DATA_PRIME[resname] - self.GXG_DATA_PRIME['GLY'] + PEP_PRIME) else: try: dG1 = (self.GXG_DATA[new_resname] - self.GXG_DATA[orig_resname]) dG2 = (self.GXG_DATA_PRIME[new_resname] - self.GXG_DATA_PRIME[orig_resname]) except: dG1 = 0.0 dG2 = 0.0 unfolded_contribution_gas += dG1 unfolded_contribution_prime += dG2 # Comma-separated list of: "H:19(ASN->ALA)" # NOTE: These strings are being parsed by residue_scanning_viewer_gui.py: mutated_st.property[self.MUTATIONS_PROPERTY] = ",".join( mutation_strings) mutated_st.property[self.UNFOLDED_PROPERTY] = unfolded_contribution_gas mutated_st.property[ self.UNFOLDED_PROPERTY_PRIME] = unfolded_contribution_prime mutation = Mutation(idealized_ref_struct, mutated_st, mutation_map) return mutation
[docs] def getLoopMutation(self, mutated_st, res_str, new_resname): """ build loop insertion or deletion """ settings = {} settings['NUM_OUTPUT_STRUCT'] = 1 settings['EXPAND_LOOP'] = 2 settings['QUIET'] = 'yes' os.environ['PSP_EXEC'] = hunt('psp') resid_list = [] resname_list = [] # List of 3-letter PDB names for all residues for res in mutated_st.residue: resid_list.append(str(res)) resname_list.append(res.pdbres.replace(' ', '')) anchor_resid = res_str.replace(' ', '') ianchor = resid_list.index(anchor_resid) anchor_seq = AA_3_LETTER_MAP[resname_list[ianchor]] if new_resname.startswith('+'): # A:52 +GLY will do loop between A:51,A:54 with seq "D(52)GLYR(53)" istart = ianchor - 1 start_resid = resid_list[istart] iend = ianchor + 2 end_resid = resid_list[iend] anchor_next_seq = AA_3_LETTER_MAP[resname_list[ianchor + 1]] build_seq = anchor_seq + new_resname[ 1:] + anchor_next_seq # actual building sequence changed_seq = new_resname[1:] elif new_resname.startswith('-'): # A:52 -2 will do loop between A:51,A:56 with seq "R(52)P(55)", 53,54 are deleted del_length = int(new_resname[1:]) iend = ianchor + 2 + del_length end_resid = resid_list[iend] istart = ianchor - 1 start_resid = resid_list[istart] build_seq = anchor_seq + AA_3_LETTER_MAP[resname_list[iend - 1]] changed_seq = '' for index in range(ianchor + 1, ianchor + 1 + del_length): changed_seq += AA_3_LETTER_MAP[resname_list[index]] chain = start_resid[0:1] stemresnum1 = int(start_resid[2:]) stemresnum2 = int(end_resid[2:]) loops = [] loops.append([chain, stemresnum1, stemresnum2, build_seq]) MLB = MultiLoopBuilder(mutated_st, loops, settings) if MLB.models: return (MLB.models[0], changed_seq) else: err = 'Cannot build insertion: %s %s' % (res_str, build_seq) raise RuntimeError(err)
[docs]class Refiner: """ Creates input files and runs calculations for protein refinement jobs using Prime and our `schrodinger.structutils.minimize.Minimizer` class. Here is an example of how to refine a protein that just had a residue mutated. In this example only the residues within 7.0 angstroms of the mutated residue will be refined:: from schrodinger.structure import StructureReader from schrodinger.structutils import build from schrodinger.application.bioluminate import protein # Get the structure st = StructureReader('receptor.maegz') # Atom number 30 is the alpha carbon of a GLU ca = st.atom[30] # Mutate GLU -> ASP renum_map = build.mutate(st, ca.index, "ASP") # Get the residue that was mutated mutated_residue = None for res in st.residue: ca_keys = (ca.chain, ca.resnum, ca.inscode) res_keys = (res.chain, res.resnum, res.inscode) if ca_keys == res_keys: mutated_residue = res break # We want to use the reference to gather the residues to refine refine_residues = protein.get_residues_within( st, [mutated_residue], within = 7.0 ) # Create the refiner refiner = protein.Refiner(st, residues=refine_residues) # Run Prime minimization which returns the refined structure refined_struct = refiner.runPrimeMinimization('my_refinement_jobname') """ # Available refinement jobs PYTHON_MINIMIZE = 'python_minimize' PRIME_MINIMIZE = 'prime_minimize' PRIME_RESIDUE = 'prime_residue' PRIME_SIDECHAIN = 'prime_sidechain' PRIME_SIDECHAIN_CBETA = 'prime_sidechain_cbeta' PRIME_SIDECHAIN_BB = 'prime_sidechain_bb' PRIME_LOOP_PRED = 'prime_loop_prediction' # can set up multiloop jobs too PRIME_ANTIB_LOOP_PRED = 'prime_antibody_loop_prediction' # FIXME Add a <cleanup> argument (defaulting to True) and implement automatic cleanup.
[docs] def __init__(self, struct, residues=None): """ :param struct: The structure being refined :type struct: `schrodinger.structure.Structure` :param residues: Residues to consider for refinement :type residues: None or list/tuple of `structure.structure._Residue` """ self.struct = struct self.residues = residues self.jobdir = self._createJobDir() self.python_types = [self.PYTHON_MINIMIZE] self.prime_types = [ self.PRIME_MINIMIZE, self.PRIME_RESIDUE, self.PRIME_SIDECHAIN, self.PRIME_SIDECHAIN_CBETA, self.PRIME_SIDECHAIN_BB, self.PRIME_LOOP_PRED, self.PRIME_ANTIB_LOOP_PRED ]
[docs] def setResidues(self, residues): """ Set the residues to refine. This is a list of integers refering to the residue indices for the structure. """ self.residues = residues
def _createJobDir(self): """ Private method to create a directory to run the refinement jobs from. The name is "refinement_job_<n>" where n is either 1 or the next increment of a previously found directory. For exampe if there is a directory named "refinement_job_5" the new directory will be "refinement_job_6". """ dirs = [d for d in os.listdir(os.getcwd()) if os.path.isdir(d)] count = 0 for d in dirs: match = re.search(r'refinement_job_(\d+)', d) if match: _count = int(match.group(1)) if _count > count: count = _count retries = 5 while True: temp_dir = 'refinement_job_%d' % (count + 1) try: os.mkdir(temp_dir) break except OSError as err: if err.errno != errno.EEXIST: raise count += 1 retries -= 1 if retries < 1: raise return os.path.abspath(temp_dir)
[docs] def clean(self): """ Remove all files created from the refinement job """ try: rmtree(self.jobdir) except Exception as err: print('Unable to remove the directory %s\n%s' % (self.jobdir, err))
[docs] def writePrimeInput(self, refine_type, input_file, st_filename, **kwargs): """ Writes the input file for a Prime refinement job. :param refine_type: The type of Prime refinement to run (see class variables) :type refine_type: str :param input_file: Name of the input file for the refinement job :type input_file: str :param st_filename: Filename of the structure to be refined :type st_filename: str :raise RuntimeError: If `refine_type` is not supported :rtype: None """ if refine_type not in self.prime_types: raise RuntimeError( '"%s" is not a supported Prime refinement job. Available ' 'options:\n%s' % '\n '.join(self.prime_types)) # Remove the host kwarg, it is added at the end of this method host = kwargs.pop('host', None) # MultiRefine loop jobs may need to define max_jobs if host: try: hostlist = jobcontrol.host_str_to_list(host) nproc = hostlist[0][1] if nproc: kwargs['max_jobs'] = int(nproc) # must be lower case! except (ValueError, IndexError) as err: print('Unable to get the number of processes!', err) st_file = os.path.basename(st_filename) input_obj = PrimeConfig(st_file, **kwargs) # Remove some kwarg as they are in the defaults and not expected by method() sgb_mod = kwargs.pop('SGB_MOD', None) use_membrane = kwargs.pop('USE_MEMBRANE', None) method_map = { self.PRIME_MINIMIZE: input_obj.prepMinimize, self.PRIME_RESIDUE: input_obj.prepResidue, self.PRIME_SIDECHAIN: input_obj.prepSidechain, self.PRIME_SIDECHAIN_CBETA: input_obj.prepSidechainCBeta, self.PRIME_SIDECHAIN_BB: input_obj.prepSidechainBB, self.PRIME_LOOP_PRED: input_obj.prepLoop, self.PRIME_ANTIB_LOOP_PRED: input_obj.prepAntibodyLoop } method = method_map.get(refine_type) if method is None: raise ValueError('Invalid refinement: %s' % refine_type) elif self.residues: method(residues=self.residues, **kwargs) else: method(**kwargs) # Add the compute host to the input file here if host: input_obj['HOST'] = host input_obj.write(input_file)
[docs] def refinePrime(self, refine_type, jobname, completed_callback=None, **kwargs): """ Run a Prime refinement job through job control and return the refined output structure. :param refine_type: The type of Prime refinement to run (see class variables) :type refine_type: str :param jobname: Jobname to use :type jobname: str :param completed_callback: Whether to start the job and wait, or call given function with Job object is parameter on completion. :type completed_callback: callable :raise RuntimeError: If `refine_type` is not supported :raise RuntimeError: If launching the refinement job fails :raise RuntimeError: If the refinement job fails :returns: Refined structure :rtype: `schrodinger.structure.Structure` object or `schrodinger.job.jobcontrol.Job` """ # TODO: Add this to a log #DEBUG if self.residues is None: #DEBUG print 'Adding "%s" refinement job for residues: ALL' % refine_type #DEBUG else: #DEBUG print 'Adding "%s" refinement job for residues: %s' % \ #DEBUG ( refine_type, ', '.join(str(r) for r in self.residues)) # Write out the structure and inp files for the refinement job struct_file = os.path.join(self.jobdir, '%s.maegz' % jobname) output_file = os.path.join(self.jobdir, '%s-out.maegz' % jobname) input_file = os.path.join(self.jobdir, '%s.inp' % jobname) log_file = os.path.join(self.jobdir, '%s.log' % jobname) self.struct.write(struct_file) self.writePrimeInput(refine_type, input_file, struct_file, **kwargs) command = [ "prime", os.path.basename(input_file), # Crashes with abs paths ] # Add the -HOST if given host = kwargs.get('host', None) # We ignore host=localhost since this causes issues with Prime. # See Ev:133176. if host and host != 'localhost': command += ['-HOST', host] if completed_callback: from schrodinger.job import jobhandler job = jobhandler.launch_job_with_callback(command, completed_callback, launch_dir=self.jobdir) return job else: job = jobcontrol.launch_job(command, launch_dir=self.jobdir) job.wait() if not job.succeeded(): errmsg = 'Job failed.\nJobname: %s\nJobId: %s\n' % (jobname, job.job_id) lastlines = [] with open(os.path.join(self.jobdir, log_file)) as fh: alines = fh.readlines() lastlines = alines[-30:] if lastlines: errmsg += 'Below are the last 30 lines of the log file...\n' errmsg += ''.join(lastlines) raise RuntimeError(errmsg) if job.StructureOutputFile: output = os.path.join(self.jobdir, job.StructureOutputFile) else: # multirefine prime jobs do not have job.StructureOutputFile output = output_file if not os.path.isfile(output): raise RuntimeError( 'Job has no output\n\nJobname: %s\nJobId: %s' % (jobname, job.job_id)) return structure.Structure.read(output)
[docs] def runPrimeMinimization(self, jobname): """ Shortcut to run a Prime minimization job :see: `Refiner.refinePrime` documentation """ return self.refinePrime(self.PRIME_MINIMIZE, jobname)
[docs] def runPrimeResidue(self, jobname): """ Shortcut to run a Prime residue refinement job :see: `Refiner.refinePrime` documentation """ return self.refinePrime(self.PRIME_RESIDUE, jobname)
[docs] def runPrimeSidechain(self, jobname): """ Shortcut to run a Prime sidechain refinement job :see: `Refiner.refinePrime` documentation """ return self.refinePrime(self.PRIME_SIDECHAIN, jobname)
[docs] def runPrimeSidechainCBeta(self, jobname): """ Shortcut to run a Prime sidechain refinement job with CA-CB vector sampling. This will vary the orientation of the CA-CB bond by up to 30 degrees from the initial direction. :see: `Refiner.refinePrime` documentation """ return self.refinePrime(self.PRIME_SIDECHAIN_CBETA, jobname)
[docs] def runPrimeSidechainBB(self, jobname): """ Shortcut to run a Prime sidechain refinement job with backbone sampling. This will sample the backbone by running a loop prediction on a set of 3 residues centered on the residue for which the side chain is being refined. :see: `Refiner.refinePrime` documentation """ return self.refinePrime(self.PRIME_SIDECHAIN_BB, jobname)
[docs] def runPrimeLoopPrediction(self, jobname, start_res=None, end_res=None): """ Shortcut to run a Prime loop prediction refinement job.. :see: `Refiner.refinePrime` documentation """ return self.refinePrime(self.PRIME_LOOP_PRED, jobname, start_res=start_res, end_res=end_res)
[docs] def runPythonMinimize(self, jobname): """ Shortcut to run a `schrodinger.structutils.minimize.Minimizer` job. :param jobname: Jobname to use :type jobname: str :returns: Minimized structure :rtype: `schrodinger.structure.Structure` object """ minimizer = minimize.Minimizer( struct=self.struct, ffld_version=minimize.OPLS_2005, cleanup=False) # SHARED-5335, BIOLUM-3602 # Auto fixing of lewis causes charged cysteines # turning cleanup=False to restore old behavior minimize_indices = [] # Get the atoms that will be minimized. if self.residues is None: minimize_indices.extend([a.index for a in self.struct.atom]) else: for res in self.residues: minimize_indices.extend([a.index for a in res.atom]) # Freeze all atoms we don't want minimized for atom in self.struct.atom: if atom.index not in minimize_indices: minimizer.addPosFrozen(atom.index) minimizer.minimize() return minimizer.getStructure()
[docs] def runRefinement(self, refine_type, jobname, **kwargs): """ Shortcut to run any of the available refinement jobs. :param refine_type: The type of Prime refinement to run (see class variables) :type refine_type: str :param jobname: Jobname to use :type jobname: str :raise RuntimeError: If `refine_type` is not supported :raise RuntimeError: If the refinement job fails :returns: Refined structure :rtype: `schrodinger.structure.Structure` object """ if refine_type in self.prime_types: return self.refinePrime(refine_type, jobname, **kwargs) elif refine_type == self.PYTHON_MINIMIZE: return self.runPythonMinimize(jobname)
[docs]class Consensus: """ Access the atoms, residues, and molecules (or just their indices) that are considered to be consensus objects for a template structure and query structure. All properties are returned as an `OrderedDict` that maps the template objects to their consensus objects from the query structure. Here is an example of how to get all the consensus waters between two protein structures. We define the cutoff here at 2 Angstroms:: from schrodinger.structure import StructureReader from schrodinger.application.bioluminate import protein pt = maestro.project_table_get() # Create an ASL map for all ligands in the WS asl_map = [] for row in pt.included_rows: st = row.getStructure() ligands = analyze.find_ligands(st) if not ligands: continue indices = [] for ligand in ligands: indices.extend([str(i) for i in ligand.atom_indexes]) asl = 'atom.n %s' % ','.join(indices) asl_map.append((st, asl)) # Create a consensus of all ligands, specifying that at least three # structures must have a ligand atom within 2A from one another. consensus = protein.Consensus(asl_map, 3, dist_cutoff=2) # To get the atom objects consensus_atoms = consensus.atoms # To get the molecule objects molecules = consensus.molecules """ ASL_WATER = 'water and NOT (atom.ele H)' ASL_WATER_NOZOB = 'water and NOT (atom.ele H) and NOT (withinbonds 1 (not water))' ASL_IONS = 'ions' ASL_LIGAND = '(((m.atoms 5-130)) and not ((ions) or ' ASL_LIGAND += '(res.pt ACE ACT ACY BCT BME BOG CAC CIT CO3 DMS EDO EGL EPE ' ASL_LIGAND += 'FES FMT FS3 FS4 GOL HEC HED HEM IOD IPA MES MO6 MPD MYR NAG ' ASL_LIGAND += 'NCO NH2 NH3 NO3 PG4 PO4 POP SEO SO4 SPD SPM SUC SUL TRS )))'
[docs] def __init__(self, asl_map, minimum_number, dist_cutoff=2.0): """ :param asl_map: List of structures and the ASL used to limit the atoms used when calculating the consensus :type asl_map: tuple of (`structure<structure.Structure>`, ASL) :param minimum_number: The minimum number of matches within structures. An atom will be considered a "consensus" atom if it is within the `dist_cutoff` of at least `minimum_number` of structures in the list of passed in structures. :type minimum_number: int :param dist_cutoff: Distance in Angstroms used to define a consensus match :type dist_cutoff: float :attention: The list of consensus atoms (or molecules, residues, indices, etc. depending on the property called, i.e. `self.molecules`) will all be unique and will depend on the ASL passed in. If the ASL is not specific enough you may end up with poor results. """ self.asl_map = asl_map self.number_cutoff = minimum_number self.dist_cutoff = dist_cutoff self._consensus_atoms = [] """ Atom indices mapping template and query consensus """ self._setIndexMap() self._atom_indices = None
def _setIndexMap(self): """ Private method called in __init__. Finds the consensus atom indices between the template and query. """ # Get a mapping of structures to atom indices for the given asl #asl_atom_map = {} atom_list = [] for st, asl in self.asl_map: indices = analyze.evaluate_asl(st, asl) atom_list.append([st.atom[i] for i in indices]) # Prepare the container for all the consensus atoms. # This list is in the same order as the passed in structures. consensus_map = {} ref_count = 0 for ref_atoms in atom_list: # For each atom matching the asl provided we loop through all the # other structures and see if there is a "consensus atom" among them for ref_atom in ref_atoms: # Track the total consensus atoms to make sure we meet cutoff consensus_total = 0 close_atoms = {} mob_count = 0 for mob_atoms in atom_list: if ref_count > mob_count: closest_atom = self.getClosest(ref_atom, mob_atoms) if closest_atom: consensus_total += 1 close_atoms.setdefault(mob_count, []).append(closest_atom) mob_count += 1 # Only add atoms to the consensus map of they meet the cutoff if consensus_total >= self.number_cutoff: consensus_map.setdefault(ref_count, []).append(ref_atom) for k, v in close_atoms.items(): consensus_map.setdefault(k, []).extend(v) ref_count += 1 # Add each set of atoms to the self._consensus_atoms so they match # the list of structures supplied in the init for i in range(len(self.asl_map)): self._consensus_atoms.append(consensus_map.get(i, []))
[docs] def getClosest(self, ref_atom, mob_atoms): """ Gets the closest atom to the `ref_atom` from `mob_atoms`. """ dist = float(self.dist_cutoff) closest_atom = None # Find the closest query atom for mob_atom in mob_atoms: new_dist = measure.measure_distance(ref_atom, mob_atom) if new_dist < dist: dist = new_dist closest_atom = mob_atom return closest_atom
def _createByAtomProperty(self, attr, is_callable=False): """ Private method to get an atom property. :param attr: The attribute to get from the consensus atoms :type attr: str :param is_callable: Whether the prop arg is a callable :type is_callable: bool :returns: A list of lists of the new entities created from the properties :rtype: list of lists """ entity_list = [] for atom_list in self._consensus_atoms: entities = set() for atom in atom_list: if hasattr(atom, attr): atom_attr = getattr(atom, attr) if is_callable: entities.add(atom_attr()) else: entities.add(atom_attr) entity_list.append(list(entities)) return entity_list @property def atoms(self): """ Get the map of `atom objects<structure._StructureAtom>` of consensus atoms. :returns: Atoms of consensus atoms :rtype: `OrderedDict` of `atom objects<structure._StructureAtom>` where the keys are the template atoms and their values are the consensus atoms from the query. """ return self._consensus_atoms @property def atom_indices(self): """ Get the map of atom indices of consensus atoms. :returns: Atom indices of consensus atoms :rtype: `OrderedDict` of ints where the keys are the template atom indices and their values are the consensus atom indices from the query. """ if not self._atom_indices: self._atom_indices = [] for atoms in self.consensus_atoms: self._atom_indices.append([a.index for a in atoms]) return self._atom_indices @property def residues(self): """ Get the list of `residue objects<structure._Residue>` of consensus atoms for each structure in `self.asl_map`. :returns: Residues of consensus atoms :rtype: list of unique consensus `residue objects<structure._Residue>` for each structure in `self.asl_map`. (Order is maintained) """ residues = [] for res_list in self._createByAtomProperty('getResidue', True): residue_names = [] unique_residues = [] for res in res_list: if not str(res) in residue_names: unique_residues.append(res) residue_names.append(res) residues.append(unique_residues) return residues @property def residue_indices(self): """ Get the map of residue indices of consensus atoms. :returns: Residue indices of consensus atoms :rtype: list of unique consensus residue indices for each structure in `self.asl_map`. (Order is maintained) """ return self._createByAtomProperty('resnum') @property def molecules(self): """ Get the map of `molecule objects<structure._Molecule>` of consensus atoms. :returns: Molecules of consensus atoms :rtype: list of unique consensus `molecule objects<structure._Molecule>` for each structure in `self.asl_map`. (Order is maintained) """ molecules = [] for mol_list in self._createByAtomProperty('getMolecule', True): molecule_names = [] unique_molecules = [] for mol in mol_list: if not str(mol) in molecule_names: unique_molecules.append(mol) molecule_names.append(mol) molecules.append(unique_molecules) return molecules @property def molecule_indices(self): """ Get the map of molecule indices of consensus atoms. :returns: Molecule indices of consensus atoms :rtype: list of unique consensus molecule indices for each structure in `self.asl_map`. (Order is maintained) """ return self._createByAtomProperty('molecule_number')