Source code for schrodinger.application.bioluminate.patch_utils.patch_finder

# -*- coding: utf-8 -*-

import functools
import operator
import os
import pickle
import time
from collections import OrderedDict
from past.utils import old_div

import numpy

from schrodinger import structure
from schrodinger import surface
from schrodinger.application.bioluminate.patch_utils import settings
from schrodinger.application.bioluminate.patch_utils.settings import PatchType
from schrodinger.infra import mm
from schrodinger.infra import mmlist
from schrodinger.infra.util import OneIndexedList
from schrodinger.protein import sequence
from schrodinger.structutils import analyze
from schrodinger.ui.sequencealignment import find_gui
from schrodinger.ui.sequencealignment import structure_utils

try:
    from schrodinger.application.prime.packages import additivedescriptors
    from schrodinger.application.prime.packages import agg_profiles
    from schrodinger.application.prime.packages import antibody
    from schrodinger.application.prime.packages import curate_antibody
    from schrodinger.application.prime.packages import loop_utils
    from schrodinger.application.prime.packages import msurfext as mext
    from schrodinger.application.prime.packages import struct_helper as shlp
    from schrodinger.application.prime.packages.protein_descriptors import \
        ext_desc
except ImportError:
    # allow the module to be imported even if Prime isn't available (for unit
    # testing)
    additivedescriptors = None
    agg_profiles = None
    antibody = None
    curate_antibody = None
    loop_utils = None
    ext_desc = None

# TODO: Would be good to go DRY and move this list into antibody.py
REGION_LABELS = [
    'HFR1', 'HFR2', 'HFR3', 'HFR4', 'H1', 'H2', 'H3', 'CH1', 'Hinge', 'CH2',
    'CH3', 'LFR1', 'LFR2', 'LFR3', 'LFR4', 'L1', 'L2', 'L3', 'CL'
]

SURF_NAME = "Protein Patches"
CLOGP_PROP = "r_psp_ClogP"
CLOGP_ITERS = 30
PARTIAL_CHARGE_PROP = "r_m_charge1"
PARTIAL_CHARGE_ITERS = CLOGP_ITERS

PATCH_PROP_PREFIX = "r_bioluminate_patch_"
RES_PROP_PREFIX = 'r_bioluminate_residue_'

# We add this value on to all Zyggregator scores.  This shifts the scores so
# that all scores > 0 correspond to a high likelihood of aggregation.
ZYGG_ADD = 4.25

_pre_analyze_path = os.path.dirname(__file__)
_pre_analyze_path = os.path.abspath(_pre_analyze_path)
PRE_ANALYZE_PATH = os.path.join(_pre_analyze_path, "pre_analyze.py")

# Do not include residues in patches, if their contribution is less:
MIN_RES_CONTRIBUTION = {
    PatchType.positive: 5.0,
    PatchType.negative: 5.0,
    PatchType.hydrophobic: 2.5,
}

DFLTCLRS = ((255, 255, 255), (0, 0, 0))
DFLTVALS = (0.0, 1.0)

# *************************************************************************** #
#                            Functions                                        #
# *************************************************************************** #


[docs]def read_asl_file(basename): with open(basename + ".txt", "r") as asl_in: return asl_in.read()
[docs]def write_asl_file(basename, asl): asl_file = basename + ".txt" with open(asl_file, "w") as asl_out: asl_out.write(asl) return asl_file
[docs]def get_connected_components(st): """ Determine all covalently connected components of `st` (i.e. unbroken chain segments). :return: A tuple of: (1) A list of list of residue names (str). Each list of residue names represents a single covalently connected component. (2) A list of list of `schrodinger.structure._Residue` objects. This object mirrors the structure of the list of list of residue names. """ residues = structure.get_residues_by_connectivity(st) residues = iter(residues) cur_res = next(residues) all_seq = [[]] all_struc = [[]] for next_res in residues: resname = cur_res.pdbres.strip() all_seq[-1].append(resname) all_struc[-1].append(cur_res) if not cur_res.isConnectedToResidue(next_res): all_seq.append([]) all_struc.append([]) cur_res = next_res resname = cur_res.pdbres.strip() all_seq[-1].append(resname) all_struc[-1].append(cur_res) return all_seq, all_struc
def _smooth_grid_vals(vpos, apos, aval, wfcn='ele', rcutoff=10.0, verbose=True): """ wrapper function to call smooth_grid_values in msurfext -> ensures that all parameters are set properly. :param vpos: input vertex coordinates (x,y,z for each entry is expected) :type vpos: list/tuple :param apos: input atom positions (x,y,z format) :type apos: list/tuple :param aval: atom-specific values (e.g. charges, hydrophobicity) :type aval: list/tuple :param wfcn: specify the type of smoothing - ('ele' for partial charge smoothing, 'lipo' for lipophilic data smoothing) :type wfcn: string :param rcutoff: distance radius - specify, how far out the atom value should be "felt". Change only, if you know what you are doing. :type rcutoff: float :param verbose: verbosity flag :rtype: list :return: list of length vpos with smoothed values """ t0 = time.time() if verbose: print("smoothing grid values: %s (%i vtx %i atm) .... " % ( \ wfcn, len(vpos), len(apos))) if len(vpos) < 3: raise ValueError("smoothing: invalid vertex position input!") if not all((True if a and len(a) == 3 else False for a in vpos)): raise ValueError("smoothing: invalid vertex position input!") if not apos or len(apos) < 1: raise ValueError("smoothing: invalid atom position input!") if not all((a and len(a) == 3 for a in apos)): raise ValueError("smoothing: invalid atom position input!") if not aval or len(apos) != len(aval): raise ValueError("smoothing: invalid atom values!") sm_vals = mext.smooth_grid_values(vpos, apos, aval, wfcn=wfcn, rcutoff=rcutoff) if verbose: print("...done. %.4f seconds." % (time.time() - t0)) return sm_vals def _set_vertex_colors(vtxvals, clrscheme=DFLTCLRS, clrvals=DFLTVALS): """ return for each vertex point a corresponding vertex color based on the smoothed vertex values. :param vtxvals: smoothed vertex values :type vtxvals: list/tuple :param clrcheme: list/tuple of reference colors (RGB for each entry) :param clrvals: list/tuple of reference values """ if len(clrvals) < 2 or len(clrscheme) != len(clrvals): raise ValueError("color smooth: invalid color range!") if len(clrscheme) and not all([a and len(a) == 3 for a in clrscheme]): raise ValueError("color smooth: invalid color scheme!") clrs = mext.clr_at_value(clrvals, clrscheme, vtxvals) return clrs
[docs]def triangle_area(coords): """ Calculate the area of a triangle defined by three vertices :param coords: A list of (x, y, z) coordinates for each vertex of the triangle, where each coordinate is a numpy array. :type coords: list :return: The area of the specified triangle :rtype: float :note: The formula used here is taken from http://mathworld.wolfram.com/TriangleArea.html """ a, b, c = list(map(numpy.array, coords)) cross = numpy.cross(c - a, c - b) return 0.5 * numpy.linalg.norm(cross)
[docs]class Patch(object): """ Data about a single patch. """ RES_TYPES = OrderedDict((("Arginine", "ARG"), ("Cysteine", "CYS"), ("Tryptophan", "TRP"), ("Tyrosine", "TYR")))
[docs] def __init__(self, vertices, patch_type, vertex_coords, all_smoothed, neighboring_triangles, neighboring_vertices, nearest_atom_indices, patch_color): """ :param vertices: A set of all vertices that belong to this patch :type vertices: set :param patch_type: The type of patch this is :type patch_type: `settings.PatchType` :param vertex_coords: The (x, y, z) coordinates for each surface vertex :type vertex_coords: list :param all_smoothed: The smoothed value for each vertex :type all_smoothed: list :param neighboring_triangles: A list of neighboring triangles for each vertex, where each triangle is represented as a frozenset of three vertex indices. :type neighboring_triangles: list :param neighboring_vertices: A list of neighboring vertices for each vertex. Neighbors are given as a set. :type neighboring_vertices: list :param nearest_atom_indices: A list of the nearest atom index for each vertex. :type nearest_atom_indices: list @patch_color: color of the patch (r,g,b) """ self.residues = [] # ordered list of residues self.contribution = [] # residue contribution to the patch self.seg = [] # nr atoms per residue contributing self.patch_atms = [] # atom indices of each residue self.type = patch_type self.vertices = vertices self.nearest_atom_indices = {nearest_atom_indices[i] for i in vertices} self.size = self._calcSize(vertices, vertex_coords, neighboring_triangles) self.label_vertex = self._calcLabelVertex(vertices, vertex_coords, neighboring_vertices) self._calcSmoothedStats(all_smoothed, vertices) vtxVals = [all_smoothed[i] for i in vertices] if patch_type is PatchType.negative: vtxVals = [a * -1.0 for a in vtxVals] self.vtxvals = vtxVals self.visible = True self.num = None
def _calcSize(self, vertices, vertex_coords, neighboring_triangles): """ Calculate the surface area of the patch. :param vertices: A set of all vertices that belong to this patch :type vertices: set :param vertex_coords: The (x, y, z) coordinates for each surface vertex :type vertex_coords: list :param neighboring_triangles: A list of neighboring triangles for each vertex, where each triangle is represented as a frozenset of three vertex indices. :type neighboring_triangles: :return: The surface area of the patch :rtype: float """ triangles = set() for vertex in vertices: for cur_triangle in neighboring_triangles[vertex]: if vertices.issuperset(cur_triangle): triangles.add(cur_triangle) size = 0 for cur_triangle in triangles: coords = [vertex_coords[i] for i in cur_triangle] size += triangle_area(coords) return size def _calcLabelVertex(self, vertices, vertex_coords, neighboring_vertices): """ Calculate the central vertex of the patch, which is the vertex to label in the workspace. Rather than using graph eccentricity, we define the central vertex as the vertex with the minimum Euclidean distance to the farthest patch edge vertex. :param vertices: A set of all vertices that belong to this patch :type vertices: set :param vertex_coords: The (x, y, z) coordinates for each surface vertex :type vertex_coords: list :param neighboring_vertices: A list of neighboring vertices for each vertex. Neighbors are given as a set. :type neighboring_vertices: list :return: The central vertex index :rtype: int """ outer_vertices = [ cur_vertex for cur_vertex in vertices if not vertices.issuperset(neighboring_vertices[cur_vertex]) ] outer_coords = [ vertex_coords[cur_vertex] for cur_vertex in outer_vertices ] if not outer_coords: # If this patch doesn't have any defined edges, then return an # arbitrary vertex. This can happen if there are holes in the # surface all around the patch. return next(iter(vertices)) outer_coords = list(map(numpy.array, outer_coords)) min_max_sq_dist = float("inf") central_vertex = -1 for cur_vertex in vertices: cur_coords = numpy.array(vertex_coords[cur_vertex]) squares = (outer_coords - cur_coords)**2 sq_dist = squares.sum(axis=1) max_sq_dist = sq_dist.max() if max_sq_dist < min_max_sq_dist: min_max_sq_dist = max_sq_dist central_vertex = cur_vertex return central_vertex def _calcSmoothedStats(self, all_smoothed, vertices): """ Calculate the minimum, maximum, and average value for patch vertices. The results will be stored as `self.min`, `self.max`, and `self.average`. :param all_smoothed: The smoothed value for each vertex :type all_smoothed: list :param vertices: A set of all vertices that belong to this patch :type vertices: set """ smoothed = [all_smoothed[i] for i in vertices] if self.type is PatchType.negative: # We want max to be the most extreme value, so swap min and max for # negative patches self.min = max(smoothed) self.max = min(smoothed) else: self.min = min(smoothed) self.max = max(smoothed) self.average = old_div(sum(smoothed), len(smoothed)) def __eq__(self, other): return self.num == other.num
[docs] def detailsData(self): """ Generate a list describing the details of this patch :return: A list of details for the patch, where each detail is a list of [group, category, patch details for that category] :rtype: list """ data = OrderedDict() data["Patch Properties"] = [ ["Type", self.type.long_name], ["Size", "%.2f Ų" % self.size], ['Score', "%.1f kcal/mol" % self.energy], ['Intensity', "%.3f" % (old_div(self.energy, self.size))] ] group = "Surface-Exposed Critical Residues" data[group] = [] for long_name, short_name in self.RES_TYPES.items(): label = "%ss" % long_name res_list = (res for res in self.residues if res.resname.strip() == short_name) value = self._resListText(res_list) data[group].append([label, value]) res_list = (res for res in self.residues if res.disulfide) value = self._resListText(res_list) data[group].append(["Disulfides", value]) reactive_types = list(self.residues[0].reactive_type) group = "Reactive Residues" data[group] = [] for site_name in reactive_types: label = site_name.capitalize() res_list = ( res for res in self.residues if res.reactive_type[site_name]) value = self._resListText(res_list) data[group].append([label, value]) data["Patch Residues"] = [[ "Residues", "; ".join(res.fullname for res in self.residues) ]] for data_group, rows in data.items(): for idx in range(len(rows)): data[data_group][idx][0] += ":" return data
def _resListText(self, res_list): """ Generate a string describing the specified list of residues. The percentage of side chain accessibility will be listed for each residue. "None" will be returned if there are no residues. :param res_list: A list of `ResInfo` objects to describe :type res_list: list :return: A string describing the of residues. :rtype: str """ # The unicode non-breaking space \xa0 is used here to better format # the "Details" dialog of the Patch Analysis panel. res_desc = [ "%s\xa0(%i%%\xa0SASA)" % (res.fullname, round(res.accessibility)) for res in res_list ] if res_desc: return "; ".join(res_desc) else: return "None"
[docs]@functools.total_ordering class ResInfo(object): """ Data about a single residue. Includes aggregation, surface area, and reactive residue grouping data. """
[docs] def __init__(self, res, zyggregator=None, aggrescan=None, aggscore=0.0): """ :param res: The residue that this aggregation data describes :type res: `schrodinger.structure._Residue` :param zyggregator: The Zyggregator score for the residue :type zyggregator: float :param aggrescan: The Aggrescan score for the residue :type aggrescan: float """ self.chain = res.chain self.resnum = res.resnum self.inscode = res.inscode self.resname = res.pdbres self.zyggregator = zyggregator self.aggrescan = aggrescan self.aggscore = aggscore self.sasa = 0 self.side_chain_sasa = 0 self.accessibility = None self.reactive_type = None self.disulfide = None self.topography = None
@property def fullname(self): """ The full residue name, formatted similar to A:TYR100. """ chain = self.chain if chain == " ": chain = "" else: chain += ":" resname = self.resname.strip() inscode = self.inscode if inscode == " ": inscode = "" return "%s%s%i%s" % (chain, resname, self.resnum, inscode)
[docs] def getIdStr(self): """ Return a string ID for this residue, e.g. "A:2b" - same format as structure._Residue.___str__() return value. """ chain = self.chain if chain == " ": chain = "_" res_str = "%s:%d" % (chain, self.resnum) if self.inscode != " ": res_str += self.inscode return res_str
def __eq__(self, other): return (self.chain, self.resnum, self.inscode) == (other.chain, other.resnum, other.inscode) def __lt__(self, other): return (self.chain, self.resnum, self.inscode) < (other.chain, other.resnum, other.inscode) def __hash__(self): return hash((self.chain, self.resnum, self.inscode))
[docs] def getAsl(self): """ Return the ASL for finding this residue by chain/resnum/inscode. """ return '(chain.name "{}" AND res.num {} AND res.inscode "{}")'.format( self.chain, self.resnum, self.inscode)
# This makes it possible to unpickle state saved by 18-1 release: ResData = ResInfo
[docs]class ResPatchData(object): """ This class represents a residue's contribution to a patch. """
[docs] def __init__(self, patch, res_info, contribution): self.patch = patch self.res_info = res_info self.contribution = contribution
[docs] def getAsl(self): """ Return the ASL for finding this patch's residue by chain/resnum/inscode. """ return self.res_info.getAsl()
[docs] def getIdStr(self): """ Return a string ID for the residue, e.g. "A:2b" - same format as structure._Residue.___str__() return value. """ return self.res_info.getIdStr()
[docs]class ProteinProperties(object): """ Calculates protein-level properties. Properties will be stored at the ct-level and returned for display on the Properties tab :cvar KYTE_DOOLITTLE_SCALE: A hydrophobicity scale taken from A simple method for displaying the hydropathic character of a protein. J. Kyte, R.F. Doolittle, J Mol Biol. 1982 May 5;157(1):105-32. :vartype KYTE_DOOLITTLE_SCALE: dict """ KYTE_DOOLITTLE_SCALE = { 'ALA': 1.80, 'CYS': 2.50, 'ASP': -3.50, 'GLU': -3.50, 'PHE': 2.80, 'GLY': -0.40, 'HIS': -3.20, 'HIP': -3.20, 'HIE': -3.20, 'HID': -3.20, 'ILE': 4.50, 'LYS': -3.90, 'LEU': 3.80, 'MET': 1.90, 'ASN': -3.50, 'PRO': -1.60, 'GLN': -3.50, 'ARG': -4.50, 'SER': -0.80, 'THR': -0.70, 'VAL': 4.20, 'TRP': -0.90, 'TYR': -1.30 }
[docs] def __init__(self, struc, asa_by_atom): """ :param struc: The structure to calculate the properties of :type struc: `schrodinger.structure.Structure` :param asa_by_atom: The accessible surface area of each atom :type asa_by_atom: `OneIndexedList` """ self.charge = sum(atom.formal_charge for atom in struc.atom) self.weight = sum(atom.atomic_weight for atom in struc.atom) self.hydrophobic_moment = self._calcHydrophobicMoment(struc) self.positive, self.negative = self._calcPosNegSurfaceArea( struc, asa_by_atom) self.acceptor, self.donor = self._calcHBondSurfaceArea( struc, asa_by_atom)
def _calcHydrophobicMoment(self, struc): """ Calculate the hydrophobic moment :param struc: The structure to calculate the hydrophobic moment of :type struc: `schrodinger.structure.Structure` :return: The hydrophobic moment :rtype: float :note: This implementation is based on calc_protein_descriptors in psp-src/python.modules/pro_descriptors.py. """ center_of_mass = analyze.center_of_mass(struc) moment_vec = numpy.array([0.0, 0.0, 0.0]) for atom in struc.atom: if atom.pdbname.strip() != "CA": continue resname = atom.pdbres.strip() try: hydrophobicity = self.KYTE_DOOLITTLE_SCALE[resname] except KeyError: # Ignore non-standard residues, since we don't know their # hydrophobicity continue else: moment_vec += (atom.xyz - center_of_mass) * hydrophobicity moment = (moment_vec**2).sum() moment = numpy.sqrt(moment) return moment def _calcPosNegSurfaceArea(self, struc, asa_by_atom): """ Calculate the surface area for positively and negatively charged atoms :param struc: The structure to calculate the surface areas of :type struc: `schrodinger.structure.Structure` :param asa_by_atom: The accessible surface area of each atom :type asa_by_atom: `OneIndexedList` :return: A tuple of - The surface area for positively charged atoms (float) - The surface area for negatively charged atoms (float) :rtype: tuple """ positive = negative = 0 for atom, asa in zip(struc.atom, asa_by_atom): charge = atom.partial_charge if charge > 0: positive += asa elif charge < 0: negative += asa return positive, negative def _calcHBondSurfaceArea(self, struc, asa_by_atom): """ Calculate the surface area for hydrogen bond acceptors and donors :param struc: The structure to calculate the surface areas of :type struc: `schrodinger.structure.Structure` :param asa_by_atom: The accessible surface area of each atom :type asa_by_atom: `OneIndexedList` :return: A tuple of - The surface area for hydrogen bond acceptors (float) - The surface area for hydrogren bond donors (float) :rtype: tuple """ donor_handle, acceptor_handle = mm.mmct_hbond_get_participants(struc) # For hydrogen bond donors, the heavy atom bound to the hydrogen is # considered a participant of the H-bond. Such atoms are repeated in # this list if they have more than one hydrogen on it. Eg. tertiary # amide N will appear 3 times in this list, once for each H on it. # For this reason, remove duplicates by converting to a set: donors = set(list(mmlist.mmlist(donor_handle))) acceptors = list(mmlist.mmlist(acceptor_handle)) # Verify that the acceptor atoms are all unique: assert len(acceptors) == len(set(acceptors)) acceptor_sa = sum(asa_by_atom[i] for i in acceptors) donor_sa = sum(asa_by_atom[i] for i in donors) return acceptor_sa, donor_sa
[docs]class PatchAnalysis(object): """ Object representing a pre-analysis instance, which is saved by the backend and loaded into the GUI. It is also saved/restored when panel state is saved and restored. """
[docs] def __init__(self, st, surf, asl, clogp_smoothed, partial_charge_smoothed, neighboring_vertices, neighboring_triangles, res_data_by_vertex, prot_properties, topo_data=None): """ :param st: Analyzed structure. :type st: `structure.Structure` :param surf: The surface to find patches on :type surf: `schrodinger.surface.Surface` :param asl: Asl for atoms that were analyzed :type asl: str :param clogp_smoothed: The cLogP (hydrophobicity) values smoothed over the surface. This list contains the smoothed value at each surface vertex. :type clogp_smoothed: list :param partial_charge_smoothed: The partial charge values smoothed over the surface. This list contains the smoothed value at each surface vertex. :type partial_charge_smoothed: list :param neighboring_vertices: A list of neighboring vertices for each vertex. :type neighboring_vertices: list :param neighboring_triangles: A list of neighboring triangles for each vertex, where each triangle is represented as a frozenset of three vertex indices. :type neighboring_triangles: list :param res_data_by_vertex: A list of residue data for each vertex. The `ResInfo` object is given for the residue closest to the vertex. :type res_data_by_vertex: list :param prot_properties: The protein properties to be loaded into the Properties tab. :type prot_properties: `ProteinProperties` :param topo_data: The topography value of all the residues in the structure. :type topo_data: dict(str, str) """ self.st = st self.surf = surf self.asl = asl self.clogp_smoothed = clogp_smoothed self.partial_charge_smoothed = partial_charge_smoothed self.neighboring_vertices = neighboring_vertices self.neighboring_triangles = neighboring_triangles self.res_data_by_vertex = res_data_by_vertex self.prot_properties = prot_properties # For backward compatibility with loading psazip files into the panel if topo_data is not None: self.topo_data = topo_data else: pre_analyzer = PreAnalyzer(st, None) self.topo_data = pre_analyzer.calcAntibodyTopography({}) # When restoring state from 18-1 release, add aggscore atribute, which # was not being written then. This work-around is needed to avoid # creating ResInfo objects without this attribute. for res in self.res_data_by_vertex: # Skip the deleted residues if res and not hasattr(res, 'aggscore'): res.aggscore = 0.0
[docs] def write(self, basename): self.st.write(basename + '.maegz') self.surf.write(basename + '.vis') write_asl_file(basename, self.asl) data = (self.clogp_smoothed, self.partial_charge_smoothed, self.neighboring_vertices, self.neighboring_triangles, self.res_data_by_vertex, self.prot_properties, self.topo_data) with open(basename + ".pkl", "wb") as pkl_out: pickle.dump(data, pkl_out, protocol=2)
[docs] @classmethod def read(cls, basename): """ Read the `PreAnalyzer` data from the specified basename and use it to create a `PatchFinder` object. :param basename: The basename for the `PreAnalyzer` output files :type basename: str """ st = structure.Structure.read(basename + '.maegz') surf = surface.Surface.read(basename + ".vis") asl = read_asl_file(basename) with open(basename + ".pkl", "rb") as pkl_in: # latin encoding allows loading files written in Py2 (2018-1rel) data = pickle.load(pkl_in, encoding='latin1') return cls(st, surf, asl, *data)
[docs]class PreAnalyzer(object): """ Perform patch finding calculations that only need to be done once per structure. This class is intended to be run under job control. """
[docs] def __init__(self, struc, asl): """ :param struc: The structure to analyze :type struc: `schrodinger.structure.Structure` :param asl: An ASL describing atoms of `struc` to analyze :type asl: str """ self._struc = struc self._asl = asl self._surf = None self._clogp_smoothed = None self._partial_charge_smoothed = None self._neighboring_vertices = None self._neighboring_triangles = None self._res_data_by_vertex = None self._properties = None self._topo_data = None
[docs] @classmethod def readAndRun(cls, basename): """ Read a structure and ASL from the specified files and run the analysis. :param basename: The full path to the file to analyzes except the ".maegz" and ".txt" extensions :type basename: str :return: The completed pre-analysis :rtype: `PreAnalyzer` """ struc = structure.Structure.read(basename + ".maegz") asl = read_asl_file(basename) pre_analyzer = cls(struc, asl) pre_analyzer.run() return pre_analyzer
[docs] def run(self): """ Perform the pre-analysis. The results will be stored in instance attributes and can be written to a pickle file using `write`. """ self._prepareStructure() self._prepareSurface() self._res_data_by_vertex, asa_by_atom, self._topo_data = \ self._preCalcResData() self._properties = ProteinProperties(self._struc, asa_by_atom)
def _prepareStructure(self): """ Perform necessary calculations on the structure. The results will be stored in atom-level properties. """ self._calcCLogP() self._calcPartialCharges() def _calcCLogP(self): """ Calculate cLogP, the partition coefficient, for the entire structure. This measures hydrophobicity. Results will be stored in the `CLOGP_PROP` atom-level property. """ calc = additivedescriptors.LogP() calc.getScore(self._struc, ct_property=CLOGP_PROP) def _calcPartialCharges(self): """ Calculate the partial charge on each atom. Results will be stored in the `PARTIAL_CHARGE_PROP` atom-level property. """ mm.mmffld_initialize(mm.MMERR_DEFAULT_HANDLER) ff_handle = mm.mmffld_new(14) mm.mmffld_enterMol(ff_handle, self._struc) mm.mmffld_deleteMol(ff_handle, self._struc) mm.mmffld_delete(ff_handle) mm.mmffld_terminate() def _prepareSurface(self): """ Generate the surface and perform necessary calculations on it. The results will be stored in instance attributes. """ # A copy of input structure with non-protein atoms deleted. # This is used to create surfaces while ignoring non-protein atoms # so as to show the waters/ligands/DNA appearing outside of the surface # rather than holes (caused due to backend deleting non-protein atoms as well) struc_copy = self._struc.copy() sel_atoms = analyze.evaluate_asl(struc_copy, self._asl) non_prot_atoms = analyze.evaluate_asl(struc_copy, 'not protein') renumber_map = struc_copy.deleteAtoms(non_prot_atoms, renumber_map=True) sel_atoms = [ renumber_map[atom] for atom in sel_atoms if renumber_map[atom] is not None ] self._surf = surface.Surface.newMolecularSurface(struc_copy, SURF_NAME, atoms=sel_atoms) self._surf.setTransparency(15) self._calculateSurfaceNeighbors() self._calcSmoothedValues() def _calculateSurfaceNeighbors(self): """ For each vertex on the surface, calculate all neighboring triangles and store the results in `self._neighboring_vertices`. """ self._neighboring_vertices = [ set() for _ in range(self._surf.vertex_count) ] self._neighboring_triangles = [ [] for _ in range(self._surf.vertex_count) ] for vertices in self._surf.patch_vertices: vertex_set = frozenset(vertices) for vertex in vertices: self._neighboring_triangles[vertex].append(vertex_set) neighbors = vertex_set - {vertex} self._neighboring_vertices[vertex].update(neighbors) def _calcSmoothedValues(self): """ Smooth the specified atom-level charges for partial charges and slogP-values over the surface. """ KBOLTZ = 0.00198722 # Boltzman conatant T = 300.0 # temperature LN10 = 2.30259 # log 10 ct = self._struc atms = [a for a in ct.atom] slogp = [a.property.get(CLOGP_PROP, 0.0) for a in atms] slogp = [a * KBOLTZ * T * LN10 for a in slogp] elev = [a.property.get(PARTIAL_CHARGE_PROP, 0.0) for a in atms] elev = [a * KBOLTZ * T * LN10 for a in elev] apos = [a.xyz for a in atms] vpos = self._surf.vertex_coords vpos = [list(a) for a in vpos] val = _smooth_grid_vals(vpos, apos, slogp, wfcn='lipo', rcutoff=5.0) self._clogp_smoothed = val val = _smooth_grid_vals(vpos, apos, elev, wfcn='ele', rcutoff=10.0) self._partial_charge_smoothed = val def _preCalcResData(self): """ Calculate the aggregation, surface area, and reactive residue grouping data for each residue. :return: A tuple of: (1) A list of residue data for each vertex. The `ResInfo` object is given for the residue closest to the vertex. (2) A list of the solvent accessible surface area for each atom (`OneIndexedList`) (3) A dict of residue ids and topography value of all the residues in the structure. :rtype: tuple """ all_seq, all_struc = get_connected_components(self._struc) res_data_by_res = self._calcAggregation(all_seq, all_struc) # dict where keys are atom indices and values are ResidueInfo objects: res_info_by_atom = {} for res in self._struc.residue: for atom in res.atom: res_info_by_atom[atom.index] = res_data_by_res[res] asa_by_atom = self._calcSasa(res_info_by_atom) self._calcAccessibility(res_data_by_res) self._calcReactiveResidues(res_data_by_res, asa_by_atom, all_struc) self._calcDisulfides(res_data_by_res) topo_data = self.calcAntibodyTopography(res_data_by_res) res_data_by_vertex = self._resDataByVertex(res_info_by_atom) return res_data_by_vertex, asa_by_atom, topo_data def _calcAggregation(self, all_seq, all_struc): """ Calculate the Zyggregator and Aggrescan values for each residue. :param all_seq: A list of list of residue names (str). Each list of residue names represents a single covalently connected component. :type all_seq: list :param all_struc: A list of list of `schrodinger.structure._Residue` objects. This object mirrors the structure of the list of list of residue names. :type all_struc: list :return: A dictionary of `ResInfo` objects containing aggregation data. Keys are `_Residue` objects. :rtype: dict """ agg_by_res = {} for cur_seq, cur_struc in zip(all_seq, all_struc): if len(cur_seq) >= 5: zygg = self._calcZyggregator(cur_seq) aggr = self._calcAggrescan(cur_seq) # adjustment to the Zyggregator profile based on the aggrescan # 0 cutoff to scale to the same universe if zygg[0] and aggr[0]: cutoff = ZYGG_ADD aggp = [] for i, v in enumerate(aggr): if not i: continue if (aggr[i - 1] > 0.0 and v < 0.0) or \ (v > 0.0 and aggr[i - 1] < 0.0): aggp += [zygg[i - 1], zygg[i]] if aggp: cutoff = old_div(sum(aggp), float(len(aggp))) cutoff += 0.35 # extra cushion, see paper # due to charges at the N/C termini we believe that there # is no APR propensity within the first/last 5 residues lz = len(zygg) x = list(range(5)) + list(range(lz - 5, lz)) zygg = [a - cutoff for a in zygg] for i in x: zygg[i] = min(zygg[i], 0.15) else: # Zyggregator and Aggrescan require at least five covalently # connected residues for scoring zygg = aggr = [None] * len(cur_seq) for res, cur_zygg, cur_aggr in zip(cur_struc, zygg, aggr): agg_by_res[res] = ResInfo(res, cur_zygg, cur_aggr) return agg_by_res def _resDataByVertex(self, res_info_by_atom): """ Create a list of the `ResInfo` object corresponding to each surface surface. :param res_info_by_atom: A dictionary of `ResInfo` objects :type res_info_by_atom: dict :return: A list of `ResInfo` objects :rtype: list """ res_data_by_vertex = [] for atom_index in self._surf.nearest_atom_indices: if atom_index == -1: val = None else: val = res_info_by_atom[atom_index] res_data_by_vertex.append(val) return res_data_by_vertex def _calcZyggregator(self, seq): """ Calculate the Zyggregator aggregation score for the specified residues :param seq: A list of residue name for a single covalently connected segment. :type seq: list :return: A list of the Zyggregator score for each residue. :rtype: list """ charge = sum( atom.property[PARTIAL_CHARGE_PROP] for atom in self._struc.atom) try: p_agg, z_agg, pval, zperc75 = \ agg_profiles.calc_zygg_profile(seq, charge=charge) # pval is a list of the Zyggregator scores, one value per residue. return pval except (TypeError, ValueError): return [None for a in seq] def _calcAggrescan(self, seq): """ Calculate the Aggrescan aggregation score for the specified residues :param seq: A list of residue name for a single covalently connected segment. :type seq: list :return: A list of the Aggrescan score for each residue. :rtype: list """ try: a3vA, a4vS, a3v, a4v = agg_profiles.calc_aggrescan_profile(seq) return a4v except (TypeError, ValueError): return [None for a in seq] def _calcSasa(self, res_info_by_atom): """ Calculate the solvent accessible surface area for each atom and residue. :param res_info_by_atom: A dictionary of `ResInfo` objects. The values of this dictionary will be updated with the calculated accessible side chain surface area. :type res_info_by_atom: dict :return: A `OneIndexedList` of solvent accessible surface area for each atom in the structure """ BACKBONE_ATOMS = {"CA", "C", "O", "N"} asa_by_atom = [0] * self._struc.atom_total asa_by_atom = OneIndexedList(asa_by_atom) vertex_coords = self._surf.vertex_coords nearest = self._surf.nearest_atom_indices for vertex_nums in self._surf.patch_vertices: cur_coords = [vertex_coords[i] for i in vertex_nums] area = triangle_area(cur_coords) per_vertex_area = old_div(area, 3.0) for cur_vertex in vertex_nums: cur_atom_index = nearest[cur_vertex] if cur_atom_index == -1: # This happens with occasional vertices when using an ASL continue asa_by_atom[cur_atom_index] += per_vertex_area atom_obj = self._struc.atom[cur_atom_index] res_info = res_info_by_atom[cur_atom_index] res_info.sasa += per_vertex_area if atom_obj.pdbname.strip() not in BACKBONE_ATOMS: res_info.side_chain_sasa += per_vertex_area return asa_by_atom def _calcAccessibility(self, res_data_by_res): """ Calculate side chain accessibility (side chain surface area divided by the maximum possible side chain surface area for that residue type). :param res_data_by_res: A dictionary of `ResInfo` objects. The values of this dictionary will be updated with the calculated accessibility values. :type res_data_by_res: dict """ side_chain_acc = structure_utils.calculate_sasa_dict() for res in res_data_by_res.values(): try: full_acc = side_chain_acc[res.resname.strip()] except KeyError: continue else: res.accessibility = (old_div(res.side_chain_sasa, full_acc)) * 100 def _calcReactiveResidues(self, res_data_by_res, asa_by_atom, all_struc): """ Determine which residues match the reactive residue sites. Reactive residue definitions are read from the user's profile, so any changes saved via other panels will be picked up and used here. :param res_data_by_res: A dictionary of `ResInfo` objects. The reactive_type dictionary will be created and populated for each `ResInfo` object. :type res_data_by_res: dict :param asa_by_atom: A list of solvent accessible surface area for each atom in the structure :type asa_by_atom: `OneIndexedList` :param all_struc: A list of list of `schrodinger.structure._Residue` objects. Each list of residues represents a single covalently connected component. :type all_struc: list """ patterns = find_gui.readMSVPatterns() seq_dicts = self._genSeqDictsForReactiveResiduesSearch( asa_by_atom, all_struc) all_pattern_names = [cur_pattern[0] for cur_pattern in patterns] for res in res_data_by_res.values(): # Initialize the reactive_type dictionary for each ResInfo object res.reactive_type = OrderedDict( (name, False) for name in all_pattern_names) for cur_pattern in patterns: pattern_name, pattern_def, hotspot, _ = cur_pattern # Note that hotspot is always a string. It's either a string # containing an integer or a string containing "None". pattern_results = sequence.find_generalized_pattern( seq_dicts, pattern_def) if not pattern_results: # The pattern definition was invalid continue for results_for_seq, cur_res_list in zip(pattern_results, all_struc): for start, end in results_for_seq: if hotspot == "None": to_mark = list(range(start, end + 1)) else: to_mark = [start + int(hotspot) - 1] for i in to_mark: cur_res = cur_res_list[i] cur_data = res_data_by_res[cur_res] cur_data.reactive_type[pattern_name] = True def _genSeqDictsForReactiveResiduesSearch(self, asa_by_atom, all_struc): """ Create the sequence dictionaries required for find_generalized_pattern() :param asa_by_atom: A list of solvent accessible surface area for each atom in the structure :type asa_by_atom: `OneIndexedList` :param all_struc: A list of list of `schrodinger.structure._Residue` objects. Each list of residues represents a single covalently connected component. :type all_struc: list :return: The sequence dictionaries required for find_generalized_pattern() :rtype: list """ seq_objs = [] for cur_res_list in all_struc: atom_indices = [] for res in cur_res_list: atom_indices.extend(res.getAtomIndices()) seq_objs.append( sequence.StructureSequence(self._struc, atom_indices)) # Convert OneIndexedList to zero-indexed list asa_by_atom = list(asa_by_atom) seq_dicts = [ sequence.convert_structure_sequence_for_pattern_search( cur_seq_obj, asa_by_atom) for cur_seq_obj in seq_objs ] return seq_dicts def _calcDisulfides(self, res_data_by_res): """ Determine which residues have a disulfide bond. :param res_data_by_res: A dictionary of `ResInfo` objects. The disulfide attribute (Boolean) will be set for each `ResInfo` object. :type res_data_by_res: dict """ for res, res_info in res_data_by_res.items(): res_info.disulfide = self._resHasDisulfide(res) def _resHasDisulfide(self, res): """ Does the specified residue contain a disulfide bond? :param res: The residue to check :type res: `schrodinger.structure._Residue` :return: True if `res` contains a disulfide bond. False otherwise. :rtype: bool """ if not res.pdbres.strip().startswith("CY"): # This allows for the Amber/Charmm cysteine name variants such as # CYX return False sulfurs = [atom for atom in res.atom if atom.element == "S"] if len(sulfurs) != 1: return False for bound_atom in sulfurs[0].bonded_atoms: if (bound_atom.chain != res.chain or bound_atom.resnum != res.resnum or bound_atom.inscode != res.inscode): return True return False
[docs] def calcAntibodyTopography(self, res_data_by_res): """ Determine the antibody topography for each residue. For non-antibody structures, all values will be None. PANEL-7988 :param res_data_by_res: A dictionary of `ResInfo` objects. The topography attribute (str) will be set for each `ResInfo` object. :type res_data_by_res: dict :return: Antibody topography data :rtype: dict(str, str) """ # Annotate the input structure: curated_st = curate_antibody.curate_struct(self._struc) topo_data = {} if curated_st is not None: # If curated_st is None, the structure is not an antibody. for prop in ('s_bioluminateReadOnly_Heavy_Chain', 's_bioluminateReadOnly_Light_Chain'): try: chain = curated_st.property[prop] except KeyError: # If this is a single chain antibody, skip over the # antibody chain that is not present continue seq, resid_list = loop_utils.get_seq_resid(curated_st, chain) seq_regions = antibody.SeqRegions(seq).regions regions = {} for label in REGION_LABELS: if label in seq_regions: regions[label] = seq_regions[label] for ires, resid in enumerate(resid_list): for name, (istart, iend) in regions.items(): if ires >= istart and ires <= iend: topo_data[resid] = name break else: topo_data[resid] = None for res, res_info in res_data_by_res.items(): res_info.topography = topo_data.get(str(res)) return topo_data
[docs] def write(self, basename): """ Write out the surface and all calculated values :param basename: The filename to write the output to. The surface will be written to basename.vis and the calculated values will be written to basename.pkl. :type basename: str """ self.getAnalysis().write(basename)
[docs] def getAnalysis(self): return PatchAnalysis( self._struc, self._surf, self._asl, self._clogp_smoothed, self._partial_charge_smoothed, self._neighboring_vertices, self._neighboring_triangles, self._res_data_by_vertex, self._properties, self._topo_data)
[docs]class PatchFinder(object): """ Find surface patches using the output of `PreAnalyzer`. """
[docs] def __init__(self, analysis): """ Class for grouping patches from a PatchAnalysis object based on specific settings. :param analysis: Pre-analysis object :type analysis: PatchAnalysis """ self.st = analysis.st self.surf = analysis.surf self.asl = analysis.asl self._clogp_smoothed = analysis.clogp_smoothed self._partial_charge_smoothed = analysis.partial_charge_smoothed self._neighboring_vertices = analysis.neighboring_vertices self._neighboring_triangles = analysis.neighboring_triangles self._vertex_coords = self.surf.vertex_coords self._nearest_atom_indices = self.surf.nearest_atom_indices self._res_data_by_vertex = analysis.res_data_by_vertex self.prot_properties = analysis.prot_properties self.topo_data = analysis.topo_data _, res_lists = get_connected_components(self.st) self.res_lists = [map(str, res_list) for res_list in res_lists]
[docs] @classmethod def read(cls, basename): """ Read the `PreAnalyzer` data from the specified basename and use it to create a `PatchFinder` object. :param basename: The basename for the `PreAnalyzer` output files :type basename: str """ analysis = PatchAnalysis.read(basename) return cls(analysis)
[docs] def write(self, basename): analysis = PatchAnalysis(self.st, self.surf, self.asl, self._clogp_smoothed, self._partial_charge_smoothed, self._neighboring_vertices, self._neighboring_triangles, self._res_data_by_vertex, self.prot_properties) analysis.write(basename)
[docs] def calculate(self, settings): """ Calculate patches and residue aggregation data using the specified settings. :param settings: The settings to use for the patch calculations :type settings: settings.PatchSettings :return: A tuple of: - the calculated patches as a list of `Patch` objects - the residue aggregation data as a list of {ResidueAggData} objects :rtype: tuple """ patches = self._calcPatches(settings) res_data = self._calcResData(patches) patches = self._filterAndNumberPatches(patches) self._calcAggScores(patches, res_data) self._annotateProperties(res_data) return patches, res_data
def _calcAggScores(self, patches, res_data): """ Calculate Agg scores, and set ResData.aggscore properties, and self.prot_properties.aggscore_sum property. :param patches: List of patches :type patches: list(patch_finder.Patch) :param res_data: List of residue info objects - each residue can appear multiple times, if it's present in multiple patches. :type res_data: list(ResInfo) """ aggscore_by_res = ext_desc.calc_agg_scores(self.st, patches) for oneresdata in res_data: res_id = oneresdata.getIdStr() oneresdata.aggscore = aggscore_by_res.get(res_id, 0.0) self.prot_properties.aggscore_sum = sum(aggscore_by_res.values()) def _annotateProperties(self, res_data): """ Annotate the structure with the structure-level patch properties and the residue-level properties to the appropriate CA atom """ # Annotate residue-level properties to CA atom res_props = [ "zyggregator", "aggrescan", "aggscore", "sasa", "side_chain_sasa", # For now, we'll comment out non-float properties # we can enable these later if necessary but UX isnt currently obvious #"accessibility", #"reactive_type", #"disulfide", #"topography" ] for oneresdata in res_data: resasl = oneresdata.getAsl() caasl = f'(atom.ptype " CA ") AND ({resasl})' assert analyze.validate_asl(caasl) try: caai = analyze.evaluate_asl(self.st, caasl)[0] except IndexError: print("Skipping non AA residue..") continue caa = self.st.atom[caai] for prop in res_props: val = getattr(oneresdata, prop) if val is None: val = 0.0 caa.property[f"{RES_PROP_PREFIX}{prop}"] = val # Annotate structure-level properties propkeys = { 'total_charge': self.prot_properties.charge, 'atomic_weight': self.prot_properties.weight, 'hydrophobic_moment': self.prot_properties.hydrophobic_moment, 'positive_SA': self.prot_properties.positive, 'negative_SA': self.prot_properties.negative, 'acceptor_SA': self.prot_properties.acceptor, 'donor_SA': self.prot_properties.donor, 'Sum_AggScore': self.prot_properties.aggscore_sum } for suffix, val in propkeys.items(): fullkey = PATCH_PROP_PREFIX + suffix self.st.property[fullkey] = val def _calcPatches(self, settings): """ Calculate patches using the specified settings. :param settings: The settings to use for the patch calculations :type settings: `settings.PatchSettings` :return: A list of `Patch` objects :rtype: list """ pos = self._calcPatchesFor(self._partial_charge_smoothed, settings.positive, operator.gt, PatchType.positive) neg = self._calcPatchesFor(self._partial_charge_smoothed, settings.negative, operator.lt, PatchType.negative) hydr = self._calcPatchesFor(self._clogp_smoothed, settings.hydrophobic, operator.gt, PatchType.hydrophobic) patches = pos + neg + hydr return patches def _calcPatchesFor(self, smoothed, settings, op, patch_type): """ Calculate the patches using the specified data :param smoothed: The smoothed values to use for patch calculation :type smoothed: list :param settings: The settings to use for the patch calculations :type settings: `settings.PatchTypeSettings` :param op: An operator indicating whether patches vertices should contains values less than or greater than the threshold value in `settings`. :type op: function :param patch_type: The type of patch to find :type patch_type: `settings.PatchType` :return: A list of `Patch` objects :rtype: list """ num_vertices = len(self._vertex_coords) vertex_examined = [False] * num_vertices patches = [] for i in range(num_vertices): if vertex_examined[i]: continue elif not op(smoothed[i], settings.threshold): vertex_examined[i] = True continue vertices = self._findAllPatchVertices(i, smoothed, op, settings.threshold, vertex_examined) cur_patch = Patch(vertices, patch_type, self._vertex_coords, smoothed, self._neighboring_triangles, self._neighboring_vertices, self._nearest_atom_indices, settings.color) if cur_patch.size >= settings.size: patches.append(cur_patch) return patches def _findAllPatchVertices(self, starting_vertex, smoothed, op, threshold, vertex_examined): """ Find all vertices that are part of the same patch as `starting_vertex`. :param starting_vertex: The index of the vertex to start the search at. :type starting_vertex: int :param smoothed: The smoothed values to use for patch calculation :type smoothed: list :param op: An operator indicating whether patches vertices should contains values less than or greater than the `threshold` value. :type op: function :param threshold: All patch vertices must have a smoothed value less than or greater than this value. :type threshold: float :param vertex_examined: For each vertex, a Boolean indicating whether or not the vertex has been examined during this patch search. Will be updated by this method. :type vertex_examined: list :return: A list of vertex indices :rtype: list """ patch_vertices = {starting_vertex} new_vertices = {starting_vertex} while new_vertices: prev_vertices = new_vertices new_vertices = set() for cur_vertex in prev_vertices: for cur_neighbor in self._neighboring_vertices[cur_vertex]: vertex_examined[cur_neighbor] = True if op(smoothed[cur_neighbor], threshold): new_vertices.add(cur_neighbor) new_vertices -= patch_vertices patch_vertices.update(new_vertices) return patch_vertices def _calcResData(self, patches): """ The function takes in a list of patches and calculates for each patch the following properties that are set in the object: - energy: The combined score for residues in the patch (including residues that were excluded later filtered from the patch). - res: list of residue data - contribution: sorted list of energy contribution per residue to the patch (highest contributor is listed first) - patch_atms: list of atom indices that contribute to the patch. This list is sorted by residue affiliation - seg: segmentation vector. Determine, how many atoms per residue are contributing to the patch :param patches: A list of `Patch` objects :type patches: list :return: A list of `ResPatchInfo` objects :rtype: list """ t0 = time.time() rdata = self._res_data_by_vertex adata = self._nearest_atom_indices chrga = self._partial_charge_smoothed slgpa = self._clogp_smoothed # FIXME this method needs some serious re-factoring. # List of ResInfo objects that are part of a patch (or multiple # patches), each appearing only once in the list: res_data = [] for cpatch in patches: vertices = cpatch.vertices # ensure proper association of vertexpoint to resdata rdat = [rdata[i] if rdata[i] else None for i in vertices] adat = [adata[i] if adata[i] else None for i in vertices] if cpatch.type is PatchType.hydrophobic: vdat = [slgpa[i] if slgpa[i] else None for i in vertices] else: vdat = [chrga[i] if chrga[i] else None for i in vertices] # ensure to use valid data for regrouping msk = [a and b and z for a, b, z in zip(rdat, adat, vdat)] rdat = [a for i, a in enumerate(rdat) if msk[i]] adat = [a for i, a in enumerate(adat) if msk[i]] vdat = [a for i, a in enumerate(vdat) if msk[i]] if not rdat or not adat or not vdat: continue # group by residue rnm = [ '%s_%i_%s%s' % (a.chain, int(a.resnum), a.resname, a.inscode) for a in rdat ] idxL, cnt = shlp.sort_and_count(rnm) # If the patch contains only one residue: if cnt and len(cnt) == 1: res = rdat[idxL[0]] aidx = set([adat[k] for k in idxL]) # atom indices contrib = abs(sum(vdat)) cpatch.energy = contrib cpatch.contribution = [contrib] cpatch.seg = [len(aidx)] # nr of contributing atoms cpatch.patch_atms = list(aidx) # consistency cpatch.residues = [res] res_data.append(cpatch.residues) continue # If got here, then this patch has more than 1 residue idx = shlp.vslice(idxL, cnt) r = [rdat[x[0]] for x in idx] # group by contributing atoms rc = [] # residue contribution rs = [] # nr of atoms per residue that contribute ra = [] # atom indices for vx in idx: aidx = [adat[x] for x in vx] ax, ac = shlp.sort_and_count(aidx) if not ac: rs.append(0) # this should not happen ra.append([]) rc.append(0.0) continue atms = shlp.vslice([aidx[z] for z in ax], ac) atms = [a[0] for a in atms] ra.append(atms) rs.append(len(atms)) rc.append(abs(sum([vdat[x] for x in vx]))) # order by largest residue contributions x = sorted(list(range(len(rc))), key=rc.__getitem__) x.reverse() # Filter out residues with low contribution. PANEL-8195 min_cont = MIN_RES_CONTRIBUTION[cpatch.type] x = [a for a in x if rc[a] >= min_cont] # Order and filter r, rc, ra, and rs lists in the same way: r = [r[a] for a in x] rc = [rc[a] for a in x] ra = [ra[a] for a in x] rs = [rs[a] for a in x] cpatch.contribution = rc # score for each contributing res cpatch.energy = sum(shlp.castIf(cpatch.contribution)) cpatch.seg = rs # nr of contributing atoms cpatch.patch_atms = shlp.flatten(ra) # consistency reasons cpatch.residues = r res_data.append(cpatch.residues) print(" ..... done calculating res_data (%i patches): %.3f " % \ (len(patches), time.time() - t0)) # Sanity check: for patch in patches: assert len(patch.contribution) == len(patch.residues) # TODO in the future, consider ordering this list by original # sequence order, as current ordering is not very useful. return shlp.flatten(res_data) def _filterAndNumberPatches(self, patches): """ Discard any patches where all residues got filtered out since these patches aren't biologically meaningful. After that, number all remaining patches. :param patches: All patches found via `_calcPatches`. Residue data for the patches must have been calculated using `_calcResData`. :type patches: list[Patch] :return: The filtered and numbered patches. :rtype: list[Patch] """ patches = [patch for patch in patches if patch.residues] for i, cur_patch in enumerate(patches, start=1): cur_patch.num = i return patches
[docs] def getColorsForPatchVertices(self, patch, psettings): """ Return colors for all vertices of the given patch. :param patch: Patch to get colors for. :type patch: `patch_finder.Patch` :return: Colors for each vertex of the patch :rtype: list of (float, float, float) """ color_ramp = psettings.getPatchColorRamp(patch.type) if psettings.display_mode == settings.COLOR_AGGSCORE: vertex_values = [] for vert in patch.vertices: rdata = self._res_data_by_vertex[vert] aggscore = rdata.aggscore vertex_values.append(aggscore) else: vertex_values = patch.vtxvals vcolors = [color_ramp._getRGBFloat(val) for val in vertex_values] return vcolors
[docs] def colorSurfByPatches(self, surf, patches, psettings): # Initialize vertex_colors to default color (used for vertices that # are not part of a patch): default_color = [a / 255.0 for a in settings.NO_PATCH_COLOR] vertex_colors = [ numpy.array(default_color, dtype=numpy.float32) for _ in range(surf.vertex_count) ] for patch in patches: vcolors = self.getColorsForPatchVertices(patch, psettings) for patch_color, vertex in zip(vcolors, patch.vertices): vertex_colors[vertex] = patch_color surf.setColoring(vertex_colors)