Source code for schrodinger.application.bioluminate.patch_utils.patch_finder

# -*- coding: utf-8 -*-

import functools
import operator
import os
import pickle
import time
from collections import OrderedDict
from past.utils import old_div

import numpy

from schrodinger import structure
from schrodinger import surface
from schrodinger.application.bioluminate.patch_utils import settings
from schrodinger.application.bioluminate.patch_utils.settings import PatchType
from schrodinger.infra import mm
from schrodinger.infra import mmlist
from schrodinger.infra.util import OneIndexedList
from schrodinger.protein import sequence
from schrodinger.structutils import analyze
from schrodinger.ui.sequencealignment import find_gui
from schrodinger.ui.sequencealignment import structure_utils

try:
    from schrodinger.application.prime.packages import additivedescriptors
    from schrodinger.application.prime.packages import agg_profiles
    from schrodinger.application.prime.packages import antibody
    from schrodinger.application.prime.packages import curate_antibody
    from schrodinger.application.prime.packages import loop_utils
    from schrodinger.application.prime.packages import msurfext as mext
    from schrodinger.application.prime.packages import struct_helper as shlp
    from schrodinger.application.prime.packages.protein_descriptors import \
        ext_desc
except ImportError:
    # allow the module to be imported even if Prime isn't available (for unit
    # testing)
    additivedescriptors = None
    agg_profiles = None
    antibody = None
    curate_antibody = None
    loop_utils = None
    ext_desc = None

# TODO: Would be good to go DRY and move this list into antibody.py
REGION_LABELS = [
    'HFR1', 'HFR2', 'HFR3', 'HFR4', 'H1', 'H2', 'H3', 'CH1', 'Hinge', 'CH2',
    'CH3', 'LFR1', 'LFR2', 'LFR3', 'LFR4', 'L1', 'L2', 'L3', 'CL'
]

SURF_NAME = "Protein Patches"
CLOGP_PROP = "r_psp_ClogP"
CLOGP_ITERS = 30
PARTIAL_CHARGE_PROP = "r_m_charge1"
PARTIAL_CHARGE_ITERS = CLOGP_ITERS

PATCH_PROP_PREFIX = "r_bioluminate_patch_"
RES_PROP_PREFIX = 'r_bioluminate_residue_'

# We add this value on to all Zyggregator scores.  This shifts the scores so
# that all scores > 0 correspond to a high likelihood of aggregation.
ZYGG_ADD = 4.25

_pre_analyze_path = os.path.dirname(__file__)
_pre_analyze_path = os.path.abspath(_pre_analyze_path)
PRE_ANALYZE_PATH = os.path.join(_pre_analyze_path, "pre_analyze.py")

# Do not include residues in patches, if their contribution is less:
MIN_RES_CONTRIBUTION = {
    PatchType.positive: 5.0,
    PatchType.negative: 5.0,
    PatchType.hydrophobic: 2.5,
}

DFLTCLRS = ((255, 255, 255), (0, 0, 0))
DFLTVALS = (0.0, 1.0)

# *************************************************************************** #
#                            Functions                                        #
# *************************************************************************** #


[docs]def read_asl_file(basename):
    with open(basename + ".txt", "r") as asl_in:
        return asl_in.read()


[docs]def write_asl_file(basename, asl):
    asl_file = basename + ".txt"
    with open(asl_file, "w") as asl_out:
        asl_out.write(asl)
    return asl_file


[docs]def get_connected_components(st):
    """
    Determine all covalently connected components of `st` (i.e. unbroken chain
    segments).

    :return: A tuple of: (1) A list of list of residue names (str). Each list
        of residue names represents a single covalently connected component.
        (2) A list of list of `schrodinger.structure._Residue` objects. This
        object mirrors the structure of the list of list of residue names.
    """

    residues = structure.get_residues_by_connectivity(st)
    residues = iter(residues)
    cur_res = next(residues)
    all_seq = [[]]
    all_struc = [[]]
    for next_res in residues:
        resname = cur_res.pdbres.strip()
        all_seq[-1].append(resname)
        all_struc[-1].append(cur_res)
        if not cur_res.isConnectedToResidue(next_res):
            all_seq.append([])
            all_struc.append([])
        cur_res = next_res
    resname = cur_res.pdbres.strip()
    all_seq[-1].append(resname)
    all_struc[-1].append(cur_res)
    return all_seq, all_struc


def _smooth_grid_vals(vpos, apos, aval, wfcn='ele', rcutoff=10.0, verbose=True):
    """
    wrapper function to call smooth_grid_values in msurfext -> ensures that all
    parameters are set properly.

    :param vpos: input vertex coordinates (x,y,z for each entry is expected)
    :type  vpos: list/tuple
    :param apos: input atom positions (x,y,z format)
    :type  apos: list/tuple
    :param aval: atom-specific values (e.g. charges, hydrophobicity)
    :type  aval: list/tuple

    :param wfcn: specify the type of smoothing - ('ele' for partial charge
                 smoothing, 'lipo' for lipophilic data smoothing)
    :type  wfcn: string
    :param rcutoff: distance radius - specify, how far out the atom value
                    should be "felt". Change only, if you know what you
                    are doing.
    :type  rcutoff: float
    :param verbose: verbosity flag

    :rtype:  list
    :return: list of length vpos with smoothed values
    """

    t0 = time.time()
    if verbose:
        print("smoothing grid values:  %s (%i vtx %i atm) .... " % (          \
                                                wfcn, len(vpos), len(apos)))

    if len(vpos) < 3:
        raise ValueError("smoothing: invalid vertex position input!")

    if not all((True if a and len(a) == 3 else False for a in vpos)):
        raise ValueError("smoothing: invalid vertex position input!")

    if not apos or len(apos) < 1:
        raise ValueError("smoothing: invalid atom position input!")
    if not all((a and len(a) == 3 for a in apos)):
        raise ValueError("smoothing: invalid atom position input!")

    if not aval or len(apos) != len(aval):
        raise ValueError("smoothing: invalid atom values!")

    sm_vals = mext.smooth_grid_values(vpos,
                                      apos,
                                      aval,
                                      wfcn=wfcn,
                                      rcutoff=rcutoff)
    if verbose:
        print("...done. %.4f seconds." % (time.time() - t0))

    return sm_vals


def _set_vertex_colors(vtxvals, clrscheme=DFLTCLRS, clrvals=DFLTVALS):
    """
    return for each vertex point a corresponding vertex color based on the
    smoothed vertex values.

    :param vtxvals: smoothed vertex values
    :type  vtxvals: list/tuple

    :param clrcheme: list/tuple of reference colors (RGB for each entry)
    :param clrvals:  list/tuple of reference values
    """

    if len(clrvals) < 2 or len(clrscheme) != len(clrvals):
        raise ValueError("color smooth: invalid color range!")

    if len(clrscheme) and not all([a and len(a) == 3 for a in clrscheme]):
        raise ValueError("color smooth: invalid color scheme!")

    clrs = mext.clr_at_value(clrvals, clrscheme, vtxvals)

    return clrs


[docs]def triangle_area(coords):
    """
    Calculate the area of a triangle defined by three vertices

    :param coords: A list of (x, y, z) coordinates for each vertex of the
        triangle, where each coordinate is a numpy array.
    :type coords: list

    :return: The area of the specified triangle
    :rtype: float

    :note: The formula used here is taken from
        http://mathworld.wolfram.com/TriangleArea.html
    """

    a, b, c = list(map(numpy.array, coords))
    cross = numpy.cross(c - a, c - b)
    return 0.5 * numpy.linalg.norm(cross)


[docs]class Patch(object):
    """
    Data about a single patch.
    """

    RES_TYPES = OrderedDict((("Arginine", "ARG"), ("Cysteine", "CYS"),
                             ("Tryptophan", "TRP"), ("Tyrosine", "TYR")))

[docs]    def __init__(self, vertices, patch_type, vertex_coords, all_smoothed,
                 neighboring_triangles, neighboring_vertices,
                 nearest_atom_indices, patch_color):
        """
        :param vertices: A set of all vertices that belong to this patch
        :type vertices: set

        :param patch_type: The type of patch this is
        :type patch_type: `settings.PatchType`

        :param vertex_coords: The (x, y, z) coordinates for each surface vertex
        :type vertex_coords: list

        :param all_smoothed: The smoothed value for each vertex
        :type all_smoothed: list

        :param neighboring_triangles: A list of neighboring triangles for each
            vertex, where each triangle is represented as a frozenset of three
            vertex indices.
        :type neighboring_triangles: list

        :param neighboring_vertices: A list of neighboring vertices for each
            vertex.  Neighbors are given as a set.
        :type neighboring_vertices: list

        :param nearest_atom_indices: A list of the nearest atom index for each
            vertex.
        :type nearest_atom_indices: list

        @patch_color: color of the patch (r,g,b)

        """

        self.residues = []  # ordered list of residues
        self.contribution = []  # residue contribution to the patch
        self.seg = []  # nr atoms per residue contributing
        self.patch_atms = []  # atom indices of each residue
        self.type = patch_type
        self.vertices = vertices
        self.nearest_atom_indices = {nearest_atom_indices[i] for i in vertices}

        self.size = self._calcSize(vertices, vertex_coords,
                                   neighboring_triangles)
        self.label_vertex = self._calcLabelVertex(vertices, vertex_coords,
                                                  neighboring_vertices)

        self._calcSmoothedStats(all_smoothed, vertices)

        vtxVals = [all_smoothed[i] for i in vertices]

        if patch_type is PatchType.negative:
            vtxVals = [a * -1.0 for a in vtxVals]
        self.vtxvals = vtxVals
        self.visible = True
        self.num = None

    def _calcSize(self, vertices, vertex_coords, neighboring_triangles):
        """
        Calculate the surface area of the patch.

        :param vertices: A set of all vertices that belong to this patch
        :type vertices: set

        :param vertex_coords: The (x, y, z) coordinates for each surface vertex
        :type vertex_coords: list

        :param neighboring_triangles: A list of neighboring triangles for each
            vertex, where each triangle is represented as a frozenset of three
            vertex indices.
        :type neighboring_triangles:

        :return: The surface area of the patch
        :rtype: float
        """

        triangles = set()
        for vertex in vertices:
            for cur_triangle in neighboring_triangles[vertex]:
                if vertices.issuperset(cur_triangle):
                    triangles.add(cur_triangle)

        size = 0
        for cur_triangle in triangles:
            coords = [vertex_coords[i] for i in cur_triangle]
            size += triangle_area(coords)
        return size

    def _calcLabelVertex(self, vertices, vertex_coords, neighboring_vertices):
        """
        Calculate the central vertex of the patch, which is the vertex to label
        in the workspace.  Rather than using graph eccentricity, we define the
        central vertex as the vertex with the minimum Euclidean distance to
        the farthest patch edge vertex.

        :param vertices: A set of all vertices that belong to this patch
        :type vertices: set

        :param vertex_coords: The (x, y, z) coordinates for each surface vertex
        :type vertex_coords: list

        :param neighboring_vertices: A list of neighboring vertices for each
            vertex.  Neighbors are given as a set.
        :type neighboring_vertices: list

        :return: The central vertex index
        :rtype: int
        """

        outer_vertices = [
            cur_vertex for cur_vertex in vertices
            if not vertices.issuperset(neighboring_vertices[cur_vertex])
        ]
        outer_coords = [
            vertex_coords[cur_vertex] for cur_vertex in outer_vertices
        ]
        if not outer_coords:
            # If this patch doesn't have any defined edges, then return an
            # arbitrary vertex.  This can happen if there are holes in the
            # surface all around the patch.
            return next(iter(vertices))
        outer_coords = list(map(numpy.array, outer_coords))
        min_max_sq_dist = float("inf")
        central_vertex = -1
        for cur_vertex in vertices:
            cur_coords = numpy.array(vertex_coords[cur_vertex])
            squares = (outer_coords - cur_coords)**2
            sq_dist = squares.sum(axis=1)
            max_sq_dist = sq_dist.max()
            if max_sq_dist < min_max_sq_dist:
                min_max_sq_dist = max_sq_dist
                central_vertex = cur_vertex
        return central_vertex

    def _calcSmoothedStats(self, all_smoothed, vertices):
        """
        Calculate the minimum, maximum, and average value for patch vertices.
        The results will be stored as `self.min`, `self.max`, and
        `self.average`.

        :param all_smoothed: The smoothed value for each vertex
        :type all_smoothed: list

        :param vertices: A set of all vertices that belong to this patch
        :type vertices: set
        """

        smoothed = [all_smoothed[i] for i in vertices]
        if self.type is PatchType.negative:
            # We want max to be the most extreme value, so swap min and max for
            # negative patches
            self.min = max(smoothed)
            self.max = min(smoothed)
        else:
            self.min = min(smoothed)
            self.max = max(smoothed)
        self.average = old_div(sum(smoothed), len(smoothed))

    def __eq__(self, other):
        return self.num == other.num

[docs]    def detailsData(self):
        """
        Generate a list describing the details of this patch

        :return: A list of details for the patch, where each detail is a list of
            [group, category, patch details for that category]
        :rtype: list
        """
        data = OrderedDict()
        data["Patch Properties"] = [
            ["Type", self.type.long_name], ["Size",
                                            "%.2f Å²" % self.size],
            ['Score', "%.1f kcal/mol" % self.energy],
            ['Intensity',
             "%.3f" % (old_div(self.energy, self.size))]
        ]
        group = "Surface-Exposed Critical Residues"
        data[group] = []
        for long_name, short_name in self.RES_TYPES.items():
            label = "%ss" % long_name
            res_list = (res for res in self.residues
                        if res.resname.strip() == short_name)
            value = self._resListText(res_list)
            data[group].append([label, value])

        res_list = (res for res in self.residues if res.disulfide)
        value = self._resListText(res_list)
        data[group].append(["Disulfides", value])

        reactive_types = list(self.residues[0].reactive_type)
        group = "Reactive Residues"
        data[group] = []
        for site_name in reactive_types:
            label = site_name.capitalize()
            res_list = (
                res for res in self.residues if res.reactive_type[site_name])
            value = self._resListText(res_list)
            data[group].append([label, value])
        data["Patch Residues"] = [[
            "Residues", "; ".join(res.fullname for res in self.residues)
        ]]
        for data_group, rows in data.items():
            for idx in range(len(rows)):
                data[data_group][idx][0] += ":"
        return data

    def _resListText(self, res_list):
        """
        Generate a string describing the specified list of residues.  The
        percentage of side chain accessibility will be listed for each residue.
        "None" will be returned if there are no residues.

        :param res_list: A list of `ResInfo` objects to describe
        :type res_list: list

        :return: A string describing the of residues.
        :rtype: str
        """
        # The unicode non-breaking space \xa0 is used here to better format
        # the "Details" dialog of the Patch Analysis panel.
        res_desc = [
            "%s\xa0(%i%%\xa0SASA)" % (res.fullname, round(res.accessibility))
            for res in res_list
        ]
        if res_desc:
            return "; ".join(res_desc)
        else:
            return "None"


[docs]@functools.total_ordering
class ResInfo(object):
    """
    Data about a single residue.  Includes aggregation, surface area, and
    reactive residue grouping data.
    """

[docs]    def __init__(self, res, zyggregator=None, aggrescan=None, aggscore=0.0):
        """
        :param res: The residue that this aggregation data describes
        :type res: `schrodinger.structure._Residue`

        :param zyggregator: The Zyggregator score for the residue
        :type zyggregator: float

        :param aggrescan: The Aggrescan score for the residue
        :type aggrescan: float
        """

        self.chain = res.chain
        self.resnum = res.resnum
        self.inscode = res.inscode
        self.resname = res.pdbres
        self.zyggregator = zyggregator
        self.aggrescan = aggrescan
        self.aggscore = aggscore
        self.sasa = 0
        self.side_chain_sasa = 0
        self.accessibility = None
        self.reactive_type = None
        self.disulfide = None
        self.topography = None

    @property
    def fullname(self):
        """
        The full residue name, formatted similar to A:TYR100.
        """

        chain = self.chain
        if chain == " ":
            chain = ""
        else:
            chain += ":"
        resname = self.resname.strip()
        inscode = self.inscode
        if inscode == " ":
            inscode = ""
        return "%s%s%i%s" % (chain, resname, self.resnum, inscode)

[docs]    def getIdStr(self):
        """
        Return a string ID for this residue, e.g. "A:2b" - same format as
        structure._Residue.___str__() return value.
        """
        chain = self.chain
        if chain == " ":
            chain = "_"
        res_str = "%s:%d" % (chain, self.resnum)
        if self.inscode != " ":
            res_str += self.inscode
        return res_str

    def __eq__(self, other):
        return (self.chain, self.resnum,
                self.inscode) == (other.chain, other.resnum, other.inscode)

    def __lt__(self, other):
        return (self.chain, self.resnum,
                self.inscode) < (other.chain, other.resnum, other.inscode)

    def __hash__(self):
        return hash((self.chain, self.resnum, self.inscode))

[docs]    def getAsl(self):
        """
        Return the ASL for finding this residue by chain/resnum/inscode.
        """

        return '(chain.name "{}" AND res.num {} AND res.inscode "{}")'.format(
            self.chain, self.resnum, self.inscode)


# This makes it possible to unpickle state saved by 18-1 release:
ResData = ResInfo


[docs]class ResPatchData(object):
    """
    This class represents a residue's contribution to a patch.
    """

[docs]    def __init__(self, patch, res_info, contribution):
        self.patch = patch
        self.res_info = res_info
        self.contribution = contribution

[docs]    def getAsl(self):
        """
        Return the ASL for finding this patch's residue by chain/resnum/inscode.
        """
        return self.res_info.getAsl()

[docs]    def getIdStr(self):
        """
        Return a string ID for the residue, e.g. "A:2b" - same format as
        structure._Residue.___str__() return value.
        """

        return self.res_info.getIdStr()


[docs]class ProteinProperties(object):
    """
    Calculates protein-level properties.
    Properties will be stored at the ct-level and returned for display on the Properties tab

    :cvar KYTE_DOOLITTLE_SCALE: A hydrophobicity scale taken from
        A simple method for displaying the hydropathic character of a protein.
        J. Kyte, R.F. Doolittle, J Mol Biol. 1982 May 5;157(1):105-32.
    :vartype KYTE_DOOLITTLE_SCALE: dict
    """

    KYTE_DOOLITTLE_SCALE = {
        'ALA': 1.80,
        'CYS': 2.50,
        'ASP': -3.50,
        'GLU': -3.50,
        'PHE': 2.80,
        'GLY': -0.40,
        'HIS': -3.20,
        'HIP': -3.20,
        'HIE': -3.20,
        'HID': -3.20,
        'ILE': 4.50,
        'LYS': -3.90,
        'LEU': 3.80,
        'MET': 1.90,
        'ASN': -3.50,
        'PRO': -1.60,
        'GLN': -3.50,
        'ARG': -4.50,
        'SER': -0.80,
        'THR': -0.70,
        'VAL': 4.20,
        'TRP': -0.90,
        'TYR': -1.30
    }

[docs]    def __init__(self, struc, asa_by_atom):
        """
        :param struc: The structure to calculate the properties of
        :type struc: `schrodinger.structure.Structure`

        :param asa_by_atom: The accessible surface area of each atom
        :type asa_by_atom: `OneIndexedList`
        """

        self.charge = sum(atom.formal_charge for atom in struc.atom)
        self.weight = sum(atom.atomic_weight for atom in struc.atom)
        self.hydrophobic_moment = self._calcHydrophobicMoment(struc)
        self.positive, self.negative = self._calcPosNegSurfaceArea(
            struc, asa_by_atom)
        self.acceptor, self.donor = self._calcHBondSurfaceArea(
            struc, asa_by_atom)

    def _calcHydrophobicMoment(self, struc):
        """
        Calculate the hydrophobic moment

        :param struc: The structure to calculate the hydrophobic moment of
        :type struc: `schrodinger.structure.Structure`

        :return: The hydrophobic moment
        :rtype: float

        :note: This implementation is based on calc_protein_descriptors in
            psp-src/python.modules/pro_descriptors.py.
        """

        center_of_mass = analyze.center_of_mass(struc)
        moment_vec = numpy.array([0.0, 0.0, 0.0])
        for atom in struc.atom:
            if atom.pdbname.strip() != "CA":
                continue
            resname = atom.pdbres.strip()
            try:
                hydrophobicity = self.KYTE_DOOLITTLE_SCALE[resname]
            except KeyError:
                # Ignore non-standard residues, since we don't know their
                # hydrophobicity
                continue
            else:
                moment_vec += (atom.xyz - center_of_mass) * hydrophobicity
        moment = (moment_vec**2).sum()
        moment = numpy.sqrt(moment)
        return moment

    def _calcPosNegSurfaceArea(self, struc, asa_by_atom):
        """
        Calculate the surface area for positively and negatively charged atoms

        :param struc: The structure to calculate the surface areas of
        :type struc: `schrodinger.structure.Structure`

        :param asa_by_atom: The accessible surface area of each atom
        :type asa_by_atom: `OneIndexedList`

        :return: A tuple of
              - The surface area for positively charged atoms (float)
              - The surface area for negatively charged atoms (float)
        :rtype: tuple
        """

        positive = negative = 0
        for atom, asa in zip(struc.atom, asa_by_atom):
            charge = atom.partial_charge
            if charge > 0:
                positive += asa
            elif charge < 0:
                negative += asa
        return positive, negative

    def _calcHBondSurfaceArea(self, struc, asa_by_atom):
        """
        Calculate the surface area for hydrogen bond acceptors and donors

        :param struc: The structure to calculate the surface areas of
        :type struc: `schrodinger.structure.Structure`

        :param asa_by_atom: The accessible surface area of each atom
        :type asa_by_atom: `OneIndexedList`

        :return: A tuple of
              - The surface area for hydrogen bond acceptors (float)
              - The surface area for hydrogren bond donors (float)
        :rtype: tuple
        """

        donor_handle, acceptor_handle = mm.mmct_hbond_get_participants(struc)

        # For hydrogen bond donors, the heavy atom bound to the hydrogen is
        # considered a participant of the H-bond. Such atoms are repeated in
        # this list if they have more than one hydrogen on it. Eg. tertiary
        # amide N will appear 3 times in this list, once for each H on it.
        # For this reason, remove duplicates by converting to a set:
        donors = set(list(mmlist.mmlist(donor_handle)))

        acceptors = list(mmlist.mmlist(acceptor_handle))

        # Verify that the acceptor atoms are all unique:
        assert len(acceptors) == len(set(acceptors))

        acceptor_sa = sum(asa_by_atom[i] for i in acceptors)
        donor_sa = sum(asa_by_atom[i] for i in donors)

        return acceptor_sa, donor_sa


[docs]class PatchAnalysis(object):
    """
    Object representing a pre-analysis instance, which is saved by the backend
    and loaded into the GUI.  It is also saved/restored when panel state is
    saved and restored.
    """

[docs]    def __init__(self,
                 st,
                 surf,
                 asl,
                 clogp_smoothed,
                 partial_charge_smoothed,
                 neighboring_vertices,
                 neighboring_triangles,
                 res_data_by_vertex,
                 prot_properties,
                 topo_data=None):
        """
        :param st: Analyzed structure.
        :type st: `structure.Structure`

        :param surf: The surface to find patches on
        :type surf: `schrodinger.surface.Surface`

        :param asl: Asl for atoms that were analyzed
        :type asl: str

        :param clogp_smoothed: The cLogP (hydrophobicity) values smoothed over
            the surface.  This list contains the smoothed value at each surface
            vertex.
        :type clogp_smoothed: list

        :param partial_charge_smoothed: The partial charge values smoothed over
            the surface.  This list contains the smoothed value at each surface
            vertex.
        :type partial_charge_smoothed: list

        :param neighboring_vertices: A list of neighboring vertices for each
            vertex.
        :type neighboring_vertices: list

        :param neighboring_triangles: A list of neighboring triangles for each
            vertex, where each triangle is represented as a frozenset of three
            vertex indices.
        :type neighboring_triangles: list

        :param res_data_by_vertex: A list of residue data for each vertex.  The
            `ResInfo` object is given for the residue closest to the vertex.
        :type res_data_by_vertex: list

        :param prot_properties: The protein properties to be loaded into the
            Properties tab.
        :type prot_properties: `ProteinProperties`

        :param topo_data: The topography value of all the residues in the
            structure.
        :type topo_data: dict(str, str)
        """
        self.st = st
        self.surf = surf
        self.asl = asl
        self.clogp_smoothed = clogp_smoothed
        self.partial_charge_smoothed = partial_charge_smoothed
        self.neighboring_vertices = neighboring_vertices
        self.neighboring_triangles = neighboring_triangles
        self.res_data_by_vertex = res_data_by_vertex
        self.prot_properties = prot_properties

        # For backward compatibility with loading psazip files into the panel
        if topo_data is not None:
            self.topo_data = topo_data
        else:
            pre_analyzer = PreAnalyzer(st, None)
            self.topo_data = pre_analyzer.calcAntibodyTopography({})

        # When restoring state from 18-1 release, add aggscore atribute, which
        # was not being written then. This work-around is needed to avoid
        # creating ResInfo objects without this attribute.
        for res in self.res_data_by_vertex:
            # Skip the deleted residues
            if res and not hasattr(res, 'aggscore'):
                res.aggscore = 0.0

[docs]    def write(self, basename):
        self.st.write(basename + '.maegz')
        self.surf.write(basename + '.vis')
        write_asl_file(basename, self.asl)
        data = (self.clogp_smoothed, self.partial_charge_smoothed,
                self.neighboring_vertices, self.neighboring_triangles,
                self.res_data_by_vertex, self.prot_properties, self.topo_data)
        with open(basename + ".pkl", "wb") as pkl_out:
            pickle.dump(data, pkl_out, protocol=2)

[docs]    @classmethod
    def read(cls, basename):
        """
        Read the `PreAnalyzer` data from the specified basename and use it to
        create a `PatchFinder` object.

        :param basename: The basename for the `PreAnalyzer` output files
        :type basename: str
        """

        st = structure.Structure.read(basename + '.maegz')
        surf = surface.Surface.read(basename + ".vis")
        asl = read_asl_file(basename)
        with open(basename + ".pkl", "rb") as pkl_in:
            # latin encoding allows loading files written in Py2 (2018-1rel)
            data = pickle.load(pkl_in, encoding='latin1')
        return cls(st, surf, asl, *data)


[docs]class PreAnalyzer(object):
    """
    Perform patch finding calculations that only need to be done once per
    structure.  This class is intended to be run under job control.
    """

[docs]    def __init__(self, struc, asl):
        """
        :param struc: The structure to analyze
        :type struc: `schrodinger.structure.Structure`

        :param asl: An ASL describing atoms of `struc` to analyze
        :type asl: str
        """

        self._struc = struc
        self._asl = asl
        self._surf = None
        self._clogp_smoothed = None
        self._partial_charge_smoothed = None
        self._neighboring_vertices = None
        self._neighboring_triangles = None
        self._res_data_by_vertex = None
        self._properties = None
        self._topo_data = None

[docs]    @classmethod
    def readAndRun(cls, basename):
        """
        Read a structure and ASL from the specified files and run the analysis.

        :param basename: The full path to the file to analyzes except the
            ".maegz" and ".txt" extensions
        :type basename: str

        :return: The completed pre-analysis
        :rtype: `PreAnalyzer`
        """

        struc = structure.Structure.read(basename + ".maegz")
        asl = read_asl_file(basename)
        pre_analyzer = cls(struc, asl)
        pre_analyzer.run()
        return pre_analyzer

[docs]    def run(self):
        """
        Perform the pre-analysis.  The results will be stored in instance
        attributes and can be written to a pickle file using `write`.
        """

        self._prepareStructure()
        self._prepareSurface()
        self._res_data_by_vertex, asa_by_atom, self._topo_data = \
            self._preCalcResData()
        self._properties = ProteinProperties(self._struc, asa_by_atom)

    def _prepareStructure(self):
        """
        Perform necessary calculations on the structure.  The results will be
        stored in atom-level properties.
        """

        self._calcCLogP()
        self._calcPartialCharges()

    def _calcCLogP(self):
        """
        Calculate cLogP, the partition coefficient, for the entire structure.
        This measures hydrophobicity.  Results will be stored in the
        `CLOGP_PROP` atom-level property.
        """

        calc = additivedescriptors.LogP()
        calc.getScore(self._struc, ct_property=CLOGP_PROP)

    def _calcPartialCharges(self):
        """
        Calculate the partial charge on each atom.  Results will be stored in
        the `PARTIAL_CHARGE_PROP` atom-level property.
        """

        mm.mmffld_initialize(mm.MMERR_DEFAULT_HANDLER)
        ff_handle = mm.mmffld_new(14)
        mm.mmffld_enterMol(ff_handle, self._struc)
        mm.mmffld_deleteMol(ff_handle, self._struc)
        mm.mmffld_delete(ff_handle)
        mm.mmffld_terminate()

    def _prepareSurface(self):
        """
        Generate the surface and perform necessary calculations on it.  The
        results will be stored in instance attributes.
        """

        # A copy of input structure with non-protein atoms deleted.
        # This is used to create surfaces while ignoring non-protein atoms
        # so as to show the waters/ligands/DNA appearing outside of the surface
        # rather than holes (caused due to backend deleting non-protein atoms as well)
        struc_copy = self._struc.copy()
        sel_atoms = analyze.evaluate_asl(struc_copy, self._asl)
        non_prot_atoms = analyze.evaluate_asl(struc_copy, 'not protein')
        renumber_map = struc_copy.deleteAtoms(non_prot_atoms, renumber_map=True)
        sel_atoms = [
            renumber_map[atom]
            for atom in sel_atoms
            if renumber_map[atom] is not None
        ]

        self._surf = surface.Surface.newMolecularSurface(struc_copy,
                                                         SURF_NAME,
                                                         atoms=sel_atoms)
        self._surf.setTransparency(15)
        self._calculateSurfaceNeighbors()
        self._calcSmoothedValues()

    def _calculateSurfaceNeighbors(self):
        """
        For each vertex on the surface, calculate all neighboring triangles and
        store the results in `self._neighboring_vertices`.
        """

        self._neighboring_vertices = [
            set() for _ in range(self._surf.vertex_count)
        ]
        self._neighboring_triangles = [
            [] for _ in range(self._surf.vertex_count)
        ]
        for vertices in self._surf.patch_vertices:
            vertex_set = frozenset(vertices)
            for vertex in vertices:
                self._neighboring_triangles[vertex].append(vertex_set)
                neighbors = vertex_set - {vertex}
                self._neighboring_vertices[vertex].update(neighbors)

    def _calcSmoothedValues(self):
        """
        Smooth the specified atom-level charges for partial charges and
        slogP-values over the surface.

        """

        KBOLTZ = 0.00198722  # Boltzman conatant
        T = 300.0  # temperature
        LN10 = 2.30259  # log 10

        ct = self._struc
        atms = [a for a in ct.atom]
        slogp = [a.property.get(CLOGP_PROP, 0.0) for a in atms]
        slogp = [a * KBOLTZ * T * LN10 for a in slogp]

        elev = [a.property.get(PARTIAL_CHARGE_PROP, 0.0) for a in atms]
        elev = [a * KBOLTZ * T * LN10 for a in elev]

        apos = [a.xyz for a in atms]
        vpos = self._surf.vertex_coords
        vpos = [list(a) for a in vpos]

        val = _smooth_grid_vals(vpos, apos, slogp, wfcn='lipo', rcutoff=5.0)
        self._clogp_smoothed = val

        val = _smooth_grid_vals(vpos, apos, elev, wfcn='ele', rcutoff=10.0)
        self._partial_charge_smoothed = val

    def _preCalcResData(self):
        """
        Calculate the aggregation, surface area, and reactive residue grouping
        data for each residue.

        :return: A tuple of: (1) A list of residue data for each vertex. The
            `ResInfo` object is given for the residue closest to the vertex.
            (2) A list of the solvent accessible surface area for each atom
            (`OneIndexedList`) (3) A dict of residue ids and topography value
            of all the residues in the structure.
        :rtype: tuple
        """
        all_seq, all_struc = get_connected_components(self._struc)
        res_data_by_res = self._calcAggregation(all_seq, all_struc)

        # dict where keys are atom indices and values are ResidueInfo objects:
        res_info_by_atom = {}
        for res in self._struc.residue:
            for atom in res.atom:
                res_info_by_atom[atom.index] = res_data_by_res[res]

        asa_by_atom = self._calcSasa(res_info_by_atom)
        self._calcAccessibility(res_data_by_res)
        self._calcReactiveResidues(res_data_by_res, asa_by_atom, all_struc)
        self._calcDisulfides(res_data_by_res)
        topo_data = self.calcAntibodyTopography(res_data_by_res)
        res_data_by_vertex = self._resDataByVertex(res_info_by_atom)
        return res_data_by_vertex, asa_by_atom, topo_data

    def _calcAggregation(self, all_seq, all_struc):
        """
        Calculate the Zyggregator and Aggrescan values for each residue.

        :param all_seq: A list of list of residue names (str).  Each list of
            residue names represents a single covalently connected component.
        :type all_seq: list

        :param all_struc: A list of list of `schrodinger.structure._Residue`
            objects.  This object mirrors the structure of the list of list of
            residue names.
        :type all_struc: list

        :return: A dictionary of `ResInfo` objects containing aggregation data.
            Keys are `_Residue` objects.
        :rtype: dict
        """

        agg_by_res = {}
        for cur_seq, cur_struc in zip(all_seq, all_struc):
            if len(cur_seq) >= 5:
                zygg = self._calcZyggregator(cur_seq)
                aggr = self._calcAggrescan(cur_seq)

                # adjustment to the Zyggregator profile based on the aggrescan
                # 0 cutoff to scale to the same universe

                if zygg[0] and aggr[0]:
                    cutoff = ZYGG_ADD
                    aggp = []
                    for i, v in enumerate(aggr):
                        if not i:
                            continue
                        if (aggr[i - 1] > 0.0 and v < 0.0) or                 \
                                        (v > 0.0 and aggr[i - 1] < 0.0):
                            aggp += [zygg[i - 1], zygg[i]]

                    if aggp:
                        cutoff = old_div(sum(aggp), float(len(aggp)))
                        cutoff += 0.35  # extra cushion, see paper

                        # due to charges at the N/C termini we believe that there
                        # is no APR propensity within the first/last 5 residues

                    lz = len(zygg)
                    x = list(range(5)) + list(range(lz - 5, lz))
                    zygg = [a - cutoff for a in zygg]
                    for i in x:
                        zygg[i] = min(zygg[i], 0.15)
            else:
                # Zyggregator and Aggrescan require at least five covalently
                # connected residues for scoring
                zygg = aggr = [None] * len(cur_seq)
            for res, cur_zygg, cur_aggr in zip(cur_struc, zygg, aggr):
                agg_by_res[res] = ResInfo(res, cur_zygg, cur_aggr)
        return agg_by_res

    def _resDataByVertex(self, res_info_by_atom):
        """
        Create a list of the `ResInfo` object corresponding to each surface
        surface.

        :param res_info_by_atom: A dictionary of `ResInfo` objects
        :type res_info_by_atom: dict

        :return: A list of `ResInfo` objects
        :rtype: list
        """

        res_data_by_vertex = []
        for atom_index in self._surf.nearest_atom_indices:
            if atom_index == -1:
                val = None
            else:
                val = res_info_by_atom[atom_index]

            res_data_by_vertex.append(val)
        return res_data_by_vertex

    def _calcZyggregator(self, seq):
        """
        Calculate the Zyggregator aggregation score for the specified residues

        :param seq: A list of residue name for a single covalently connected
            segment.
        :type seq: list

        :return: A list of the Zyggregator score for each residue.
        :rtype: list
        """

        charge = sum(
            atom.property[PARTIAL_CHARGE_PROP] for atom in self._struc.atom)

        try:
            p_agg, z_agg, pval, zperc75 = \
            agg_profiles.calc_zygg_profile(seq, charge=charge)
            # pval is a list of the Zyggregator scores, one value per residue.

            return pval

        except (TypeError, ValueError):
            return [None for a in seq]

    def _calcAggrescan(self, seq):
        """
        Calculate the Aggrescan aggregation score for the specified residues

        :param seq: A list of residue name for a single covalently connected
            segment.
        :type seq: list

        :return: A list of the Aggrescan score for each residue.
        :rtype: list
        """

        try:
            a3vA, a4vS, a3v, a4v = agg_profiles.calc_aggrescan_profile(seq)
            return a4v
        except (TypeError, ValueError):
            return [None for a in seq]

    def _calcSasa(self, res_info_by_atom):
        """
        Calculate the solvent accessible surface area for each atom and residue.

        :param res_info_by_atom: A dictionary of `ResInfo` objects. The values
            of this dictionary will be updated with the calculated accessible
            side chain surface area.
        :type res_info_by_atom: dict

        :return: A `OneIndexedList` of solvent accessible surface area for
            each atom in the structure
        """

        BACKBONE_ATOMS = {"CA", "C", "O", "N"}
        asa_by_atom = [0] * self._struc.atom_total
        asa_by_atom = OneIndexedList(asa_by_atom)
        vertex_coords = self._surf.vertex_coords
        nearest = self._surf.nearest_atom_indices
        for vertex_nums in self._surf.patch_vertices:
            cur_coords = [vertex_coords[i] for i in vertex_nums]
            area = triangle_area(cur_coords)
            per_vertex_area = old_div(area, 3.0)
            for cur_vertex in vertex_nums:
                cur_atom_index = nearest[cur_vertex]
                if cur_atom_index == -1:
                    # This happens with occasional vertices when using an ASL
                    continue
                asa_by_atom[cur_atom_index] += per_vertex_area
                atom_obj = self._struc.atom[cur_atom_index]
                res_info = res_info_by_atom[cur_atom_index]
                res_info.sasa += per_vertex_area
                if atom_obj.pdbname.strip() not in BACKBONE_ATOMS:
                    res_info.side_chain_sasa += per_vertex_area
        return asa_by_atom

    def _calcAccessibility(self, res_data_by_res):
        """
        Calculate side chain accessibility (side chain surface area divided by
        the maximum possible side chain surface area for that residue type).

        :param res_data_by_res: A dictionary of `ResInfo` objects. The values
            of this dictionary will be updated with the calculated accessibility
            values.
        :type res_data_by_res: dict
        """

        side_chain_acc = structure_utils.calculate_sasa_dict()
        for res in res_data_by_res.values():
            try:
                full_acc = side_chain_acc[res.resname.strip()]
            except KeyError:
                continue
            else:
                res.accessibility = (old_div(res.side_chain_sasa,
                                             full_acc)) * 100

    def _calcReactiveResidues(self, res_data_by_res, asa_by_atom, all_struc):
        """
        Determine which residues match the reactive residue sites.  Reactive
        residue definitions are read from the user's profile, so any changes
        saved via other panels will be picked up and used here.

        :param res_data_by_res: A dictionary of `ResInfo` objects. The
            reactive_type dictionary will be created and populated for each
            `ResInfo` object.
        :type res_data_by_res: dict

        :param asa_by_atom: A list of solvent accessible surface area for each
            atom in the structure
        :type asa_by_atom: `OneIndexedList`

        :param all_struc: A list of list of `schrodinger.structure._Residue`
            objects.  Each list of residues represents a single covalently connected
            component.
        :type all_struc: list
        """

        patterns = find_gui.readMSVPatterns()
        seq_dicts = self._genSeqDictsForReactiveResiduesSearch(
            asa_by_atom, all_struc)
        all_pattern_names = [cur_pattern[0] for cur_pattern in patterns]
        for res in res_data_by_res.values():
            # Initialize the reactive_type dictionary for each ResInfo object
            res.reactive_type = OrderedDict(
                (name, False) for name in all_pattern_names)
        for cur_pattern in patterns:
            pattern_name, pattern_def, hotspot, _ = cur_pattern
            # Note that hotspot is always a string.  It's either a string
            # containing an integer or a string containing "None".
            pattern_results = sequence.find_generalized_pattern(
                seq_dicts, pattern_def)
            if not pattern_results:
                # The pattern definition was invalid
                continue
            for results_for_seq, cur_res_list in zip(pattern_results,
                                                     all_struc):
                for start, end in results_for_seq:
                    if hotspot == "None":
                        to_mark = list(range(start, end + 1))
                    else:
                        to_mark = [start + int(hotspot) - 1]
                    for i in to_mark:
                        cur_res = cur_res_list[i]
                        cur_data = res_data_by_res[cur_res]
                        cur_data.reactive_type[pattern_name] = True

    def _genSeqDictsForReactiveResiduesSearch(self, asa_by_atom, all_struc):
        """
        Create the sequence dictionaries required for find_generalized_pattern()

        :param asa_by_atom: A list of solvent accessible surface area for each
            atom in the structure
        :type asa_by_atom: `OneIndexedList`

        :param all_struc: A list of list of `schrodinger.structure._Residue`
            objects.  Each list of residues represents a single covalently connected
            component.
        :type all_struc: list

        :return: The sequence dictionaries required for
            find_generalized_pattern()
        :rtype: list
        """

        seq_objs = []
        for cur_res_list in all_struc:
            atom_indices = []
            for res in cur_res_list:
                atom_indices.extend(res.getAtomIndices())
            seq_objs.append(
                sequence.StructureSequence(self._struc, atom_indices))
        # Convert OneIndexedList to zero-indexed list
        asa_by_atom = list(asa_by_atom)
        seq_dicts = [
            sequence.convert_structure_sequence_for_pattern_search(
                cur_seq_obj, asa_by_atom) for cur_seq_obj in seq_objs
        ]
        return seq_dicts

    def _calcDisulfides(self, res_data_by_res):
        """
        Determine which residues have a disulfide bond.

        :param res_data_by_res: A dictionary of `ResInfo` objects. The
            disulfide attribute (Boolean) will be set for each `ResInfo` object.
        :type res_data_by_res: dict
        """

        for res, res_info in res_data_by_res.items():
            res_info.disulfide = self._resHasDisulfide(res)

    def _resHasDisulfide(self, res):
        """
        Does the specified residue contain a disulfide bond?

        :param res: The residue to check
        :type res: `schrodinger.structure._Residue`

        :return: True if `res` contains a disulfide bond.  False otherwise.
        :rtype: bool
        """

        if not res.pdbres.strip().startswith("CY"):
            # This allows for the Amber/Charmm cysteine name variants such as
            # CYX
            return False
        sulfurs = [atom for atom in res.atom if atom.element == "S"]
        if len(sulfurs) != 1:
            return False
        for bound_atom in sulfurs[0].bonded_atoms:
            if (bound_atom.chain != res.chain or
                    bound_atom.resnum != res.resnum or
                    bound_atom.inscode != res.inscode):
                return True
        return False

[docs]    def calcAntibodyTopography(self, res_data_by_res):
        """
        Determine the antibody topography for each residue. For non-antibody
        structures, all values will be None. PANEL-7988

        :param res_data_by_res: A dictionary of `ResInfo` objects. The
            topography attribute (str) will be set for each `ResInfo` object.
        :type res_data_by_res: dict

        :return: Antibody topography data
        :rtype: dict(str, str)
        """

        # Annotate the input structure:
        curated_st = curate_antibody.curate_struct(self._struc)

        topo_data = {}
        if curated_st is not None:
            # If curated_st is None, the structure is not an antibody.
            for prop in ('s_bioluminateReadOnly_Heavy_Chain',
                         's_bioluminateReadOnly_Light_Chain'):
                try:
                    chain = curated_st.property[prop]
                except KeyError:
                    # If this is a single chain antibody, skip over the
                    # antibody chain that is not present
                    continue
                seq, resid_list = loop_utils.get_seq_resid(curated_st, chain)
                seq_regions = antibody.SeqRegions(seq).regions
                regions = {}
                for label in REGION_LABELS:
                    if label in seq_regions:
                        regions[label] = seq_regions[label]
                for ires, resid in enumerate(resid_list):
                    for name, (istart, iend) in regions.items():
                        if ires >= istart and ires <= iend:
                            topo_data[resid] = name
                            break
                    else:
                        topo_data[resid] = None

        for res, res_info in res_data_by_res.items():
            res_info.topography = topo_data.get(str(res))
        return topo_data

[docs]    def write(self, basename):
        """
        Write out the surface and all calculated values

        :param basename: The filename to write the output to.  The surface will
            be written to basename.vis and the calculated values will be written to
            basename.pkl.
        :type basename: str
        """
        self.getAnalysis().write(basename)

[docs]    def getAnalysis(self):
        return PatchAnalysis(
            self._struc, self._surf, self._asl, self._clogp_smoothed,
            self._partial_charge_smoothed, self._neighboring_vertices,
            self._neighboring_triangles, self._res_data_by_vertex,
            self._properties, self._topo_data)


[docs]class PatchFinder(object):
    """
    Find surface patches using the output of `PreAnalyzer`.
    """

[docs]    def __init__(self, analysis):
        """
        Class for grouping patches from a PatchAnalysis object based on
        specific settings.

        :param analysis: Pre-analysis object
        :type analysis: PatchAnalysis
        """

        self.st = analysis.st
        self.surf = analysis.surf
        self.asl = analysis.asl
        self._clogp_smoothed = analysis.clogp_smoothed
        self._partial_charge_smoothed = analysis.partial_charge_smoothed
        self._neighboring_vertices = analysis.neighboring_vertices
        self._neighboring_triangles = analysis.neighboring_triangles
        self._vertex_coords = self.surf.vertex_coords
        self._nearest_atom_indices = self.surf.nearest_atom_indices
        self._res_data_by_vertex = analysis.res_data_by_vertex
        self.prot_properties = analysis.prot_properties
        self.topo_data = analysis.topo_data
        _, res_lists = get_connected_components(self.st)
        self.res_lists = [map(str, res_list) for res_list in res_lists]

[docs]    @classmethod
    def read(cls, basename):
        """
        Read the `PreAnalyzer` data from the specified basename and use it to
        create a `PatchFinder` object.

        :param basename: The basename for the `PreAnalyzer` output files
        :type basename: str
        """

        analysis = PatchAnalysis.read(basename)

        return cls(analysis)

[docs]    def write(self, basename):
        analysis = PatchAnalysis(self.st, self.surf, self.asl,
                                 self._clogp_smoothed,
                                 self._partial_charge_smoothed,
                                 self._neighboring_vertices,
                                 self._neighboring_triangles,
                                 self._res_data_by_vertex, self.prot_properties)
        analysis.write(basename)

[docs]    def calculate(self, settings):
        """
        Calculate patches and residue aggregation data using the specified
        settings.

        :param settings: The settings to use for the patch calculations
        :type settings: settings.PatchSettings

        :return: A tuple of:
              - the calculated patches as a list of `Patch` objects
              - the residue aggregation data as a list of {ResidueAggData} objects
        :rtype: tuple
        """

        patches = self._calcPatches(settings)
        res_data = self._calcResData(patches)
        patches = self._filterAndNumberPatches(patches)
        self._calcAggScores(patches, res_data)
        self._annotateProperties(res_data)

        return patches, res_data

    def _calcAggScores(self, patches, res_data):
        """
        Calculate Agg scores, and set ResData.aggscore properties, and
        self.prot_properties.aggscore_sum property.

        :param patches: List of patches
        :type patches: list(patch_finder.Patch)

        :param res_data: List of residue info objects - each residue can
            appear multiple times, if it's present in multiple patches.
        :type res_data: list(ResInfo)
        """
        aggscore_by_res = ext_desc.calc_agg_scores(self.st, patches)

        for oneresdata in res_data:
            res_id = oneresdata.getIdStr()
            oneresdata.aggscore = aggscore_by_res.get(res_id, 0.0)

        self.prot_properties.aggscore_sum = sum(aggscore_by_res.values())

    def _annotateProperties(self, res_data):
        """
        Annotate the structure with the structure-level patch properties
        and the residue-level properties to the appropriate CA atom
        """

        # Annotate residue-level properties to CA atom
        res_props = [
            "zyggregator",
            "aggrescan",
            "aggscore",
            "sasa",
            "side_chain_sasa",
            # For now, we'll comment out non-float properties
            # we can enable these later if necessary but UX isnt currently obvious
            #"accessibility",
            #"reactive_type",
            #"disulfide",
            #"topography"
        ]
        for oneresdata in res_data:
            resasl = oneresdata.getAsl()
            caasl = f'(atom.ptype " CA ") AND ({resasl})'
            assert analyze.validate_asl(caasl)
            try:
                caai = analyze.evaluate_asl(self.st, caasl)[0]
            except IndexError:
                print("Skipping non AA residue..")
                continue
            caa = self.st.atom[caai]
            for prop in res_props:
                val = getattr(oneresdata, prop)
                if val is None:
                    val = 0.0
                caa.property[f"{RES_PROP_PREFIX}{prop}"] = val

        # Annotate structure-level properties
        propkeys = {
            'total_charge': self.prot_properties.charge,
            'atomic_weight': self.prot_properties.weight,
            'hydrophobic_moment': self.prot_properties.hydrophobic_moment,
            'positive_SA': self.prot_properties.positive,
            'negative_SA': self.prot_properties.negative,
            'acceptor_SA': self.prot_properties.acceptor,
            'donor_SA': self.prot_properties.donor,
            'Sum_AggScore': self.prot_properties.aggscore_sum
        }
        for suffix, val in propkeys.items():
            fullkey = PATCH_PROP_PREFIX + suffix
            self.st.property[fullkey] = val

    def _calcPatches(self, settings):
        """
        Calculate patches using the specified settings.

        :param settings: The settings to use for the patch calculations
        :type settings: `settings.PatchSettings`

        :return: A list of `Patch` objects
        :rtype: list
        """

        pos = self._calcPatchesFor(self._partial_charge_smoothed,
                                   settings.positive, operator.gt,
                                   PatchType.positive)
        neg = self._calcPatchesFor(self._partial_charge_smoothed,
                                   settings.negative, operator.lt,
                                   PatchType.negative)
        hydr = self._calcPatchesFor(self._clogp_smoothed, settings.hydrophobic,
                                    operator.gt, PatchType.hydrophobic)
        patches = pos + neg + hydr
        return patches

    def _calcPatchesFor(self, smoothed, settings, op, patch_type):
        """
        Calculate the patches using the specified data

        :param smoothed: The smoothed values to use for patch calculation
        :type smoothed: list

        :param settings: The settings to use for the patch calculations
        :type settings: `settings.PatchTypeSettings`

        :param op: An operator indicating whether patches vertices should
            contains values less than or greater than the threshold value in
            `settings`.
        :type op: function

        :param patch_type: The type of patch to find
        :type patch_type: `settings.PatchType`

        :return: A list of `Patch` objects
        :rtype: list
        """

        num_vertices = len(self._vertex_coords)
        vertex_examined = [False] * num_vertices
        patches = []
        for i in range(num_vertices):
            if vertex_examined[i]:
                continue
            elif not op(smoothed[i], settings.threshold):
                vertex_examined[i] = True
                continue
            vertices = self._findAllPatchVertices(i, smoothed, op,
                                                  settings.threshold,
                                                  vertex_examined)
            cur_patch = Patch(vertices, patch_type, self._vertex_coords,
                              smoothed, self._neighboring_triangles,
                              self._neighboring_vertices,
                              self._nearest_atom_indices, settings.color)
            if cur_patch.size >= settings.size:
                patches.append(cur_patch)
        return patches

    def _findAllPatchVertices(self, starting_vertex, smoothed, op, threshold,
                              vertex_examined):
        """
        Find all vertices that are part of the same patch as `starting_vertex`.

        :param starting_vertex: The index of the vertex to start the search at.
        :type starting_vertex: int

        :param smoothed: The smoothed values to use for patch calculation
        :type smoothed: list

        :param op: An operator indicating whether patches vertices should
            contains values less than or greater than the `threshold` value.
        :type op: function

        :param threshold: All patch vertices must have a smoothed value less
            than or greater than this value.
        :type threshold: float

        :param vertex_examined: For each vertex, a Boolean indicating whether or
            not the vertex has been examined during this patch search.  Will be
            updated by this method.
        :type vertex_examined: list

        :return: A list of vertex indices
        :rtype: list
        """

        patch_vertices = {starting_vertex}
        new_vertices = {starting_vertex}
        while new_vertices:
            prev_vertices = new_vertices
            new_vertices = set()
            for cur_vertex in prev_vertices:
                for cur_neighbor in self._neighboring_vertices[cur_vertex]:
                    vertex_examined[cur_neighbor] = True
                    if op(smoothed[cur_neighbor], threshold):
                        new_vertices.add(cur_neighbor)
            new_vertices -= patch_vertices
            patch_vertices.update(new_vertices)
        return patch_vertices

    def _calcResData(self, patches):
        """
        The function takes in a list of patches and calculates for each patch
        the following properties that are set in the object:

        - energy: The combined score for residues in the patch (including
          residues that were excluded later filtered from the patch).
        - res:  list of residue data
        - contribution: sorted list of energy contribution per residue to
          the patch (highest contributor is listed first)
        - patch_atms:  list of atom indices that contribute to the patch.
          This list is sorted by residue affiliation
        - seg: segmentation vector. Determine, how many atoms per residue
          are contributing to the patch

        :param patches: A list of `Patch` objects
        :type patches: list

        :return: A list of `ResPatchInfo` objects
        :rtype: list
        """

        t0 = time.time()

        rdata = self._res_data_by_vertex
        adata = self._nearest_atom_indices
        chrga = self._partial_charge_smoothed
        slgpa = self._clogp_smoothed

        # FIXME this method needs some serious re-factoring.

        # List of ResInfo objects that are part of a patch (or multiple
        # patches), each appearing only once in the list:
        res_data = []
        for cpatch in patches:
            vertices = cpatch.vertices

            # ensure proper association of vertexpoint to resdata
            rdat = [rdata[i] if rdata[i] else None for i in vertices]
            adat = [adata[i] if adata[i] else None for i in vertices]

            if cpatch.type is PatchType.hydrophobic:
                vdat = [slgpa[i] if slgpa[i] else None for i in vertices]
            else:
                vdat = [chrga[i] if chrga[i] else None for i in vertices]

                # ensure to use valid data for regrouping
            msk = [a and b and z for a, b, z in zip(rdat, adat, vdat)]
            rdat = [a for i, a in enumerate(rdat) if msk[i]]
            adat = [a for i, a in enumerate(adat) if msk[i]]
            vdat = [a for i, a in enumerate(vdat) if msk[i]]
            if not rdat or not adat or not vdat:
                continue

            # group by residue
            rnm = [
                '%s_%i_%s%s' % (a.chain, int(a.resnum), a.resname, a.inscode)
                for a in rdat
            ]

            idxL, cnt = shlp.sort_and_count(rnm)

            # If the patch contains only one residue:
            if cnt and len(cnt) == 1:
                res = rdat[idxL[0]]
                aidx = set([adat[k] for k in idxL])  # atom indices
                contrib = abs(sum(vdat))
                cpatch.energy = contrib
                cpatch.contribution = [contrib]
                cpatch.seg = [len(aidx)]  # nr of contributing atoms
                cpatch.patch_atms = list(aidx)  # consistency
                cpatch.residues = [res]
                res_data.append(cpatch.residues)
                continue

            # If got here, then this patch has more than 1 residue

            idx = shlp.vslice(idxL, cnt)
            r = [rdat[x[0]] for x in idx]

            # group by contributing atoms

            rc = []  # residue contribution
            rs = []  # nr of atoms per residue that contribute
            ra = []  # atom indices
            for vx in idx:
                aidx = [adat[x] for x in vx]
                ax, ac = shlp.sort_and_count(aidx)
                if not ac:
                    rs.append(0)  # this should not happen
                    ra.append([])
                    rc.append(0.0)
                    continue

                atms = shlp.vslice([aidx[z] for z in ax], ac)
                atms = [a[0] for a in atms]
                ra.append(atms)
                rs.append(len(atms))
                rc.append(abs(sum([vdat[x] for x in vx])))

                # order by largest residue contributions

            x = sorted(list(range(len(rc))), key=rc.__getitem__)
            x.reverse()

            # Filter out residues with low contribution. PANEL-8195
            min_cont = MIN_RES_CONTRIBUTION[cpatch.type]
            x = [a for a in x if rc[a] >= min_cont]

            # Order and filter r, rc, ra, and rs lists in the same way:
            r = [r[a] for a in x]
            rc = [rc[a] for a in x]
            ra = [ra[a] for a in x]
            rs = [rs[a] for a in x]

            cpatch.contribution = rc  # score for each contributing res
            cpatch.energy = sum(shlp.castIf(cpatch.contribution))

            cpatch.seg = rs  # nr of contributing atoms
            cpatch.patch_atms = shlp.flatten(ra)  # consistency reasons
            cpatch.residues = r
            res_data.append(cpatch.residues)

        print(" ..... done calculating res_data (%i patches): %.3f " %       \
                                            (len(patches), time.time() - t0))

        # Sanity check:
        for patch in patches:
            assert len(patch.contribution) == len(patch.residues)

        # TODO in the future, consider ordering this list by original
        # sequence order, as current ordering is not very useful.
        return shlp.flatten(res_data)

    def _filterAndNumberPatches(self, patches):
        """
        Discard any patches where all residues got filtered out since these
        patches aren't biologically meaningful.  After that, number all
        remaining patches.

        :param patches: All patches found via `_calcPatches`.  Residue data for
            the patches must have been calculated using `_calcResData`.
        :type patches: list[Patch]

        :return: The filtered and numbered patches.
        :rtype: list[Patch]
        """
        patches = [patch for patch in patches if patch.residues]
        for i, cur_patch in enumerate(patches, start=1):
            cur_patch.num = i
        return patches

[docs]    def getColorsForPatchVertices(self, patch, psettings):
        """
        Return colors for all vertices of the given patch.

        :param patch: Patch to get colors for.
        :type patch: `patch_finder.Patch`

        :return: Colors for each vertex of the patch
        :rtype: list of (float, float, float)
        """
        color_ramp = psettings.getPatchColorRamp(patch.type)
        if psettings.display_mode == settings.COLOR_AGGSCORE:
            vertex_values = []
            for vert in patch.vertices:
                rdata = self._res_data_by_vertex[vert]
                aggscore = rdata.aggscore
                vertex_values.append(aggscore)
        else:
            vertex_values = patch.vtxvals

        vcolors = [color_ramp._getRGBFloat(val) for val in vertex_values]
        return vcolors

[docs]    def colorSurfByPatches(self, surf, patches, psettings):
        # Initialize vertex_colors to default color (used for vertices that
        # are not part of a patch):
        default_color = [a / 255.0 for a in settings.NO_PATCH_COLOR]
        vertex_colors = [
            numpy.array(default_color, dtype=numpy.float32)
            for _ in range(surf.vertex_count)
        ]

        for patch in patches:
            vcolors = self.getColorsForPatchVertices(patch, psettings)
            for patch_color, vertex in zip(vcolors, patch.vertices):
                vertex_colors[vertex] = patch_color
        surf.setColoring(vertex_colors)