Source code for schrodinger.application.bioluminate.surfcomp

# -*- coding: utf-8 -*-
"""
See PYTHON-2433
WARNING: Solvents are not accounted for in any way.
"""

# Contributors: Matvey Adzhigirey

import argparse
import sys
from collections import defaultdict
from past.utils import old_div

import numpy

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import mmsurf
from schrodinger.infra.mmbitset import Bitset
from schrodinger.structutils import analyze

mmsurf.mmsurf_initialize(mm.MMERR_DEFAULT_HANDLER)

CONST_W = 0.5  # Å^-2, see the paper attached to PYTHON-2433
NON_BURIED_DIST = 1.5  # see the paper attached to PYTHON-2433
SURF_DIST_THRESHOLD = 3.0


def flatten_list(iterable):
    """
    Given an iterable of lists, return a flattened list of all values.
    This algorithm scales in linear time.
    """
    out_list = []
    list(map(out_list.extend, iterable))
    return out_list


def create_surface(st, calc_normals=True, grid_spacing=0.5):
    """
    Creates a surface for the given structure. Caller must call
    mmsurf_delete() after it's done using the surface.

    :return: MMsurf handle
    :rtype: int
    """
    bs = Bitset(size=st.atom_total)
    bs.fill()

    # Maestro settings: high=0.3, medium=0.6, low=0.8 (Maestro default is 0.8)
    # grid_spacing = 0.3  # roughly 16 points per Å²
    # grid_spacing = 0.4  # roughly 10 points per Å²
    # grid_spacing = 0.5  # roughly 6 points per Å²
    probe_radius = 1.4  # Maestro default is 1.4
    # Higher number = smoother surface. Lower number = surface follows the
    # shapes of the atoms' VDW radii better.
    surface_type = mmsurf.MOLSURF_MOLECULAR
    vdw_scaling = 1.0  # Maestro default is 1.0

    # Whether this surface is for calculating the area only, in which case
    # the normal vectors will not be calculated.
    for_area = not calc_normals

    surf = mmsurf.mmsurf_molsurf(st, bs, grid_spacing, probe_radius,
                                 vdw_scaling, surface_type, for_area)
    return surf


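# A minimal usage sketch (not part of the original module) of the intended
# create/extract/delete pattern: every handle returned by create_surface()
# must be released with mmsurf.mmsurf_delete() once its data has been read.
# The helper name _example_surface_area is hypothetical.
def _example_surface_area(st, grid_spacing=0.5):
    """Illustrative helper: return the molecular surface area of `st`."""
    surf = create_surface(st, calc_normals=False, grid_spacing=grid_spacing)
    try:
        return mmsurf.mmsurf_get_surface_area(surf)
    finally:
        # Always release the surface handle, even if the area query fails:
        mmsurf.mmsurf_delete(surf)

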
def find_buddies_on_other_surf(buried_a_coords, buried_b_coords):
    """
    For each point in the first list, find the closest point from the second
    list. Returns a dict where keys are indices into the first list, and
    values are tuples of (index into second list, sq. distance).
    """
    # Remember the original index of each B point and collect its coordinates:
    to_surf_index = {}
    tmp_b_coords = []
    for i, (ba, coord_b) in enumerate(buried_b_coords.items()):
        to_surf_index[i] = ba
        tmp_b_coords.append(coord_b)
        # FIXME instead of creating an intermediate list, append directly
        # to the numpy array

    # Convert to a numpy array (float32):
    tmp_b_coords = numpy.array(tmp_b_coords, dtype=numpy.float32)

    link_cell = mm.mmct_create_distance_cell_xyz2(tmp_b_coords,
                                                  SURF_DIST_THRESHOLD)

    # For each buried A coordinate, find the closest buried B coordinate:
    b_points_by_a = {}
    for ia, coord_a in buried_a_coords.items():
        closest_index = mm.mmct_query_distance_cell_closest(
            link_cell, *coord_a)
        if closest_index != -1:
            # Match found:
            coord_b = tmp_b_coords[closest_index]
            ib = to_surf_index[closest_index]
            dist_squared = numpy.sum((coord_b - coord_a)**2)
            b_points_by_a[ia] = (ib, dist_squared)

    mm.mmct_delete_distance_cell(link_cell)

    return b_points_by_a


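# A brute-force reference sketch (not part of the original module) showing
# what find_buddies_on_other_surf() computes, using plain numpy instead of
# the mmct distance cell; handy for sanity-checking small inputs. The helper
# name _example_buddies_bruteforce is hypothetical.
def _example_buddies_bruteforce(buried_a_coords, buried_b_coords):
    b_keys = list(buried_b_coords)
    b_arr = numpy.array([buried_b_coords[k] for k in b_keys], dtype=float)
    out = {}
    for ia, coord_a in buried_a_coords.items():
        # Squared distances from this A point to every B point:
        d2 = numpy.sum((b_arr - numpy.asarray(coord_a))**2, axis=1)
        j = int(numpy.argmin(d2))
        # Only accept matches within the same 3.0 Å cutoff used above:
        if d2[j] <= SURF_DIST_THRESHOLD**2:
            out[ia] = (b_keys[j], float(d2[j]))
    return out

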
def _calc_sc_per_atom(buried_a_coords, buried_b_coords, a_normals, b_normals,
                      a_nearest):
    """
    :param buried_a_coords: dict of buried surface coordinates of surface A
    :param buried_b_coords: dict of buried surface coordinates of surface B
    :param a_normals: array of surface normal vectors of surface A
    :param b_normals: array of surface normal vectors of surface B
    :param a_nearest: dict mapping each surface point index to the index of
        the closest atom in molecule A.
    """
    b_points_by_a = find_buddies_on_other_surf(buried_a_coords,
                                               buried_b_coords)
    # print " Number of points to be analyzed:", len(b_points_by_a)

    # These points are within 3 Å of the other surface yet farther than
    # 1.5 Å from the non-buried surface.

    # For each point on surface A, find the point on surface B closest to it,
    # and calculate complementarity for that point:
    s_by_atom = defaultdict(list)
    for ia in buried_a_coords.keys():
        closest_match = b_points_by_a.get(ia)
        if closest_match is None:
            # There is no point on the other surface within the threshold
            # distance:
            s = 0.0
        else:
            ib, dist_squared = closest_match
            norm_a = a_normals[ia]  # nA
            norm_b = b_normals[ib]  # n'A
            # Calculate the surface complementarity of surface A to surface B
            # at this point. In the paper, the equation is:
            #     S(A->B)(xA) = (nA . n'A) exp[-w(|xA - x'A|)^2]
            # Need to reverse the B normal (hence the negative sign):
            coor_dotp = numpy.dot(norm_a, -norm_b)
            s = coor_dotp * numpy.exp(-CONST_W * dist_squared)
        nearest_atom = a_nearest[ia]
        s_by_atom[nearest_atom].append(s)

    return s_by_atom


def _calc_complementarity(st, atoms1, atoms2, grid_spacing):
    """
    Given 2 atom lists, calculate surface complementarity for each atom in
    each input set that is within 1.5 Å of the non-buried surface. The
    results are returned as 2 dicts. The keys in each dict are atom indices
    in the corresponding set; values are the Sc values for each surface
    point of that atom, ranging from ~0 to 1.

    Atoms are considered interacting if:

    - they are buried in the full CT surface,
    - they are on the surface of the interacting surfaces,
    - and they are within 3.0 Å of the other interacting surface.

    Non-interacting atoms are not included in the returned dictionaries.
    """
    if atoms2 is None:
        atoms2 = set(range(1, st.atom_total + 1)) - set(atoms1)
        if not atoms2:
            raise ValueError("The <atoms1> list matched all atoms in CT")

    solvent_atoms = analyze.evaluate_asl(st, "water")
    # FIXME Search for other solvents in addition to water

    # print "Creating subset CTs..."
    lig_st = st.copy()
    del_atoms = [a for a in range(1, lig_st.atom_total + 1)
                 if a not in atoms1]  # or a in solvent_atoms
    lig_renumber_dict = lig_st.deleteAtoms(del_atoms, renumber_map=True)
    reverse_lig_renumber_dict = {
        value: key for key, value in lig_renumber_dict.items()
    }

    rec_st = st.copy()
    del_atoms = [a for a in range(1, rec_st.atom_total + 1)
                 if a not in atoms2]  # or a in solvent_atoms
    rec_renumber_dict = rec_st.deleteAtoms(del_atoms, renumber_map=True)
    reverse_rec_renumber_dict = {
        value: key for key, value in rec_renumber_dict.items()
    }

    # Some error-checking:
    if lig_st.atom_total == 0:
        raise ValueError("The <atoms1> list is empty")
    if rec_st.atom_total == 0:
        raise ValueError("The <atoms2> list is empty")

    whole_st = st.copy()
    del_atoms = [
        a for a in range(1, whole_st.atom_total + 1) if a in solvent_atoms
    ]
    whole_st.deleteAtoms(del_atoms)

    # print "Creating surfaces..."
    total_surf = create_surface(whole_st, True, grid_spacing)
    rec_surf = create_surface(rec_st, True, grid_spacing)
    lig_surf = create_surface(lig_st, True, grid_spacing)

    lig_coords = mmsurf.mmsurf_get_all_vertex_coords(lig_surf)
    rec_coords = mmsurf.mmsurf_get_all_vertex_coords(rec_surf)
    # print "Number of ligand surface points:", len(lig_coords)
    # print "Number of receptor surface points:", len(rec_coords)

    lig_normals = mmsurf.mmsurf_get_all_vertex_normals(lig_surf)
    rec_normals = mmsurf.mmsurf_get_all_vertex_normals(rec_surf)
    lig_nearest = mmsurf.mmsurf_get_nearest_atoms(lig_surf)
    rec_nearest = mmsurf.mmsurf_get_nearest_atoms(rec_surf)

    # Get the coordinates of all non-buried points on the CT:
    nb_coords = mmsurf.mmsurf_get_all_vertex_coords(total_surf)
    # print "Number of the points on the whole CT surface:", len(nb_coords)

    # The SASA is calculated only to report the resolution of the surface:
    # total_sa = mmsurf.mmsurf_get_surface_area(total_surf)
    # print "Approximate resolution of the surface: %.1f points per Å²" % (len(nb_coords) / total_sa)

    mmsurf.mmsurf_delete(total_surf)
    mmsurf.mmsurf_delete(rec_surf)
    mmsurf.mmsurf_delete(lig_surf)

    # print "Filtering out points close to non-buried surface..."
    # Remove points from both surfaces that are too close to the non-buried
    # surface.
    # Convert to an array of floats instead of doubles, as that's what
    # the distance cell API is expecting:
    nb_coords = nb_coords.astype(numpy.float32)

    # print "Creating a NB distance cell"
    nb_cell_handle = mm.mmct_create_distance_cell_xyz2(nb_coords,
                                                       NON_BURIED_DIST)

    # print "Distance cell execution..."
    buried_lig_coords = {}  # Key: point index; value: point coords
    for i, coord_lig in enumerate(lig_coords):
        if mm.mmct_query_distance_cell_count(nb_cell_handle, *coord_lig) == 0:
            buried_lig_coords[i] = coord_lig
    # print "Number of filtered ligand surface points:", len(buried_lig_coords)

    # print "Distance cell execution..."
    buried_rec_coords = {}  # Key: point index; value: point coords
    for i, coord_rec in enumerate(rec_coords):
        if mm.mmct_query_distance_cell_count(nb_cell_handle, *coord_rec) == 0:
            buried_rec_coords[i] = coord_rec
    # print "Number of filtered receptor surface points:", len(buried_rec_coords)

    mm.mmct_delete_distance_cell(nb_cell_handle)

    # print " processing the points on the ligand surface..."
    s_by_lig_atom = _calc_sc_per_atom(buried_lig_coords, buried_rec_coords,
                                      lig_normals, rec_normals, lig_nearest)
    # Convert atom indices to global (complex CT) reference.
    # Note: the any() in the next statement excludes any atom where all
    # surface complementarity values are 0.0. These atoms were incorrectly
    # classified as buried because they are not on the full CT surface
    # (i.e. they are in a pocket).
    s_by_lig_atom = {
        reverse_lig_renumber_dict[a]: v
        for a, v in s_by_lig_atom.items()
        if any(v)
    }

    # print " processing the points on the receptor surface..."
    s_by_rec_atom = _calc_sc_per_atom(buried_rec_coords, buried_lig_coords,
                                      rec_normals, lig_normals, rec_nearest)
    # Convert atom indices to global (complex CT) reference.
    # Note: the any() in the next statement excludes any atom where all
    # surface complementarity values are 0.0. These atoms were incorrectly
    # classified as buried because they are not on the full CT surface
    # (i.e. they are in a pocket).
    s_by_rec_atom = {
        reverse_rec_renumber_dict[a]: v
        for a, v in s_by_rec_atom.items()
        if any(v)
    }

    return s_by_lig_atom, s_by_rec_atom


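# A minimal numeric sketch (not part of the original module) of the
# per-point score used inside _calc_sc_per_atom() above: the dot product of
# the A normal with the reversed B normal, damped by a Gaussian of the
# squared point-point distance. For anti-parallel unit normals at points
# 1 Å apart (dist_squared=1.0) it gives exp(-CONST_W) ≈ 0.61. The helper
# name _example_point_score is hypothetical.
def _example_point_score(norm_a, norm_b, dist_squared):
    norm_a = numpy.asarray(norm_a, dtype=float)
    norm_b = numpy.asarray(norm_b, dtype=float)
    # S(A->B)(xA) = (nA . n'A) exp[-w(|xA - x'A|)^2], with n'A reversed:
    return float(numpy.dot(norm_a, -norm_b) *
                 numpy.exp(-CONST_W * dist_squared))

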
def calc_complementarity_by_atom(st, atoms1, atoms2=None, grid_spacing=0.5):
    """
    Return the values for surface complementarity between the specified
    surfaces, grouped by atom.

    :type st: structure._Structure object
    :param st: Structure to which <atoms1> and <atoms2> are indices.

    :type atoms1: Iterable of atom indices
    :param atoms1: Atom numbers from the surface for which to calculate the
        complementarity.

    :type atoms2: Iterable of atom indices
    :param atoms2: Atom numbers for the other surface. If not specified, all
        other atoms from the CT are used.

    :type grid_spacing: float
    :param grid_spacing: Density (resolution) of the surfaces. Default is
        0.5. Maestro defaults for this setting are: high=0.3, medium=0.6,
        low=0.8.

    :rtype: dict of lists
    :return: Dict where keys are atom indices (only atoms that are on the
        interaction surface) and values are lists of surface complementarity
        (Sc) values for that atom. Each value corresponds to an interacting
        point on the surface of that atom; the list will be empty for fully
        buried atoms. Calculate the median of these values to determine the
        Sc for that atom; calculate the median of all values for all of a
        residue's atoms to determine the Sc for that residue; etc.
    """
    s_by_set1, s_by_set2 = _calc_complementarity(st, atoms1, atoms2,
                                                 grid_spacing)
    # Combine the 2 dicts:
    s_by_set1.update(s_by_set2)
    return s_by_set1


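# A minimal sketch (not part of the original module) of the per-residue
# aggregation described in the docstring above: pool the per-atom Sc value
# lists by residue and take the median of each pool. The helper name
# _example_sc_by_residue is hypothetical, and grouping by (chain, resnum)
# ignores insertion codes for simplicity.
def _example_sc_by_residue(st, atoms1, atoms2=None, grid_spacing=0.5):
    s_by_atom = calc_complementarity_by_atom(st, atoms1, atoms2, grid_spacing)
    values_by_res = defaultdict(list)
    for anum, values in s_by_atom.items():
        atom = st.atom[anum]
        # Pool all surface-point Sc values for atoms of the same residue:
        values_by_res[(atom.chain, atom.resnum)].extend(values)
    return {
        res: float(numpy.median(vals))
        for res, vals in values_by_res.items()
    }

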
def calc_total_complementarity(st, atoms1, atoms2=None, grid_spacing=0.5):
    """
    Return the total complementarity between the specified surfaces.

    :type st: structure._Structure object
    :param st: Structure to which <atoms1> and <atoms2> are indices.

    :type atoms1: Iterable of atom indices
    :param atoms1: Atom numbers from the surface for which to calculate the
        complementarity.

    :type atoms2: Iterable of atom indices
    :param atoms2: Atom numbers for the other surface. If not specified, all
        other atoms from the CT are used.

    :type grid_spacing: float
    :param grid_spacing: Density (resolution) of the surfaces. Default is
        0.5. Maestro defaults for this setting are: high=0.3, medium=0.6,
        low=0.8.

    :rtype: float
    :return: The surface complementarity (Sc) between the 2 surfaces.
    """
    s_by_set1, s_by_set2 = _calc_complementarity(st, atoms1, atoms2,
                                                 grid_spacing)

    # Gather all the values and calculate the median for each surface:
    all_values1 = flatten_list(s_by_set1.values())
    median1 = numpy.median(all_values1)
    all_values2 = flatten_list(s_by_set2.values())
    median2 = numpy.median(all_values2)

    # Calculate the surface complementarity score for this interaction.
    # Equation in paper: Ss = ( {Sa->b} + {Sb->a} ) / 2
    diff = abs(median2 - median1)
    final_sc = old_div((median1 + median2), 2.0)
    # return final_sc

    # Statistics for the complementarity calculations:
    if False:
        print("Ligand surface points considered:", len(all_values1))
        print("Ligand atoms considered:", len(s_by_set1))
        print("  S min:", min(all_values1))
        print("  S max:", max(all_values1))
        print("  S average:",
              old_div(sum(all_values1), float(len(all_values1))))
        print("  Median ligand Sc: %.2f" % median1)
        print("Receptor surface points considered:", len(all_values2))
        print("Receptor atoms considered:", len(s_by_set2))
        print("  S min:", min(all_values2))
        print("  S max:", max(all_values2))
        print("  S average:",
              old_div(sum(all_values2), float(len(all_values2))))
        print("  Median receptor Sc: %.2f" % median2)

    perc = diff / final_sc * 100.0
    print("Difference between the medians: %.3f (%.1f%%)" % (diff, perc))
    print("Average of the medians (FINAL Sc): %.2f" % final_sc)

    return final_sc


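# A minimal end-to-end sketch (not part of the original module): read a
# complex, select the ligand atoms by ASL, and compute the total Sc against
# the rest of the structure. The file name "complex.mae", the ASL "ligand",
# and the helper name _example_total_sc are illustrative placeholders.
def _example_total_sc(infile="complex.mae", ligand_asl="ligand"):
    st = structure.Structure.read(infile)
    ligand_atoms = analyze.evaluate_asl(st, ligand_asl)
    # When atoms2 is omitted, all remaining atoms form the other surface:
    return calc_total_complementarity(st, ligand_atoms)

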
def _run():
    """
    For testing this module from the command line.
    """
    script_desc = "$SCHRODINGER/run %s -asl <asl> <infile>" % __file__

    parser = argparse.ArgumentParser(description=script_desc)
    parser.add_argument("infile", help="Input structure file (PDB, Maestro)")
    parser.add_argument(
        "-asl",
        help="ASL for one of the atom sets (called 'ligand' in the stdout)")

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))
        sys.exit(1)

    if not args.infile:
        parser.error("Please specify a structure file")
    if not args.asl:
        parser.error("Please specify the ligand ASL")

    st = structure.Structure.read(args.infile)

    ligand_atoms = analyze.evaluate_asl(st, args.asl)
    receptor_atoms = analyze.evaluate_asl(st, "NOT %s" % args.asl)

    if not ligand_atoms:
        raise ValueError("The ASL did not match any atoms")
    if not receptor_atoms:
        raise ValueError("The ASL has matched all atoms in the structure")

    print("Number of ligand atoms:", len(ligand_atoms))
    print("Number of receptor atoms:", len(receptor_atoms))

    print("\nTesting calc_total_complementarity()...")
    calc_total_complementarity(st, ligand_atoms, receptor_atoms)

    print("\nTesting calc_complementarity_by_atom()...")
    out_dict = calc_complementarity_by_atom(st, ligand_atoms, receptor_atoms)
    all_values1 = flatten_list(out_dict.values())
    sc1 = numpy.median(all_values1)
    print("Total Sc for the ligand surface: %.2f" % sc1)

    print("DONE")


# For testing this module:
if __name__ == '__main__':
    if False:
        import cProfile
        cProfile.run('_run()', sort=1)  # sort by 'tottime'
    else:
        _run()