# Source code for schrodinger.application.combiglide.corehop

"""
This module contains classes and functions for Core Hopping.

Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Pat Lorton

import sqlite3
from past.utils import old_div
from struct import unpack

from scipy.special import comb

from schrodinger.infra import mm
from schrodinger.structutils import block_data
from schrodinger.structutils import smiles as smiles_mod

ANY = 999  # Same as ANY constant of vcs-src/chsr/data_struct_macros.h
ATTACHMENT_BLOCK = 'm_attachment'


def _convert_blob_to_int_array(blob):
    """
    This is used to convert a sqlite3 blob into an int array
    """
    cb = []
    idx = 0
    while idx < len(blob):
        cb.append(blob[idx:idx + 4])
        idx += 4
    return [unpack("<L", x)[0] for x in cb]


class SubstCoreDatabase:
    """
    This class can be used to calculate substitution scores, where a core
    hopping database has been used to generate passed ligands.

    An instance holds an open sqlite3 connection to the database. Call
    `close()` (or use the instance as a context manager) to release it.
    """

    # mmfrag handle shared by every instance; created by the first __init__.
    frag_handle = None

    def __init__(self, database_fname):
        """
        The class is initialized with the database used for core hopping, then
        it is queried with ligands which that core hopping run produced.

        :param database_fname: path of the sqlite3 core hopping database
        :type database_fname: str
        """

        self.database_fname = database_fname
        self.conn = sqlite3.connect(database_fname)
        self.cursor = self.conn.cursor()
        self.smiles_gen = smiles_mod.SmilesGenerator(
            stereo=smiles_mod.STEREO_FROM_ANNOTATION_AND_GEOM)

        # Only need one frag handle for all instances of the class
        if SubstCoreDatabase.frag_handle is None:
            mm.mmbuild_initialize(mm.error_handler)
            mm.mmfrag_initialize(mm.error_handler)
            SubstCoreDatabase.frag_handle = mm.mmfrag_new("organic")
            mm.mmfrag_set_fragment_name(SubstCoreDatabase.frag_handle,
                                        "Hydrogen")
            mm.mmfrag_set_direction(SubstCoreDatabase.frag_handle, "forward")

    def close(self):
        """
        Close the sqlite3 connection opened by `__init__`.

        The shared `frag_handle` is left untouched because other instances
        may still rely on it.
        """
        self.conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def get_subst_score(self, st):
        """
        Query the database using the provided Structure to attain a subst
        score.

        :param st: ligand produced by the core hopping run. It must carry the
                   's_cgch_core_smiles' property; atoms whose 'b_cg_iscore'
                   property is present and false are treated as side chain
                   atoms.
        :type st: structure.Structure

        :return: the accumulated substitution score (0 if no substitution
                 pattern stored for the core matches)
        :rtype: float

        :raises IndexError: if the core SMILES does not occur exactly once in
                            the 'core' table of the database
        """

        smiles = st.property['s_cgch_core_smiles']

        # Only atoms explicitly flagged as non-core count as side chain atoms;
        # atoms lacking the property are left out of this set.
        sidechain_atoms = set()
        for atom in st.atom:
            if 'b_cg_iscore' in atom.property:
                if not atom.property['b_cg_iscore']:
                    sidechain_atoms.add(int(atom))

        # Collect the "from" atoms: non-side-chain atoms bonded to at least
        # one side chain atom.
        from_atoms = set()
        for atom in st.atom:
            if int(atom) in sidechain_atoms:
                continue
            if any(int(ba) in sidechain_atoms for ba in atom.bonded_atoms):
                # Get the base from in case this atom is a linker
                gn_split = atom.growname.replace(' ', '').split('.')
                from_atoms.add(int(gn_split[0]))

        # Make sure the smiles exists inside the database. The value is bound
        # as a parameter rather than interpolated into the SQL text, and the
        # check is an explicit comparison so it still runs under `python -O`.
        self.cursor.execute(
            "select COUNT(smiles) from core where smiles=?;", (smiles,))
        (n_found,) = self.cursor.fetchone()
        if n_found != 1:
            raise IndexError("Smiles: '%s' not found in Database: '%s'" %
                             (smiles, self.database_fname))

        self.cursor.execute(
            "select n_smiles_matches, smiles_matches FROM core WHERE smiles==? ",
            (smiles,))

        # a list of lists containing all smiles matches maps
        smiles_matches = []
        for n_smiles_matches, smiles_match_blob in self.cursor:
            smiles_matches_whole = _convert_blob_to_int_array(smiles_match_blob)
            # The blob holds n_smiles_matches maps of equal length stored back
            # to back. A zero count or zero chunk size yields no maps; skip it
            # instead of dividing by zero or using a zero range step.
            if not n_smiles_matches:
                continue
            subset_size = len(smiles_matches_whole) // n_smiles_matches
            if subset_size == 0:
                continue
            for i in range(0, len(smiles_matches_whole), subset_size):
                smiles_matches.append(smiles_matches_whole[i:i + subset_size])

        self.cursor.execute(
            "SELECT times_found, subst_atoms, parent_smiles FROM "
            "subst_pattern where core_smiles==?;", (smiles,))

        # Calculate the subst score using the smiles
        score = 0
        for times_found, subst_atoms_blob, parent_smiles in self.cursor:
            subst_atoms = _convert_blob_to_int_array(subst_atoms_blob)
            for smiles_match in smiles_matches:
                # A pattern matches under this map when every substituted
                # atom maps onto one of the ligand's from atoms.
                if all(smiles_match[subst_atom] in from_atoms
                       for subst_atom in subst_atoms):
                    unweighted_score = float(len(subst_atoms)) / round(
                        comb(len(from_atoms), len(subst_atoms)))
                    # The weight approaches 1 as times_found grows.
                    score += unweighted_score * (1 - pow(0.5, times_found))
                    # Count each pattern at most once, on its first matching
                    # map.
                    break

        return score


class AttachmentPoint:
    """
    Class to map attachment point parameters to the corresponding rows in the
    'm_attachment' maestro block. Used to write attachment blocks for core
    hopping.
    """

    def __init__(self,
                 from_anum,
                 to_anum,
                 name='',
                 atnum=ANY,
                 numbonds=ANY,
                 fcharge=ANY):
        """
        :param from_anum: atom index written to the 'i_m_atom1' column; must
                          be 1 or greater
        :type from_anum: int

        :param to_anum: atom index written to the 'i_m_atom2' column; must be
                        1 or greater
        :type to_anum: int

        :param name: value for the 's_m_attachment_name' column
        :type name: str

        :param atnum: value for the 'i_m_ap_atnum' column; defaults to ANY
        :type atnum: int

        :param numbonds: value for the 'i_m_ap_numbonds' column; defaults to
                         ANY
        :type numbonds: int

        :param fcharge: value for the 'i_m_ap_fcharge' column; defaults to ANY
        :type fcharge: int

        :raises ValueError: if either atom index is less than 1
        """
        if from_anum < 1 or to_anum < 1:
            raise ValueError(
                f"Attachment pair {from_anum},{to_anum} contains an "
                "invalid (zero or negative) atom index")
        self.name = name
        self.from_anum = from_anum
        self.to_anum = to_anum
        self.atnum = atnum
        self.numbonds = numbonds
        self.fcharge = fcharge

    def makeRowObject(self):
        """
        Creates a dictionary object for which each key corresponds to a column
        in the 'm_attachment' attachment block used to specify core hopping
        attachment points.

        The reagent count and linker limits are always written as 0 and the
        reagent path and functional group as empty strings; the remaining
        columns come from this instance's attributes.

        :return: dictionary for block_data to easily append a row to
                 'm_attachment' with.
        :rtype: dict
        """
        return {
            'i_m_atom1': self.from_anum,
            'i_m_atom2': self.to_anum,
            'i_m_num_reagents': 0,
            'i_cgch_minlinker': 0,
            'i_cgch_maxlinker': 0,
            'i_m_ap_atnum': self.atnum,
            'i_m_ap_numbonds': self.numbonds,
            'i_m_ap_fcharge': self.fcharge,
            's_m_attachment_name': self.name,
            's_m_reagent_path': '',
            's_m_functional_group': ''
        }


def write_attachment_block(st, attachments):
    """
    Writes the attachment points specified for a core hopping job
    to the template core structure for downstream consumption.

    Note that each attachment's `name` is overwritten in place with
    'Attachment <n>', numbered from 1 in list order, before its row is built.

    :param st: template core structure for core hopping search
    :type st: structure.Structure

    :param attachments: list specifying attachment points
    :type attachments: list(corehop.AttachmentPoint)
    """
    new_data = {}
    for index, attachment in enumerate(attachments, 1):
        # Replaces whatever name the caller gave the attachment point.
        attachment.name = f'Attachment {index}'
        new_row = attachment.makeRowObject()
        block_data.append_row_to_data(new_data, ATTACHMENT_BLOCK, new_row)
    # NOTE(review): truncate=True presumably discards any existing
    # 'm_attachment' rows on the structure - confirm against block_data.
    block_data.write_blocks(st, new_data, truncate=True)