# Source code for schrodinger.application.combiglide.corehop

"""
This module contains classes and functions for Core Hopping.

Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Pat Lorton

import sqlite3
from past.utils import old_div
from struct import unpack

from scipy.special import comb

from schrodinger.infra import mm
from schrodinger.structutils import block_data
from schrodinger.structutils import smiles as smiles_mod

# Wildcard value used as the default for the atnum, numbonds and fcharge
# parameters of AttachmentPoint.
ANY = 999  # Same as ANY constant of vcs-src/chsr/data_struct_macros.h
# Name of the block that write_attachment_block() appends attachment rows to.
ATTACHMENT_BLOCK = 'm_attachment'


def _convert_blob_to_int_array(blob):
    """
    This is used to convert a sqlite3 blob into an int array
    """
    cb = []
    idx = 0
    while idx < len(blob):
        cb.append(blob[idx:idx + 4])
        idx += 4
    return [unpack("<L", x)[0] for x in cb]


class SubstCoreDatabase:
    """
    This class can be used to calculate substitution scores, where a core
    hopping database has been used to generate passed ligands.
    """
    # Class static variable: a single mmfrag handle shared by all instances,
    # created lazily by the first __init__ call.
    frag_handle = None

    def __init__(self, database_fname):
        """
        The class is initialized with the database used for core hopping,
        then it is queried with ligands which that core hopping run produced.

        :param database_fname: path of the sqlite3 core hopping database
        :type database_fname: str
        """
        self.database_fname = database_fname
        self.conn = sqlite3.connect(database_fname)
        self.cursor = self.conn.cursor()
        self.smiles_gen = smiles_mod.SmilesGenerator(
            stereo=smiles_mod.STEREO_FROM_ANNOTATION_AND_GEOM)

        # Only need one frag handle for all instances of the class
        if SubstCoreDatabase.frag_handle is None:
            mm.mmbuild_initialize(mm.error_handler)
            mm.mmfrag_initialize(mm.error_handler)
            SubstCoreDatabase.frag_handle = mm.mmfrag_new("organic")
            mm.mmfrag_set_fragment_name(SubstCoreDatabase.frag_handle,
                                        "Hydrogen")
            mm.mmfrag_set_direction(SubstCoreDatabase.frag_handle, "forward")

    def get_subst_score(self, st):
        """
        Query the database using the provided Structure to attain a subst
        score.

        The core SMILES is read from the structure property
        's_cgch_core_smiles'. Every 'subst_pattern' row stored for that core
        contributes to the score when at least one stored SMILES match maps
        all of the pattern's substituted atoms onto atoms of `st` that carry
        a side chain. A contributing pattern adds::

            len(subst_atoms) / round(comb(len(from_atoms), len(subst_atoms)))
                * (1 - 0.5 ** times_found)

        :param st: ligand produced by the core hopping run; its atoms are
            expected to carry the 'b_cg_iscore' property and a growname
        :type st: structure.Structure

        :return: the accumulated substitution score (0 if nothing matched)
        :rtype: float or int

        :raises IndexError: if the core SMILES is not present exactly once
            in the 'core' table of the database
        """
        smiles = st.property['s_cgch_core_smiles']

        # Atoms explicitly flagged as not belonging to the core.
        sidechain_atoms = {
            int(atom)
            for atom in st.atom
            if 'b_cg_iscore' in atom.property and
            not atom.property['b_cg_iscore']
        }

        # Add all from atoms to set
        from_atoms = set()
        for atom in st.atom:
            if int(atom) in sidechain_atoms:
                continue
            for ba in atom.bonded_atoms:
                if int(ba) in sidechain_atoms:
                    # Get the base from in case this atom is a linker
                    gn_split = atom.growname.replace(' ', '').split('.')
                    from_atoms.add(int(gn_split[0]))

        # Make sure the smiles exists inside the database. The value is bound
        # with a placeholder rather than interpolated into the SQL text, so
        # quote characters in the SMILES can neither break nor alter the
        # statement. The check is an explicit comparison (not an assert) so
        # that it still runs under "python -O".
        self.cursor.execute("select COUNT(smiles) from core where smiles=?;",
                            (smiles,))
        for row in self.cursor:
            if row[0] != 1:
                raise IndexError("Smiles: '%s' not found in Database: '%s'" %
                                 (smiles, self.database_fname))

        self.cursor.execute(
            "select n_smiles_matches, smiles_matches FROM core "
            "WHERE smiles=?;", (smiles,))
        # a list of lists containing all smiles matches maps
        smiles_matches = []
        for n_smiles_matches, smiles_match_blob in self.cursor:
            smiles_matches_whole = _convert_blob_to_int_array(smiles_match_blob)
            # The blob holds n_smiles_matches equally sized maps back to back.
            subset_size = old_div(len(smiles_matches_whole), n_smiles_matches)
            for i in range(0, len(smiles_matches_whole), subset_size):
                smiles_matches.append(smiles_matches_whole[i:i + subset_size])

        self.cursor.execute(
            "SELECT times_found, subst_atoms, parent_smiles FROM "
            "subst_pattern where core_smiles=?;", (smiles,))
        # Calculate the subst score using the smiles
        score = 0
        for times_found, subst_atoms_blob, _parent_smiles in self.cursor:
            subst_atoms = _convert_blob_to_int_array(subst_atoms_blob)
            for smiles_match in smiles_matches:
                if all(smiles_match[subst_atom] in from_atoms
                       for subst_atom in subst_atoms):
                    unweighted_score = old_div(
                        float(len(subst_atoms)),
                        round(comb(len(from_atoms), len(subst_atoms))))
                    score += unweighted_score * (1 - pow(0.5, times_found))
                    # Count each pattern at most once, however many of the
                    # stored matches satisfy it.
                    break

        return score
class AttachmentPoint:
    """
    Holds the parameters of one attachment point and maps them onto the
    columns of a row in the 'm_attachment' maestro block. Used to write
    attachment blocks for core hopping.
    """

    def __init__(self,
                 from_anum,
                 to_anum,
                 name='',
                 atnum=ANY,
                 numbonds=ANY,
                 fcharge=ANY):
        """
        Store the attachment point parameters after validating the atom pair.

        :param from_anum: atom index written to the 'i_m_atom1' column
        :type from_anum: int
        :param to_anum: atom index written to the 'i_m_atom2' column
        :type to_anum: int
        :param name: value for the 's_m_attachment_name' column
        :type name: str
        :param atnum: value for the 'i_m_ap_atnum' column
        :param numbonds: value for the 'i_m_ap_numbonds' column
        :param fcharge: value for the 'i_m_ap_fcharge' column

        :raises ValueError: if either atom index is less than 1
        """
        if any(anum < 1 for anum in (from_anum, to_anum)):
            raise ValueError(
                f"Attachment pair {from_anum},{to_anum} contains an "
                "invalid (zero or negative) atom index")

        self.from_anum = from_anum
        self.to_anum = to_anum
        self.name = name
        self.atnum = atnum
        self.numbonds = numbonds
        self.fcharge = fcharge

    def makeRowObject(self):
        """
        Build the row for this attachment point as a dictionary whose keys
        are the column names of the 'm_attachment' attachment block used to
        specify core hopping attachment points.

        The reagent count and both linker limits are always written as 0,
        and the reagent path and functional group as empty strings.

        :return: dictionary for block_data to easily append a row to
            'm_attachment' with.
        :rtype: dict
        """
        row = {}
        # Integer columns
        row['i_m_atom1'] = self.from_anum
        row['i_m_atom2'] = self.to_anum
        row['i_m_num_reagents'] = 0
        row['i_cgch_minlinker'] = 0
        row['i_cgch_maxlinker'] = 0
        row['i_m_ap_atnum'] = self.atnum
        row['i_m_ap_numbonds'] = self.numbonds
        row['i_m_ap_fcharge'] = self.fcharge
        # String columns
        row['s_m_attachment_name'] = self.name
        row['s_m_reagent_path'] = ''
        row['s_m_functional_group'] = ''
        return row
def write_attachment_block(st, attachments):
    """
    Store the attachment points chosen for a core hopping job on the
    template core structure, for downstream consumption.

    Each attachment is renamed in place to 'Attachment <n>', numbered from 1
    in list order, before its row is generated; any name it had is
    overwritten.

    :param st: template core structure for core hopping search
    :type st: structure.Structure

    :param attachments: list specifying attachment points
    :type attachments: list(corehop.AttachmentPoint)
    """
    rows_by_block = {}
    position = 0
    for attachment_point in attachments:
        position += 1
        attachment_point.name = f'Attachment {position}'
        block_data.append_row_to_data(rows_by_block, ATTACHMENT_BLOCK,
                                      attachment_point.makeRowObject())
    block_data.write_blocks(st, rows_by_block, truncate=True)