Source code for schrodinger.livedesign.bbchem_endpoints

"""
Collection of functions intended as bbchem web endpoints.

Copyright Schrodinger, LLC. All rights reserved.
"""

import enum
from typing import Iterable
from typing import Iterator
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Union

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import EnumerateStereoisomers
from rdkit.Chem import rdChemReactions
from rdkit.Chem import rdTautomerQuery
from rdkit.DataStructs.cDataStructs import ExplicitBitVect

from schrodinger.livedesign import convert
from schrodinger.livedesign import draw
from schrodinger.livedesign import molhash
from schrodinger.livedesign import preprocessor
from schrodinger.livedesign import rgroup_decomposition
from schrodinger.livedesign import substructure
from schrodinger.livedesign.molhash import ATOM_PROP_MAP_NUMBER
from schrodinger.livedesign.preprocessor import MOL_PROP_R_LABEL

DEFAULT_MAX_STEREOISOMERS = 512


class RegistrationDescriptors(NamedTuple):
    """
    Per-compound descriptor values reported by `registration_process`.
    """
    # Filled from Chem.Descriptors.MolWt
    average_molecular_weight: float
    # Filled from Chem.Descriptors.ExactMolWt
    exact_molecular_weight: float
    # Filled from Chem.GetFormalCharge
    total_charge: int
    # Filled from Chem.rdMolDescriptors.CalcMolFormula
    molecular_formula: str
class RegistrationData(NamedTuple):
    """
    Result yielded by `registration_process` for each input compound.
    """
    # Mol block produced by preprocessor.convert_to_molblock for the
    # preprocessed mol
    sdf: Union[str, bytes]
    # Hash computed with the caller's requested hash scheme
    requested_hash: str
    # Hash computed with molhash.HashScheme.STEREO_INSENSITIVE_LAYERS
    no_stereo_hash: str
    # The molhash.HashLayer.CANONICAL_SMILES layer of the mol
    display_smiles: str
    # Descriptor values computed on the preprocessed mol
    descriptors: RegistrationDescriptors
    # True if convert.atom_has_attachment_point holds for any atom
    has_attachment_point: bool
def registration_process(
        data: Union[str, bytes],
        options: Optional[preprocessor.PreprocessorOptions] = None,
        hash_scheme: molhash.HashScheme = molhash.HashScheme.ALL_LAYERS,
        data_field_names: Optional[Iterable] = None,
        escape: Optional[str] = None) -> Iterator[RegistrationData]:
    """
    Runs through the registration pipeline for each compound provided in the
    input data, which includes the preprocessor and canonicalization.

    This is a generator: no work is done until the result is iterated.

    :param data: input text string to be deserialized into RDKit mols
    :param options: preprocessor options
    :param hash_scheme: hash scheme used to compute `requested_hash`
    :param data_field_names: forwarded unchanged to `molhash.get_mol_layers`
    :param escape: forwarded unchanged to `molhash.get_mol_layers`
    :return: one `RegistrationData` per mol read from the input
    """
    # Import the descriptor submodules explicitly. `from rdkit import Chem`
    # does not guarantee that `Chem.Descriptors` / `Chem.rdMolDescriptors`
    # are loaded; attribute access only works if some other module happened
    # to import them first.
    from rdkit.Chem import Descriptors
    from rdkit.Chem import rdMolDescriptors

    def get_descriptors(mol):
        return RegistrationDescriptors(
            average_molecular_weight=Descriptors.MolWt(mol),
            exact_molecular_weight=Descriptors.ExactMolWt(mol),
            total_charge=Chem.GetFormalCharge(mol),
            molecular_formula=rdMolDescriptors.CalcMolFormula(mol),
        )

    def mol_has_attachment_point(mol):
        return any(
            convert.atom_has_attachment_point(atom) for atom in mol.GetAtoms())

    for mol in convert.get_sd_reader(data):
        mol = preprocessor.preprocess(mol, options)
        # The layers are computed once and reused for both hashes and the
        # display SMILES.
        hash_layers = molhash.get_mol_layers(mol, data_field_names, escape)

        yield RegistrationData(
            sdf=preprocessor.convert_to_molblock(mol, options),
            requested_hash=molhash.get_molhash(hash_layers, hash_scheme),
            no_stereo_hash=molhash.get_molhash(
                hash_layers, molhash.HashScheme.STEREO_INSENSITIVE_LAYERS),
            display_smiles=hash_layers[molhash.HashLayer.CANONICAL_SMILES],
            descriptors=get_descriptors(mol),
            has_attachment_point=mol_has_attachment_point(mol))
class FingerprintUse(enum.Enum):
    """
    Selects which kind of fingerprint `generate_fingerprint` produces.
    """
    # Uses a tautomer insensitive pattern fingerprint
    SUBSTRUCTURE_SEARCH = enum.auto()
    # Uses a Morgan fingerprint with radius=2
    SIMILARITY_SCORE = enum.auto()
def generate_fingerprint(
    mol: Chem.rdchem.Mol,
    use: FingerprintUse,
    substructure_options: Optional[substructure.QueryOptions] = None
) -> ExplicitBitVect:
    """
    Generates a substructure or similarity fingerprint for a given mol.

    :param mol: RDKit mol to generate fingerprint of
    :param use: what the fingerprint will be used for, which determines the
        type of fingerprint generated
    :param substructure_options: substructure matching options; only used
        for substructure search fingerprints
    :return: a 1024 bit Morgan fingerprint for similarity scoring, or a
        2048 bit pattern fingerprint for substructure search
    """
    if use == FingerprintUse.SUBSTRUCTURE_SEARCH:
        pattern_size = 2048
        # Start with every bit set; each expanded query below can only
        # clear bits.
        pattern_fp = ExplicitBitVect(pattern_size, True)

        # SHARED-8528: don't calculate substructure fingerprints for
        # large/dense structures, since the search algorithm expects a
        # comparatively sparse graph.
        atom_count = mol.GetNumAtoms()
        if atom_count > 50 and mol.GetNumBonds() > atom_count * 2:
            return pattern_fp

        expanded = substructure.expand_query(mol, substructure_options)
        for expanded_mol in expanded:
            pattern_fp &= rdTautomerQuery.PatternFingerprintTautomerTarget(
                expanded_mol, fingerprintSize=pattern_size)
        return pattern_fp

    if use == FingerprintUse.SIMILARITY_SCORE:
        # Query features do not make sense for morgan fingerprinting
        preprocessor.assert_not_query(mol)

        # Work on a copy with all non-stereogenic Hydrogens removed
        stripped = Chem.RemoveHs(mol, sanitize=False)
        stripped.UpdatePropertyCache(False)
        Chem.FastFindRings(stripped)
        return AllChem.GetMorganFingerprintAsBitVect(stripped, 2, nBits=1024)
def generate_image(
    mol: Chem.rdchem.Mol,
    alignment_mol: Optional[Chem.rdchem.Mol] = None,
    substructure_options: Optional[substructure.QueryOptions] = None,
    highlight_mol: Optional[Chem.rdchem.Mol] = None,
    draw_options: Optional[draw.ImageGenOptions] = None
) -> Union[str, bytes]:
    """
    Generates a LiveDesign compound image, optionally aligning the compound
    to another molecule and/or highlighting a substructure in it.

    :param mol: compound to generate an image of
    :param alignment_mol: molecule to align to prior to image generation
    :param substructure_options: substructure matching options, used for
        both the alignment and the highlighting
    :param highlight_mol: core to highlight in generated image
    :param draw_options: image generation options
    :return: generated image as a string
    """
    # Alignment is applied first so that the highlight is computed on the
    # aligned compound.
    if alignment_mol:
        substructure.apply_substructure_coordinates(
            mol, alignment_mol, substructure_options)

    image_options = draw_options
    if highlight_mol:
        image_options = draw.set_highlight(
            mol, highlight_mol, substructure_options, image_options)

    return draw.draw_image(mol, image_options)
def generate_reaction_image(
    rxn: Chem.rdChemReactions.ChemicalReaction,
    draw_options: Optional[draw.ImageGenOptions] = None
) -> Union[str, bytes]:
    """
    Generates a LiveDesign image of a reaction.

    :param rxn: reaction to generate an image of
    :param draw_options: image generation options
    :return: generated image as a string
    """
    # Delegates to the same drawing entry point used for compound images.
    reaction_image = draw.draw_image(rxn, draw_options)
    return reaction_image
def generate_sar_analysis_image(
    match_mol: Chem.rdchem.Mol,
    scaffold_mol: Chem.rdchem.Mol,
    substructure_options: Optional[substructure.QueryOptions] = None,
    draw_options: Optional[draw.ImageGenOptions] = None
) -> Union[str, bytes]:
    """
    Generates a LiveDesign image specifically for SAR analysis output, with
    the core and all r-groups from the decomposition highlighted.

    :param match_mol: source molecule for R-group decomposition to highlight
        and generate image of
    :param scaffold_mol: scaffold molecule on which to find R-groups
    :param substructure_options: substructure matching options
    :param draw_options: image generation options
    :return: generated image as a string
    :raises ValueError: if the scaffold cannot be decomposed against the
        requested structure
    """
    # Tag every atom of a copy with its own index, and decompose the copy
    # rather than the input mol.
    indexed_mol = Chem.Mol(match_mol)
    for atom in indexed_mol.GetAtoms():
        atom.SetIntProp(ATOM_PROP_MAP_NUMBER, atom.GetIdx())

    if not substructure_options:
        substructure_options = substructure.QueryOptions()

    decomp = rgroup_decomposition.get_rgroup_decomp(
        scaffold_mol, indexed_mol, substructure_options)
    if decomp is None:
        raise ValueError(
            "Unable to generate SAR analysis image; core does not match the requested structure"
        )

    # Align mol to original scaffold; skipped if there are internal rgroups
    # NOTE(review): substructure_options is not forwarded here, unlike the
    # alignment call in generate_image -- confirm this is intentional.
    substructure.apply_substructure_coordinates(match_mol, scaffold_mol)

    highlight_options = draw.set_rgroup_highlight(match_mol, decomp,
                                                  draw_options)
    return draw.draw_image(match_mol, highlight_options)
def pop_properties(mol: Chem.rdchem.Mol) -> dict:
    """
    Extracts all properties from a molecule and removes them from it.

    :param mol: molecule to extract, then clear all properties from
    :return: map of all removed properties as strings
    """
    # Snapshot the values before they are removed from the mol.
    removed = {}
    for name, value in mol.GetPropsAsDict().items():
        removed[name] = str(value)

    preprocessor.remove_properties(mol)
    return removed
def set_properties(mol: Chem.rdchem.Mol, new_props: dict):
    """
    Replaces the properties of a molecule with the given ones.

    :param mol: molecule to clear, then set given properties on
    :param new_props: map of properties to add onto the molecule; values
        are stored as strings
    """
    # Existing properties are dropped first, so the mol ends up carrying
    # only what is in new_props.
    preprocessor.remove_properties(mol)

    for prop_name, prop_value in new_props.items():
        as_text = str(prop_value)
        mol.SetProp(prop_name, as_text)
def split_fragments(mol: Chem.rdchem.Mol):
    """
    Splits a molecule into its fragments.

    :param mol: input molecule
    :return: iterable containing each fragment mol
    """
    # Fragments are returned as mols, without sanitizing them.
    fragments = Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)
    return fragments
def num_substructure_matches(
        match_mol: Chem.rdchem.Mol,
        query_mol: Chem.rdchem.Mol,
        options: Optional[substructure.QueryOptions] = None) -> int:
    """
    Counts the substructure matches between two molecules.

    :param match_mol: molecule to find substructure matches in
    :param query_mol: substructure molecule on which to find matches
    :param options: substructure query options
    :return: number of substructure matches found
    """
    # The matches are consumed one at a time and only counted.
    count = 0
    for _ in substructure.substructure_matches(match_mol, query_mol,
                                               options):
        count += 1
    return count
def has_substructure_match(
        match_mol: Chem.rdchem.Mol,
        query_mol: Chem.rdchem.Mol,
        options: Optional[substructure.QueryOptions] = None) -> bool:
    """
    Checks whether there is at least one substructure match between two
    molecules.

    :param match_mol: molecule to find substructure matches in
    :param query_mol: substructure molecule on which to find matches
    :param options: substructure query options
    :return: whether a substructure match was found
    """
    if not options:
        options = substructure.QueryOptions()

    matches = substructure.substructure_matches(match_mol, query_mol, options)

    # Only the first match is requested. A unique sentinel (rather than
    # None) marks exhaustion so that any yielded value counts as a match.
    exhausted = object()
    return next(matches, exhausted) is not exhausted
def enumerate_stereoisomers(
    mol: Chem.rdchem.Mol,
    max_stereoisomers: int = DEFAULT_MAX_STEREOISOMERS
) -> Iterator[Chem.rdchem.Mol]:
    """
    Generates stereoisomers of a given molecule.

    :param mol: molecule from which to generate stereoisomers; it is copied
        before stereochemistry is assigned
    :param max_stereoisomers: maximum number of stereoisomers to generate
    :return: generated stereoisomers
    """
    working_mol = Chem.Mol(mol)
    Chem.AssignStereochemistry(working_mol, flagPossibleStereoCenters=True)

    # A fixed `rand` value is passed rather than leaving it unset.
    enumeration_options = EnumerateStereoisomers.StereoEnumerationOptions(
        unique=True,
        maxIsomers=max_stereoisomers,
        rand=0xF00D,
    )
    return EnumerateStereoisomers.EnumerateStereoisomers(
        working_mol, enumeration_options)
def rgroup_decompose(
        scaffold_mol: Chem.rdchem.Mol,
        match_mol: Chem.rdchem.Mol,
        options: Optional[substructure.QueryOptions] = None
) -> Optional[dict]:
    """
    Decomposes a molecule into its core and R-groups given a scaffold

    :param scaffold_mol: scaffold molecule on which to find R-groups
    :param match_mol: source molecule for R-group decomposition
    :param options: substructure query options; a default
        `substructure.QueryOptions()` is used when not given
    :return: the decomposition as a mapping from label to mol, where every
        entry whose label starts with "R" has had its rgroups replaced with
        dummy atoms; None if no decomposition was found
    """
    options = options or substructure.QueryOptions()
    match = rgroup_decomposition.get_rgroup_decomp(scaffold_mol, match_mol,
                                                   options)
    if match is None:
        return None

    # RDKit's RGroup decomposition returns rgroup mols that use rgroups
    # (instead of dummy atoms or attachment points) to represent where the
    # rgroup attaches to the scaffold. We want to replace these rgroups with
    # dummy atoms to be consistent with rgroups ran through the preprocessor
    # and allow rgroups to render correctly.
    #
    # Only values of existing keys are reassigned, so updating the mapping
    # while iterating over it is safe.
    for rlabel, mol in match.items():
        if rlabel.startswith("R"):
            match[rlabel] = (
                rgroup_decomposition.replace_rgroups_with_dummy_atoms(mol))
    return match
def get_rgroup_labels(scaffold_mol: Chem.rdchem.Mol) -> List[str]:
    """
    Collects the R-group labels present on a scaffold.

    :param scaffold_mol: scaffold molecule
    :return: R-group labels present on the scaffold, formatted as "R<n>" and
        ordered by their numeric value (so "R2" comes before "R10")
    """
    rlabels = set()
    for atm in scaffold_mol.GetAtoms():
        # Only atoms with atomic number 0 that carry the R label property
        # are considered.
        if atm.GetAtomicNum() == 0 and atm.HasProp(MOL_PROP_R_LABEL):
            rlabel = atm.GetIntProp(MOL_PROP_R_LABEL)
            # A label that was already seen is bumped to the next unused
            # number, so every labelled atom contributes a distinct label.
            while rlabel in rlabels:
                rlabel += 1
            rlabels.add(rlabel)

    # Sort the integers before formatting: sorting the formatted strings
    # would order them lexicographically ("R1", "R10", "R2", ...).
    return [f"R{x}" for x in sorted(rlabels)]
def setup_reaction(rxn_input: str) -> str:
    """
    Tidy up and convert user sketched reactions into a format that can be
    used for reaction enumeration.

    :param rxn_input: a SMARTS string describing the user's reaction.
    :return: a SMARTS string describing the cleaned up reaction
    """
    # The setup_reaction module has some heavy dependencies which we
    # prefer to avoid importing unless required. It is aliased here so the
    # module does not shadow this function's own name.
    from schrodinger.livedesign import setup_reaction as reaction_setup

    try:
        parsed_rxn = convert._to_rdkit_reaction(rxn_input,
                                                convert.Format.SMARTS)
    except ValueError:
        # FIXME: LiveDesign current uses MDL RXN input
        parsed_rxn = convert._to_rdkit_reaction(rxn_input, convert.Format.SDF)

    tidied_rxn = reaction_setup.setup_reaction(parsed_rxn)
    return rdChemReactions.ReactionToSmarts(tidied_rxn)