Source code for schrodinger.livedesign.substructure

"""
Substructure searching and alignment

Copyright Schrodinger LLC, All Rights Reserved.
"""

from typing import Generator
from typing import NamedTuple
from typing import Optional

from rdkit import Chem
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdMolEnumerator
from rdkit.Chem import rdTautomerQuery

# Message text describing a failed substructure match against a reference.
# NOTE(review): nothing in the visible part of this module references this
# constant; presumably it is consumed by callers elsewhere -- confirm.
NO_MATCH_ERROR_MSG = "Substructure match with reference not found"


class QueryOptions(NamedTuple):
    """
    Options controlling how a substructure query is adjusted and matched.

    :cvar adjust_conjugated_five_rings: whether to set bond queries in
        conjugated five-member rings to SINGLE|DOUBLE|AROMATIC
    :cvar adjust_single_bonds_between_aromatic_atoms: whether to set
        non-ring single bonds between two aromatic atoms to SINGLE|AROMATIC
    :cvar adjust_single_bonds_to_degree_one_neighbors: whether to set single
        bonds between aromatic atoms and degree-one neighbors to
        SINGLE|AROMATIC
    :cvar tautomer_insensitive: whether to consider tautomer insensitivity
    :cvar stereospecific: whether to consider stereochemistry and chirality
    """

    # All query adjustments and tautomer matching are off by default.
    adjust_conjugated_five_rings: bool = False
    adjust_single_bonds_between_aromatic_atoms: bool = False
    adjust_single_bonds_to_degree_one_neighbors: bool = False
    tautomer_insensitive: bool = False
    # Stereochemistry is the only option that is enabled by default.
    stereospecific: bool = True
def replace_generic_h_queries(query):
    """
    Expand QH, AH, MH, and XH query atoms into queries the RDKit can match.

    Reminder:

    - QH = "any atom except carbon"
    - AH = "any atom, including H"
    - MH = "any metal, or H"
    - XH = "halogen or H"

    Only generic-H query atoms with exactly one neighbor are expanded.

    :param query: the query molecule to expand
    :return: ``[query]`` itself when it has no qualifying generic-H atoms;
        otherwise a list whose first entry is an editable copy of ``query``
        and whose remaining entries are variants in which a generic-H atom
        has been removed and its neighbor required to carry at least one
        hydrogen
    """
    generic_h_types = ('QH', 'AH', 'MH', 'XH')
    h_query_indices = [
        atom.GetIdx()
        for atom in query.GetAtoms()
        if atom.HasQuery() and atom.GetQueryType() in generic_h_types
        and atom.GetDegree() == 1
    ]
    if not h_query_indices:
        return [query]

    # The seed is put into batch-edit mode before any copies are taken and
    # every resulting molecule is committed at the very end.
    # NOTE(review): presumably this keeps atom indices stable while atoms are
    # removed from the copies -- confirm against the RDKit batch-edit docs.
    seed = Chem.RWMol(query)
    seed.BeginBatchEdit()
    expanded = [seed]
    at_least_one_h = Chem.AtomFromSmarts('[!H0]')

    for h_idx in h_query_indices:
        # Iterate over a snapshot: variants appended during this pass must
        # not be revisited until the next generic-H atom is processed.
        for parent in tuple(expanded):
            variant = Chem.RWMol(parent)
            neighbor = variant.GetAtomWithIdx(h_idx).GetNeighbors()[0]
            neighbor_idx = neighbor.GetIdx()
            if neighbor_idx in h_query_indices:
                # the neighbor is itself a generic-H query atom; skip it
                continue

            if not neighbor.HasQuery():
                # replace neighbor with a query atom that has at least 1
                # hydrogen
                replacement = Chem.AtomFromSmarts(
                    f'[#{neighbor.GetAtomicNum()}!H0]')
                variant.ReplaceAtom(neighbor_idx, replacement)
            else:
                neighbor.ExpandQuery(at_least_one_h.GetQuery())

            variant.RemoveAtom(h_idx)
            expanded.append(variant)

    for variant in expanded:
        variant.CommitBatchEdit()
    return expanded
def expand_query(
        base_query: Chem.rdchem.Mol,
        options: Optional[QueryOptions] = None
) -> Generator[Chem.rdchem.Mol, None, None]:
    """
    Expands a given query, accounting for tautomer matching, link nodes, and
    variable bonds. If the substructure options dictate it, each generated
    query is also adjusted.

    :param base_query: the query molecule to expand
    :param options: query options; a default ``QueryOptions()`` is used when
        this is ``None``
    :return: generator over the expanded (and adjusted) query molecules
    """
    options = options or QueryOptions()

    # Start from "no adjustments" and enable only what the options request;
    # dummy atoms are always converted into queries.
    query_params = Chem.AdjustQueryParameters.NoAdjustments()
    query_params.adjustConjugatedFiveRings = \
        options.adjust_conjugated_five_rings
    query_params.adjustSingleBondsBetweenAromaticAtoms = \
        options.adjust_single_bonds_between_aromatic_atoms
    query_params.adjustSingleBondsToDegreeOneNeighbors = \
        options.adjust_single_bonds_to_degree_one_neighbors
    query_params.makeDummiesQueries = True

    base_query = Chem.rdmolops.MergeQueryHs(base_query)
    # Fall back to the base query itself when enumeration yields nothing.
    query_mols = rdMolEnumerator.Enumerate(base_query) or [base_query]
    for mol in query_mols:
        query = mol
        if options.tautomer_insensitive:
            # initialize RingInfo
            Chem.FastFindRings(mol)
            try:
                # SHARED-8672: When rgroup decomposition options have the
                # adjust_conjugated_five_rings and tautomer_insensitive turned
                # on, sometimes a kekulization error will be raised when
                # creating the tautomer query. If that occurs, ignore the
                # tautomers so that the original scaffold can still match
                tqry = rdTautomerQuery.TautomerQuery(mol)
                query = tqry.GetTemplateMolecule()
            except Chem.rdchem.KekulizeException:
                query = mol

        query = Chem.AdjustQueryProperties(query, query_params)
        query.UpdatePropertyCache(False)
        yield from replace_generic_h_queries(query)
def substructure_matches(mol: Chem.rdchem.Mol,
                         query_mol: Chem.rdchem.Mol,
                         options: Optional[QueryOptions] = None):
    """
    Yield every substructure match of ``mol`` against a given query mol.

    The query is first expanded with ``expand_query``; the matches found for
    each expanded query are yielded in turn.

    :param mol: the molecule to search
    :param query_mol: the query molecule
    :param options: query options; a default ``QueryOptions()`` is used when
        this is ``None``
    """
    if not options:
        options = QueryOptions()

    # Chirality and enhanced stereo are switched on or off together.
    use_stereo = options.stereospecific
    match_params = Chem.rdchem.SubstructMatchParameters()
    match_params.useChirality = use_stereo
    match_params.useEnhancedStereo = use_stereo

    for expanded in expand_query(query_mol, options):
        for match in mol.GetSubstructMatches(expanded, match_params):
            yield match
def apply_substructure_coordinates(mol: Chem.rdchem.Mol,
                                   template_mol: Chem.rdchem.Mol,
                                   options: Optional[QueryOptions] = None):
    """
    Applies coordinates from the provided template to the input mol; used for
    compound alignment requests in image generation.

    NOTE: If the substructure match to the template fails, the alignment is
    skipped altogether, leaving the input mol coordinates as they were

    :param mol: the molecule whose depiction is aligned, in place
    :param template_mol: the template providing the reference coordinates
    :param options: query options; a default ``QueryOptions()`` is used when
        this is ``None``
    """
    if not options:
        options = QueryOptions()

    # NOTE(review): only the first query produced by expand_query is used as
    # the template; any further expansions are never tried.
    expanded_template = next(expand_query(template_mol, options))

    use_stereo = options.stereospecific
    match_params = Chem.rdchem.SubstructMatchParameters()
    match_params.useChirality = use_stereo
    match_params.useEnhancedStereo = use_stereo

    # Align only when the template actually matches the molecule.
    if mol.HasSubstructMatch(expanded_template, match_params):
        rdDepictor.GenerateDepictionMatching2DStructure(mol,
                                                        expanded_template)