Source code for schrodinger.rdkit.substructure

"""
Substructure searching and alignment

Copyright Schrodinger LLC, All Rights Reserved.
"""

from typing import Generator
from typing import Iterable
from typing import NamedTuple
from typing import Optional
from typing import Tuple

from rdkit import Chem
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdMolEnumerator
from rdkit.Chem import rdTautomerQuery

NO_MATCH_ERROR_MSG = "Substructure match with reference not found"


[docs]class QueryOptions(NamedTuple): """ :cvar stereospecific: whether to consider stereochemistry and chirality :cvar fuzzy_aromatic_matching: whether to make aromatic queries less strict :cvar tautomer_insensitive: whether to consider tautomer insensitivity """ stereospecific: bool = True fuzzy_aromatic_matching: bool = True tautomer_insensitive: bool = False # TODO: deprecate once removed from bbchem repo; for now they are no-ops adjust_conjugated_five_rings: bool = False adjust_single_bonds_between_aromatic_atoms: bool = False adjust_single_bonds_to_degree_one_neighbors: bool = False
[docs]def replace_generic_h_queries(query): """ Replaces QH, AH, MH, and XH queries with something which works in the RDKit. Reminder: - QH = "any atom except carbon" - AH = "any atom, including H" - MH = "any metal, or H" - XH = "halogen or H" """ atoms_with_h_queries = [] for atom in query.GetAtoms(): if atom.HasQuery() and atom.GetQueryType() in ( 'QH', 'AH', 'MH', 'XH') and atom.GetDegree() == 1: atoms_with_h_queries.append(atom.GetIdx()) if not atoms_with_h_queries: return [query] original_query = Chem.RWMol(query) original_query.BeginBatchEdit() all_queries = [original_query] has_h_query = Chem.AtomFromSmarts('[!H0]') for query_atom_idx in atoms_with_h_queries: for i in range(len(all_queries)): mol = Chem.RWMol(all_queries[i]) nbr = mol.GetAtomWithIdx(query_atom_idx).GetNeighbors()[0] if nbr.GetIdx() in atoms_with_h_queries: continue if nbr.HasQuery(): nbr.ExpandQuery(has_h_query.GetQuery()) else: # replace neighbor with a query atom that has at least 1 hydrogen nbr_with_h = Chem.AtomFromSmarts(f'[#{nbr.GetAtomicNum()}!H0]') mol.ReplaceAtom(nbr.GetIdx(), nbr_with_h) mol.RemoveAtom(query_atom_idx) all_queries.append(mol) for mol in all_queries: mol.CommitBatchEdit() return all_queries
[docs]def expand_query( base_query: Chem.rdchem.Mol, options: QueryOptions) -> Generator[Chem.rdchem.Mol, None, None]: """ Expands a given query, accounting for tautomer matching, link nodes, and variable bonds. If the substructure options dictate it, each generated query is also adjusted. """ options = options or QueryOptions() query_params = Chem.AdjustQueryParameters.NoAdjustments() query_params.adjustConjugatedFiveRings = options.fuzzy_aromatic_matching query_params.adjustSingleBondsBetweenAromaticAtoms = \ options.fuzzy_aromatic_matching query_params.adjustSingleBondsToDegreeOneNeighbors = \ options.fuzzy_aromatic_matching query_params.makeDummiesQueries = True base_query = Chem.rdmolops.MergeQueryHs(base_query) query_mols = rdMolEnumerator.Enumerate(base_query) or [base_query] for mol in query_mols: if options.tautomer_insensitive: # initialize RingInfo Chem.FastFindRings(mol) try: # SHARED-8672: When rgroup decomposition options have the # fuzzy_aromatic_matching and tautomer_insensitive turned # on, sometimes a kekulization error will be raised when creating # the tautomer query. If that occurs, ignore the tautomers so that # the original scaffold can still match tqry = rdTautomerQuery.TautomerQuery(mol) query = tqry.GetTemplateMolecule() except Chem.rdchem.KekulizeException: query = mol pass else: query = mol query = Chem.AdjustQueryProperties(query, query_params) query.UpdatePropertyCache(False) yield from replace_generic_h_queries(query)
[docs]def substructure_matches(mol: Chem.rdchem.Mol, query_mol: Chem.rdchem.Mol, options: Optional[QueryOptions] = None): """ Generates all substructure matches against a given query mol """ options = options or QueryOptions() params = Chem.rdchem.SubstructMatchParameters() params.useChirality = options.stereospecific params.useEnhancedStereo = options.stereospecific for query in expand_query(query_mol, options): yield from mol.GetSubstructMatches(query, params)
[docs]def verify_template_has_coordinates(mol: Chem.rdchem.Mol): """ :raise ValueError: if the mol has no coordinates present """ if mol.GetNumConformers() == 0: raise ValueError("Template mol requires coordinates for alignment")
[docs]def apply_substructure_coordinates(mol: Chem.rdchem.Mol, template_mol: Chem.rdchem.Mol, options: Optional[QueryOptions] = None): """ Applies coordinates from the provided template to the input mol; used for compound alignment requests in image generation. :param mol: mol to apply new coordinates :param template_mol: reference from which to draw coordinates :param options: query options for substructure matching :raise ValueError: if the mol has no coordinates present NOTE: If the substructure match to the template fails, the alignment is skipped altogether, leaving the input mol coordinates as they were """ verify_template_has_coordinates(template_mol) options = options or QueryOptions() template_mol = next(expand_query(template_mol, options)) params = Chem.rdchem.SubstructMatchParameters() params.useChirality = options.stereospecific params.useEnhancedStereo = options.stereospecific if not mol.HasSubstructMatch(template_mol, params): return rdDepictor.GenerateDepictionMatching2DStructure(mol, template_mol)
[docs]def apply_substructure_coordinates_from_mapping( mol: Chem.rdchem.Mol, template_mol: Chem.rdchem.Mol, atom_mapping: Iterable[Tuple[int, int]]): """ Applies coordinates from the provided template to the input mol utilizing an explicit atom mapping :param mol: mol to apply new coordinates :param template_mol: reference from which to draw coordinates :param atom_mapping: (template_idx, mol_idx) pairs to use :raise ValueError: if the mol has no coordinates present """ verify_template_has_coordinates(template_mol) with rdDepictor.UsingCoordGen(True): rdDepictor.GenerateDepictionMatching2DStructure(mol, template_mol, atomMap=atom_mapping)