Source code for schrodinger.rdkit.substructure
"""
Substructure searching and alignment
Copyright Schrodinger LLC, All Rights Reserved.
"""
from typing import Generator
from typing import Iterable
from typing import NamedTuple
from typing import Optional
from typing import Tuple
from rdkit import Chem
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdMolEnumerator
from rdkit.Chem import rdTautomerQuery
NO_MATCH_ERROR_MSG = "Substructure match with reference not found"
[docs]class QueryOptions(NamedTuple):
"""
:cvar stereospecific: whether to consider stereochemistry and chirality
:cvar fuzzy_aromatic_matching: whether to make aromatic queries less strict
:cvar tautomer_insensitive: whether to consider tautomer insensitivity
"""
stereospecific: bool = True
fuzzy_aromatic_matching: bool = True
tautomer_insensitive: bool = False
# TODO: deprecate once removed from bbchem repo; for now they are no-ops
adjust_conjugated_five_rings: bool = False
adjust_single_bonds_between_aromatic_atoms: bool = False
adjust_single_bonds_to_degree_one_neighbors: bool = False
[docs]def replace_generic_h_queries(query):
"""
Replaces QH, AH, MH, and XH queries with something which works in the RDKit.
Reminder:
- QH = "any atom except carbon"
- AH = "any atom, including H"
- MH = "any metal, or H"
- XH = "halogen or H"
"""
atoms_with_h_queries = []
for atom in query.GetAtoms():
if atom.HasQuery() and atom.GetQueryType() in (
'QH', 'AH', 'MH', 'XH') and atom.GetDegree() == 1:
atoms_with_h_queries.append(atom.GetIdx())
if not atoms_with_h_queries:
return [query]
original_query = Chem.RWMol(query)
original_query.BeginBatchEdit()
all_queries = [original_query]
has_h_query = Chem.AtomFromSmarts('[!H0]')
for query_atom_idx in atoms_with_h_queries:
for i in range(len(all_queries)):
mol = Chem.RWMol(all_queries[i])
nbr = mol.GetAtomWithIdx(query_atom_idx).GetNeighbors()[0]
if nbr.GetIdx() in atoms_with_h_queries:
continue
if nbr.HasQuery():
nbr.ExpandQuery(has_h_query.GetQuery())
else:
# replace neighbor with a query atom that has at least 1 hydrogen
nbr_with_h = Chem.AtomFromSmarts(f'[#{nbr.GetAtomicNum()}!H0]')
mol.ReplaceAtom(nbr.GetIdx(), nbr_with_h)
mol.RemoveAtom(query_atom_idx)
all_queries.append(mol)
for mol in all_queries:
mol.CommitBatchEdit()
return all_queries
[docs]def expand_query(
base_query: Chem.rdchem.Mol,
options: QueryOptions) -> Generator[Chem.rdchem.Mol, None, None]:
"""
Expands a given query, accounting for tautomer matching, link nodes, and
variable bonds. If the substructure options dictate it, each generated
query is also adjusted.
"""
options = options or QueryOptions()
query_params = Chem.AdjustQueryParameters.NoAdjustments()
query_params.adjustConjugatedFiveRings = options.fuzzy_aromatic_matching
query_params.adjustSingleBondsBetweenAromaticAtoms = \
options.fuzzy_aromatic_matching
query_params.adjustSingleBondsToDegreeOneNeighbors = \
options.fuzzy_aromatic_matching
query_params.makeDummiesQueries = True
base_query = Chem.rdmolops.MergeQueryHs(base_query)
query_mols = rdMolEnumerator.Enumerate(base_query) or [base_query]
for mol in query_mols:
if options.tautomer_insensitive:
# initialize RingInfo
Chem.FastFindRings(mol)
try:
# SHARED-8672: When rgroup decomposition options have the
# fuzzy_aromatic_matching and tautomer_insensitive turned
# on, sometimes a kekulization error will be raised when creating
# the tautomer query. If that occurs, ignore the tautomers so that
# the original scaffold can still match
tqry = rdTautomerQuery.TautomerQuery(mol)
query = tqry.GetTemplateMolecule()
except Chem.rdchem.KekulizeException:
query = mol
pass
else:
query = mol
query = Chem.AdjustQueryProperties(query, query_params)
query.UpdatePropertyCache(False)
yield from replace_generic_h_queries(query)
[docs]def substructure_matches(mol: Chem.rdchem.Mol,
query_mol: Chem.rdchem.Mol,
options: Optional[QueryOptions] = None):
"""
Generates all substructure matches against a given query mol
"""
options = options or QueryOptions()
params = Chem.rdchem.SubstructMatchParameters()
params.useChirality = options.stereospecific
params.useEnhancedStereo = options.stereospecific
for query in expand_query(query_mol, options):
yield from mol.GetSubstructMatches(query, params)
[docs]def verify_template_has_coordinates(mol: Chem.rdchem.Mol):
"""
:raise ValueError: if the mol has no coordinates present
"""
if mol.GetNumConformers() == 0:
raise ValueError("Template mol requires coordinates for alignment")
[docs]def apply_substructure_coordinates(mol: Chem.rdchem.Mol,
template_mol: Chem.rdchem.Mol,
options: Optional[QueryOptions] = None):
"""
Applies coordinates from the provided template to the input mol; used for
compound alignment requests in image generation.
:param mol: mol to apply new coordinates
:param template_mol: reference from which to draw coordinates
:param options: query options for substructure matching
:raise ValueError: if the mol has no coordinates present
NOTE: If the substructure match to the template fails, the alignment is
skipped altogether, leaving the input mol coordinates as they were
"""
verify_template_has_coordinates(template_mol)
options = options or QueryOptions()
template_mol = next(expand_query(template_mol, options))
params = Chem.rdchem.SubstructMatchParameters()
params.useChirality = options.stereospecific
params.useEnhancedStereo = options.stereospecific
if not mol.HasSubstructMatch(template_mol, params):
return
rdDepictor.GenerateDepictionMatching2DStructure(mol, template_mol)
[docs]def apply_substructure_coordinates_from_mapping(
mol: Chem.rdchem.Mol, template_mol: Chem.rdchem.Mol,
atom_mapping: Iterable[Tuple[int, int]]):
"""
Applies coordinates from the provided template to the input mol utilizing
an explicit atom mapping
:param mol: mol to apply new coordinates
:param template_mol: reference from which to draw coordinates
:param atom_mapping: (template_idx, mol_idx) pairs to use
:raise ValueError: if the mol has no coordinates present
"""
verify_template_has_coordinates(template_mol)
with rdDepictor.UsingCoordGen(True):
rdDepictor.GenerateDepictionMatching2DStructure(mol,
template_mol,
atomMap=atom_mapping)