Source code for schrodinger.livedesign.rgroup_decomposition

"""
Collection of functions used for RGroup Decomposition.

Copyright Schrodinger, LLC. All rights reserved.
"""

from typing import List

from rdkit import Chem
from rdkit.Chem import rdmolops
from rdkit.Chem import rdRGroupDecomposition

from schrodinger.rdkit import substructure
from schrodinger.livedesign.molhash import ATOM_PROP_MAP_NUMBER
from schrodinger.livedesign.preprocessor import ATOM_PROP_ATOM_LABEL
from schrodinger.livedesign.preprocessor import ATOM_PROP_DUMMY_LABEL
from schrodinger.livedesign.preprocessor import MOL_PROP_ATTACHPT
from schrodinger.livedesign.preprocessor import MOL_PROP_R_LABEL

ATOM_PROP_MATCH_TO_RG = 'match_to_rg'


[docs]def replace_rgroups_with_dummy_atoms(mol: Chem.rdchem.Mol) -> Chem.rdchem.Mol: updated_rgroup = Chem.Mol(mol) for at in updated_rgroup.GetAtoms(): if at.HasProp(ATOM_PROP_MAP_NUMBER) and at.HasProp( ATOM_PROP_DUMMY_LABEL) and at.HasProp(MOL_PROP_R_LABEL): at.ClearProp(ATOM_PROP_MAP_NUMBER) at.ClearProp(ATOM_PROP_DUMMY_LABEL) at.ClearProp(MOL_PROP_R_LABEL) at.SetAtomicNum(0) # dummy atoms should be represented as * in sdfs params = rdmolops.AdjustQueryParameters.NoAdjustments() params.makeDummiesQueries = True return rdmolops.AdjustQueryProperties(updated_rgroup, params)
def _get_rlabel_bundles(edit_core): """ Returns a set of tuples containing the r labels on each fragment of the provided core. For example, a core that looks like: R1 - C - R3 R2 - C - C - R4 would return {(1, 3), (2, 4)} """ visited = [False] * edit_core.GetNumAtoms() rlabel_bundles = set() for at in edit_core.GetAtoms(): if visited[at.GetIdx()]: continue q = [at.GetIdx()] current_bundle = [] while q: idx = q.pop() if not visited[idx]: visited[idx] = True edit_core_atom = edit_core.GetAtomWithIdx(idx) nbrs = [ neigh.GetIdx() for neigh in edit_core_atom.GetNeighbors() ] q.extend(nbrs) try: r_label = edit_core_atom.GetIntProp(MOL_PROP_R_LABEL) except KeyError: continue current_bundle.append(r_label) rlabel_bundles.add(tuple(current_bundle)) return rlabel_bundles def _join_attchpts_at_atom(edit_core, attch_pts, rg_num): edit_core.BeginBatchEdit() new_at_idx = edit_core.AddAtom(Chem.AtomFromSmarts('*')) for attch_pt in attch_pts: for neighbor in edit_core.GetAtomWithIdx(attch_pt).GetNeighbors(): old_bond = edit_core.GetBondBetweenAtoms(attch_pt, neighbor.GetIdx()) edit_core.AddBond(neighbor.GetIdx(), new_at_idx, old_bond.GetBondType()) # ensure conjugated and aromatic properties are the same as in the original # core, otherwise the stitched core will not match with the original new_bond = edit_core.GetBondBetweenAtoms(neighbor.GetIdx(), new_at_idx) new_bond.SetIsConjugated(old_bond.GetIsConjugated()) new_bond.SetIsAromatic(old_bond.GetIsAromatic()) edit_core.RemoveBond(attch_pt, neighbor.GetIdx()) edit_core.RemoveAtom(attch_pt) edit_core.GetAtomWithIdx(new_at_idx).SetIntProp(MOL_PROP_R_LABEL, int(rg_num[1::])) edit_core.CommitBatchEdit() return edit_core def _close_partial_rings(edit_core, attch_pts, rg_num): """ Closes rings that were closed on the original scaffold. :param edit_core: core split at rgroups :param attch_pts: atoms with attachment points to join together :param rg_num: rgroup number :return: core with partial rings closed """ join_atoms = dict() join_atoms_together = None for at_idx in attch_pts: at = edit_core.GetAtomWithIdx(at_idx) if at.HasProp(ATOM_PROP_MATCH_TO_RG): rlabel = at.GetIntProp(ATOM_PROP_MATCH_TO_RG) if rlabel in join_atoms: join_atoms_together = [join_atoms[rlabel], at_idx] break join_atoms[rlabel] = at_idx if join_atoms_together is None: # no ring to close return edit_core return _join_attchpts_at_atom(edit_core, join_atoms_together, rg_num) def _rgroup_includes_ring_bonds(rlabels, rlabel_bundles, rg): # check if rgroup closes a ring if not any( [set(rlabels).issubset(set(bundle)) for bundle in rlabel_bundles]): return False # check if one atom has two attachment points for at in rg.GetAtoms(): if len([ neigh for neigh in at.GetNeighbors() if neigh.HasProp(MOL_PROP_R_LABEL) ]) == 2: return False return True def _get_stitched_core(res): """ Reconstructs the core returned from rgroup decomposition to match the original core by stitching the core back together at the internal rgroups. """ # Each 'bundle' is a set of rlabels on some fragment of the core rlabel_bundles = _get_rlabel_bundles(res['Core']) edit_core = Chem.RWMol(res['Core']) existing_rgroup_labels = set() for rg_num, rg in res.items(): if rg_num == 'Core': continue # Skip if this rgroup has already been considered rlabels = set() for at in rg.GetAtoms(): try: rlabels.add(at.GetIntProp(MOL_PROP_R_LABEL)) except KeyError: pass rlabels = tuple(sorted(rlabels)) if rlabels not in existing_rgroup_labels: existing_rgroup_labels.add(rlabels) else: continue # Skip terminal RGroups if len(rlabels) == 1: continue # Find where rgroup belongs attch_pts = [] for at in edit_core.GetAtoms(): try: if at.GetIntProp(MOL_PROP_R_LABEL) in rlabels: attch_pts.append(at.GetIdx()) except KeyError: pass # Replace rgroup with a single atom if that was the structure of the # original core if attch_pts: if not _rgroup_includes_ring_bonds(rlabels, rlabel_bundles, rg): edit_core = _join_attchpts_at_atom(edit_core, attch_pts, rg_num) else: edit_core = _close_partial_rings(edit_core, attch_pts, rg_num) return edit_core def _make_unspecified_bonds_queries(mol): """ Replaces bonds with unspecified types with an 'any' query bond """ edit_mol = Chem.RWMol(mol) query_bond = Chem.BondFromSmarts('~') for bnd in edit_mol.GetBonds(): if bnd.GetBondType() == Chem.rdchem.BondType.UNSPECIFIED: edit_mol.ReplaceBond(bnd.GetIdx(), query_bond) return edit_mol.GetMol() def _remove_duplicate_rgroups(original_scaffold, res): # Stitch core back together then match it to the original scaffold to # determine which RGroup in the result corresponds to which RGroup in the # original scaffold. stitched_core = _get_stitched_core(res) original_scaffold = _make_unspecified_bonds_queries(original_scaffold) stitched_core = _make_unspecified_bonds_queries(stitched_core) new_res = {'Core': original_scaffold} match = stitched_core.GetSubstructMatch(original_scaffold) if len(match) != stitched_core.GetNumAtoms(): return None for at in original_scaffold.GetAtoms(): if at.HasProp(MOL_PROP_R_LABEL): new_core_at = stitched_core.GetAtomWithIdx(match[at.GetIdx()]) rgroup_label = f'R{new_core_at.GetProp(MOL_PROP_R_LABEL)}' new_res[f'R{at.GetProp(MOL_PROP_R_LABEL)}'] = res[rgroup_label] return new_res def _replace_core_query(cores): """ Split each of the cores at the rgroups to allow for RGroup decomposition when an rgroup is 'inside' a scaffold """ scaff_split = False new_cores = [] for core in cores: # find dummy atoms with a degree>1 that are not rgroup labels core_query = Chem.MolFromSmarts('[#0D{2-}]') matches = [ at for at in core.GetSubstructMatches(core_query) if core.GetAtomWithIdx(at[0]).HasProp(MOL_PROP_R_LABEL) ] if not matches: new_cores.append(core) continue res = Chem.RWMol(core) null_query = Chem.AtomFromSmarts('*') atoms_removed = [] next_rgroup = len( [at for at in core.GetAtoms() if at.HasProp(MOL_PROP_R_LABEL)]) + 1 res.BeginBatchEdit() for match in matches: midx = match[0] match = res.GetAtomWithIdx(midx) nbrs = match.GetNeighbors() first = True for nbr in nbrs: if nbr.GetIdx() not in atoms_removed: idx = res.AddAtom(Chem.Atom(0)) atom = res.GetAtomWithIdx(idx) if first: atom.SetProp(ATOM_PROP_DUMMY_LABEL, match.GetProp(ATOM_PROP_DUMMY_LABEL)) atom.SetIntProp(MOL_PROP_R_LABEL, match.GetIntProp(MOL_PROP_R_LABEL)) first = False else: atom.SetProp(ATOM_PROP_DUMMY_LABEL, f'R{next_rgroup}') atom.SetIntProp(MOL_PROP_R_LABEL, next_rgroup) next_rgroup += 1 atom.SetIntProp(ATOM_PROP_MATCH_TO_RG, match.GetIntProp(MOL_PROP_R_LABEL)) bond = res.GetBondBetweenAtoms(nbr.GetIdx(), midx) new_bond_idx = res.AddBond(nbr.GetIdx(), idx, bond.GetBondType()) res.GetBondBetweenAtoms(nbr.GetIdx(), idx).SetIsConjugated( bond.GetIsConjugated()) res.RemoveBond(nbr.GetIdx(), midx) res.RemoveAtom(midx) atoms_removed.append(midx) res.CommitBatchEdit() new_cores.append(res) scaff_split = True return new_cores, scaff_split def _is_atom_conjugated(atom): for bnd in atom.GetBonds(): if bnd.GetIsConjugated(): return True return False def _adjust_bonds_to_degree_one_neighbors_of_conjugated_atoms(scaff): res = Chem.RWMol(scaff) for atom in scaff.GetAtoms(): if atom.GetDegree() != 1: continue bond = atom.GetBonds()[0] if bond.GetBondType( ) != Chem.BondType.SINGLE or not _is_atom_conjugated( bond.GetOtherAtom(atom)): continue qb = Chem.BondFromSmarts('-,=,:') res.ReplaceBond(bond.GetIdx(), qb) return res def _clean_bonds_to_attachment_points(match): for k, mol in match.items(): if k == 'Core': continue attch_pt_num = 1 for at in mol.GetAtoms(): # Assign correct attachment point properties if at.HasProp(MOL_PROP_ATTACHPT): at.ClearProp(MOL_PROP_ATTACHPT) if at.HasProp(MOL_PROP_R_LABEL): at.SetProp(ATOM_PROP_ATOM_LABEL, f'_AP{attch_pt_num}') attch_pt_num += 1 # All bonds to attachment points should be single, nonaromatic # bonds for consistency with read from .sdf for bnd in at.GetBonds(): bnd.SetIsAromatic(False) bnd.SetBondType(Chem.rdchem.BondType.SINGLE) return match def _rgroup_is_single_non_graph_hydrogen(rgroup): if rgroup.GetNumAtoms() != 2: return False hydrogen_at = None if rgroup.GetAtomWithIdx(0).GetAtomicNum() == 1: hydrogen_at = rgroup.GetAtomWithIdx(0) elif rgroup.GetAtomWithIdx(1).GetAtomicNum() == 1: hydrogen_at = rgroup.GetAtomWithIdx(1) else: return False if not hydrogen_at.HasProp(ATOM_PROP_MAP_NUMBER): # not in match mol graph return True return False
[docs]def get_scaff_without_non_graph_hydrogen_rgroups(decomp): """ Removes non-graph hydrogen rgroups from the given scaffold so that the scaffold coordinates can be used to align the match mol. """ rgroup_numbers_to_remove = [] for label, rgroup in decomp.items(): if label == 'Core': continue if _rgroup_is_single_non_graph_hydrogen(rgroup): rgroup_numbers_to_remove.append(int(label[1::])) if not rgroup_numbers_to_remove: return decomp['Core'] rwscaff = Chem.RWMol(decomp['Core']) rwscaff.BeginBatchEdit() for at in rwscaff.GetAtoms(): if at.HasProp(MOL_PROP_R_LABEL) and at.GetIntProp( MOL_PROP_R_LABEL) in rgroup_numbers_to_remove: rwscaff.RemoveAtom(at.GetIdx()) rwscaff.CommitBatchEdit() return Chem.Mol(rwscaff)
[docs]def assert_valid_scaffold(scaffold_mol: Chem.rdchem.Mol): """ Determines if provided scaffold is valid by checking for adjacent rgroups and attachment points. """ for at in scaffold_mol.GetAtoms(): if at.HasProp(MOL_PROP_R_LABEL): for neigh in at.GetNeighbors(): if neigh.HasProp(MOL_PROP_R_LABEL): raise ValueError( "Invalid scaffold; rgroups cannot be adjacent") elif at.HasProp(ATOM_PROP_ATOM_LABEL) and at.GetProp( ATOM_PROP_ATOM_LABEL).startswith("_AP"): raise ValueError( "Invalid scaffold; scaffolds cannot have attachment points")
[docs]def get_rgroup_decomp(scaffold_mol: Chem.rdchem.Mol, match_mol: Chem.rdchem.Mol, options: substructure.QueryOptions) -> List[dict]: # SHARED-8298: Scaffolds cannot have adjacent RGroups assert_valid_scaffold(scaffold_mol) scaffold_copy = Chem.Mol(scaffold_mol) if not options.stereospecific: # remove bond stereochemistry and chiral tags from scaffold_mol Chem.RemoveStereochemistry(scaffold_copy) scaffolds = [scaffold_copy] if options.fuzzy_aromatic_matching: scaffold_copy = _adjust_bonds_to_degree_one_neighbors_of_conjugated_atoms( scaffold_copy) scaffolds = [m for m in substructure.expand_query(scaffold_copy, options)] scaffolds, scaff_split = _replace_core_query(scaffolds) decomp_params = rdRGroupDecomposition.RGroupDecompositionParameters() decomp_params.onlyMatchAtRGroups = True decomp_params.allowNonTerminalRGroups = True decomp_params.substructMatchParams.useEnhancedStereo = True decomp_params.removeAllHydrogenRGroups = False decomp_params.removeAllHydrogenRGroupsAndLabels = False matches, no_match = rdRGroupDecomposition.RGroupDecompose( scaffolds, [match_mol], options=decomp_params) if no_match: return None # Otherwise return the first result with duplicate rgroups removed match = matches.pop(0) match = _clean_bonds_to_attachment_points(match) if scaff_split: match = _remove_duplicate_rgroups(Chem.Mol(scaffold_mol), match) # ensure core returned is the provided scaffold if match is not None: match['Core'] = Chem.Mol(scaffold_mol) return match