Source code for schrodinger.application.scaffold_enumeration.posvarbond

'''
Implements "position variant bond" enumeration (see ENUM-252).
'''

import collections
import itertools

import rdkit.Chem

from schrodinger.utils import log

from . import common

logger = log.get_output_logger(__name__)

#------------------------------------------------------------------------------#

# * "multicenter S-group" (MCG) is a pair of "group" and "center";
#   "group" is a set of atoms, "center" is a (dummy) atom with
#   exactly one bond ("center" cannot be a member of the "group");
#
# * multiple MCGs can be associated with a molecule; "center" of any
#   MCG may not belong to a "group" in any other MCG; "groups" from
#   different MCGs may overlap
#
# * "center" atom may not be bonded to any of the "group" atoms in its
#   MCG or "center" atom of any other MCG
#
# * "position variant bond" (PVB) is defined by "multicenter S-group": in a
#   specific PVB realization, the single bond of the MCG "center" is
#   re-connected from the "center" (dummy) to one of the "group" atoms,
#   and the "center" itself is removed

MulticenterSgroup = collections.namedtuple('MulticenterSgroup',
                                           ['atoms', 'center'])

#------------------------------------------------------------------------------#


def _is_multicenter_sgroup(g):
    '''
    :param g: MRV S-group as dictionary.
    :type g: dict
    '''

    return g.get('role', '') == 'MulticenterSgroup'


#------------------------------------------------------------------------------#


def _drop_multicenter_sgroups(mol):
    '''
    Remove multicenter S-groups from the S-groups associated with `mol`.
    '''

    common.set_sgroups(
        mol,
        [g for g in common.get_sgroups(mol) if not _is_multicenter_sgroup(g)])


#------------------------------------------------------------------------------#


def _collect_posvarbonds(mol):
    '''
    Collects "position variant bond" specs from the molecule.

    :return: List of "multicenter S-groups".
    :rtype: list(MulticenterSgroup)
    '''

    a2idx = common.get_atom_id_map(mol)

    outcome = []

    for g in common.get_sgroups(mol):
        if _is_multicenter_sgroup(g):
            try:
                atom_ids = set(g['atomRefs'].split())
                center_id = g['center']
            except KeyError:
                logger.warning('incomplete multicenter S-group: %s', g)
                continue
            try:
                atoms = sorted(a2idx[a] for a in atom_ids)
                center = a2idx[center_id]
            except KeyError:
                logger.warning(
                    'undefined atom id(s) in multicenter S-group: %s', g)
                continue
            outcome.append(MulticenterSgroup(atoms, center))

    return outcome


#------------------------------------------------------------------------------#


def _validate_posvarbonds(mol, posvarbonds):
    '''
    Validates "position variant bonds" (defined by "multicenter S-groups").

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :param posvarbonds: List of multicenter S-groups.
    :type posvarbonds: list(MulticenterSgroup)

    :return: Validation success and error message.
    :rtype: (bool, str)
    '''

    centers = set()
    for pvb in posvarbonds:
        if pvb.center in centers:
            return False, 'shared "center" is not allowed'
        else:
            centers.add(pvb.center)

    num_atoms = mol.GetNumAtoms()

    for pvb in posvarbonds:
        if not pvb.atoms:
            return False, 'no atoms'

        if len(set(pvb.atoms)) != len(pvb.atoms):
            return False, 'duplicate atoms'

        for i in itertools.chain(pvb.atoms, (pvb.center,)):
            if not (i >= 0 and i < num_atoms):
                return False, 'invalid atom indices'

        if set(pvb.atoms) & centers:
            return False, 'nested multicenter S-groups are not allowed'

        center_atom = mol.GetAtomWithIdx(pvb.center)
        if center_atom.GetDegree() != 1:
            return False, 'center of a multicenter S-group is not of degree one'

        other_atom = center_atom.GetBonds()[0].GetOtherAtomIdx(pvb.center)

        if other_atom in pvb.atoms:
            return False, 'self-bonded position variant bond'

        if other_atom in centers:
            return (False, 'position variant bond between multicenter '
                    'S-groups is not allowed')

    return True, ''


#------------------------------------------------------------------------------#


def _apply_posvarbond(rwmol, pvbond, i):
    '''
    Creates i-th realization of the `pvbond`. Does not remove the
    "multicenter S-group" center atom.

    :param rwmol: Molecule.
    :type rwmol: rdkit.Chem.RWMol

    :param pvbond: Position variant bond metadata.
    :type pvbond: MulticenterSgroup

    :param i: Index of the desired realization (0 <= i < len(pvbond.atoms)).
    :type i: int
    '''

    center_atom = rwmol.GetAtomWithIdx(pvbond.center)
    center_bond = center_atom.GetBonds()[0]

    atom1_idx = center_bond.GetOtherAtomIdx(pvbond.center)
    atom2_idx = pvbond.atoms[i]
    num_bonds = rwmol.AddBond(atom1_idx, atom2_idx)

    rwmol.ReplaceBond(num_bonds - 1, center_bond)


#------------------------------------------------------------------------------#


[docs]class PosVarBondEnumerable(common.EnumerableMixin):
[docs] def __init__(self, mol, pvbonds=None): ''' :param mol: RDKit molecule. :type mol: rdkit.Chem.Mol :param pvbonds: List of position variant bonds. :type pvbonds: list(MulticenterSgroup) ''' if pvbonds is None: pvbonds = _collect_posvarbonds(mol) valid, msg = _validate_posvarbonds(mol, pvbonds) if not valid: raise ValueError('PosVarBondEnumerable: ' + msg) self.pvbonds = sorted(pvbonds, key=lambda pvb: pvb.center, reverse=True) self.mol = mol
[docs] def getExtents(self): return [len(b.atoms) for b in self.pvbonds]
[docs] def getRealization(self, idx): ''' :param idx: "Index" of a realization. :type idx: iterable over int :return: RDKit molecule without "position variant bonds". :rtype: rdkit.Chem.Mol ''' if self.pvbonds: rwmol = rdkit.Chem.RWMol(self.mol) _drop_multicenter_sgroups(rwmol) for (i, pvb) in zip(idx, self.pvbonds): _apply_posvarbond(rwmol, pvb, i) # remove "centers" for pvb in self.pvbonds: rwmol.RemoveAtom(pvb.center) return rwmol.GetMol() else: return self.mol
#------------------------------------------------------------------------------#