Source code for schrodinger.structutils.smiles

"""
A module for generating SMILES and unique SMILES strings.

Provides python access to the classes in the canvaslibs_ext directory.

Copyright Schrodinger, LLC. All rights reserved.

"""
# Contributors: Mike Beachy, Matvey Adzhigirey

import re
import warnings

import schrodinger.application.canvas.utils as canvasutils
from schrodinger.infra.canvas import ChmMmctAdaptor
from schrodinger.infra.canvas import ChmMmctSmilesGenerator
from schrodinger.structure import NO_STEREO
from schrodinger.structure import STEREO_FROM_3D
from schrodinger.structure import STEREO_FROM_ANNOTATION
from schrodinger.structure import STEREO_FROM_ANNOTATION_AND_GEOM
from schrodinger.structure import STEREO_FROM_GEOMETRY

from . import build

# Every value that selects a stereo-aware SMILES: the raw ChmMmctAdaptor enum
# members plus the module level STEREO_* constants. An AttributeError while
# looking up the ChmMmctAdaptor members is reported as an ImportError for the
# whole module.
try:
    _stereo_enums = {
        ChmMmctAdaptor.StereoFromGeometry,
        ChmMmctAdaptor.StereoFromGeometry_Safe,
        ChmMmctAdaptor.StereoFromAnnotation,
        ChmMmctAdaptor.StereoFromAnnotation_Safe,
        ChmMmctAdaptor.StereoFromAnnotationAndGeometry,
        ChmMmctAdaptor.StereoFromAnnotationAndGeometry_Safe,
        STEREO_FROM_GEOMETRY,
        STEREO_FROM_ANNOTATION,
        STEREO_FROM_ANNOTATION_AND_GEOM,
        STEREO_FROM_3D,
    }
except AttributeError:
    raise ImportError(
        "The canvaslibs_ext library is not available for this platform, "
        "so the smiles module will not work.")

# Default property names returned by get_property_name() for unique SMILES
# generated without and with stereochemistry, respectively.
unique_smiles_prop_name = 's_canvas_Unique_SMILES'
unique_smiles_stereo_prop_name = 's_canvas_Unique_SMILES_Stereo'

# Matches the names of structure-level properties that are treated as mmstereo
# annotations by _determine_stereo_option() and remove_stereo_annotation().
_mmstereo_re = re.compile("^s_st_(Chirality|EZ|PM|AtomNumChirality)_")

# Canvas license handle; acquired lazily by the first SmilesGenerator that is
# constructed and then kept for the lifetime of the module.
_canvas_license = None


def get_property_name(stereo):
    """
    Return the default m2io property name for the unique SMILES string with
    the type of stereochemistry given in 'stereo'.

    :param stereo: Any of the module level variables NO_STEREO,
        STEREO_FROM_GEOMETRY, STEREO_FROM_ANNOTATION,
        STEREO_FROM_ANNOTATION_AND_GEOM, or STEREO_FROM_3D, or values from
        the actual enum in the ChmMmctAdaptor class.

    :rtype: str
    :return: `unique_smiles_prop_name` when no stereochemistry is requested,
        `unique_smiles_stereo_prop_name` for every recognized stereo option.

    :raise ValueError: If 'stereo' is not a recognized stereochemistry type.
    """
    if stereo == NO_STEREO or stereo == ChmMmctAdaptor.NoStereo:
        return unique_smiles_prop_name
    if stereo in _stereo_enums:
        return unique_smiles_stereo_prop_name

    # ValueError matches what _translate_stereo_enum raises for a bad option
    # and is still caught by callers that handle the generic Exception.
    # The one-element tuple keeps the formatting correct when 'stereo' is
    # itself a tuple.
    raise ValueError("Unrecognized stereochemistry type: '%s'" % (stereo,))
def _determine_stereo_option(struct, safe=True):
    """
    Pick a stereo determination mode from the properties of a Structure.

    When at least one property name of the structure matches the mmstereo
    pattern (`_mmstereo_re`), the annotation based mode is chosen; otherwise
    the geometry based mode is chosen.

    Note: the stereo annotations can be stale.

    :param struct: Structure whose property names are inspected.

    :type safe: bool
    :param safe: If True, the "_Safe" variant of the chosen mode is returned.

    :return: ChmMmctAdaptor.StereoFromAnnotation(_Safe) if mmstereo
        properties are present, ChmMmctAdaptor.StereoFromGeometry(_Safe)
        otherwise.
    """
    annotated = any(
        _mmstereo_re.match(prop_name) for prop_name in struct.property)

    if annotated:
        return (ChmMmctAdaptor.StereoFromAnnotation_Safe
                if safe else ChmMmctAdaptor.StereoFromAnnotation)
    return (ChmMmctAdaptor.StereoFromGeometry_Safe
            if safe else ChmMmctAdaptor.StereoFromGeometry)
def remove_stereo_annotation(struct):
    """
    Delete every mmstereo annotation property from the given structure.

    There is no current option in the SmilesGenerator that can be used to
    determine stereochemical information from the 3d geometry only. The
    STEREO_FROM_3D option does not override any stereochemical annotations
    that are already present in a structure. To have the stereochemistry
    determined by the 3d geometry only, call this function first to strip
    the existing annotations.

    This is a recognized issue that will be addressed in future releases in
    a backwards compatible way.

    :param struct: Structure to modify in place; all of its properties whose
        names match the mmstereo pattern are removed.
    """
    # Collect the names up front so the property mapping is never modified
    # while it is being iterated.
    annotation_names = [
        name for name in struct.property if _mmstereo_re.match(name)
    ]
    for name in annotation_names:
        del struct.property[name]
def _translate_stereo_enum(stereo, safe=True):
    """
    Translate a module level stereochemistry constant into its
    ChmMmctAdaptor enum equivalent.

    :param stereo: One of NO_STEREO, STEREO_FROM_GEOMETRY,
        STEREO_FROM_ANNOTATION, STEREO_FROM_ANNOTATION_AND_GEOM or the
        deprecated STEREO_FROM_3D (a DeprecationWarning is issued for it).

    :type safe: bool
    :param safe: If True, the "_Safe" variant of the enum is returned. Not
        used for NO_STEREO.

    :return: The matching ChmMmctAdaptor enum value.

    :raise ValueError: If 'stereo' is none of the constants listed above.
    """
    if stereo == STEREO_FROM_GEOMETRY:
        return (ChmMmctAdaptor.StereoFromGeometry_Safe
                if safe else ChmMmctAdaptor.StereoFromGeometry)

    if stereo == STEREO_FROM_ANNOTATION:
        return (ChmMmctAdaptor.StereoFromAnnotation_Safe
                if safe else ChmMmctAdaptor.StereoFromAnnotation)

    if stereo == STEREO_FROM_ANNOTATION_AND_GEOM or stereo == STEREO_FROM_3D:
        if stereo == STEREO_FROM_3D:
            deprecation_msg = ("The STEREO_FROM_3D is deprecated in favor of "
                               "STEREO_FROM_ANNOTATION_AND_GEOM")
            # stacklevel=3 attributes the warning two frames above this
            # function rather than to this function or its direct caller.
            warnings.warn(deprecation_msg, DeprecationWarning, stacklevel=3)
        return (ChmMmctAdaptor.StereoFromAnnotationAndGeometry_Safe
                if safe else ChmMmctAdaptor.StereoFromAnnotationAndGeometry)

    if stereo == NO_STEREO:
        return ChmMmctAdaptor.NoStereo

    raise ValueError("SmilesGenerator: invalid stereo option: %s" % stereo)
class SmilesGenerator(object):
    """
    A class to generate a SMILES string from a Structure object. This is
    just a thin wrapper to the canvaslibs_ext classes.
    """

    # Temporary atom property used by getStandardizedSmiles() to tell the
    # atoms of the input structure apart from atoms added along the way.
    _ORIG_ATOM_PROP = 'b_is_orig_atom'

    def __init__(self,
                 stereo=STEREO_FROM_ANNOTATION_AND_GEOM,
                 unique=True,
                 safe=True,
                 wantAllH=False,
                 forceAllBondOrders=False,
                 wildcardAllAtoms=False):
        """
        Construct a SmilesGenerator with specific behavior for
        stereochemistry and unique smiles.

        :param stereo: This should be set to one of the module level
            constants and will specify behavior in the getSmiles method.
            Can be one of the following (default is
            STEREO_FROM_ANNOTATION_AND_GEOM):

            - STEREO_FROM_ANNOTATION_AND_GEOM - Derive stereochemistry from
              annotations, but use the 3D coordinates when no annotation is
              present. This is the same behavior as the old STEREO_FROM_3D
              option, which is deprecated.
            - STEREO_FROM_ANNOTATION - Derive stereochemistry from
              pre-existing mmstereo properties (faster, so useful when
              structures are known to be 2D).
            - STEREO_FROM_GEOMETRY - Derive stereochemistry from the 3D
              coordinates only (for 3D structures). Annotations are used
              for 2D structures.
            - NO_STEREO - Don't include stereochemistry.

        :type unique: bool
        :param unique: If True, generate unique (a.k.a. canonical) SMILES.

        :type safe: bool
        :param safe: If True, use only stereochemistry from mmstereo that is
            deemed "safe" by the Canvas libraries. If False, use all
            stereochemistry info from mmstereo. This is relevant for the
            STEREO_FROM_GEOMETRY, STEREO_FROM_ANNOTATION, and
            STEREO_FROM_ANNOTATION_AND_GEOM options of the stereo argument.

        :type wantAllH: bool
        :param wantAllH: If True, each hydrogen receives its own SMILES
            token.

        :type forceAllBondOrders: bool
        :param forceAllBondOrders: If True, all bond orders in the SMILES
            will be explicit. By default, aromatic and single bond orders
            (C-C, c:c) are suppressed.

        :type wildcardAllAtoms: bool
        :param wildcardAllAtoms: If True, all heavy atoms will appear as
            asterisks when calling getSmiles(). Ignored when calling
            getSmilesAndMap().

        :raise ValueError: If 'stereo' is not one of the supported options.
        """
        # The Canvas license is acquired once per process and shared by all
        # generator instances through the module level variable.
        global _canvas_license
        if _canvas_license is None:
            _canvas_license = canvasutils.get_license(
                canvasutils.LICENSE_SHARED)

        self.smiles_generator = ChmMmctSmilesGenerator()
        # Keep this a direct call: the deprecation warning issued by
        # _translate_stereo_enum uses stacklevel=3, which then refers to the
        # code that constructs the SmilesGenerator.
        self.stereo = _translate_stereo_enum(stereo, safe)
        self.unique = unique
        self.safe = safe
        self.wildcardAllAtoms = wildcardAllAtoms
        self.smiles_generator.setForceHydrogens(wantAllH)
        self.smiles_generator.setForceAllBondOrders(forceAllBondOrders)

    def getSmiles(self, struct):
        """
        Returns a SMILES string for a structure. Use the wantAllH option
        when initializing the SmilesGenerator instance if hydrogens are
        needed.

        :param struct: The Structure object from which to generate the
            SMILES string.

        :return: The unique SMILES if the generator was created with
            unique=True, the plain SMILES otherwise.
        """
        ct_handle = struct.handle
        stereo = self.stereo
        wildcard = self.wildcardAllAtoms
        if self.unique:
            return self.smiles_generator.getUniqueSmiles(
                ct_handle, stereo, wildcard)
        return self.smiles_generator.getSmiles(ct_handle, stereo, wildcard)

    def getSmilesAndMap(self, struct):
        """
        Returns a SMILES string and index mapping of the atoms in a
        structure. Use the wantAllH option when initializing the
        SmilesGenerator instance to specify whether hydrogens should be
        included (default is to include heavy atoms only).

        The wildcardAllAtoms setting of the generator is not applied here.

        :param struct: The Structure object from which to generate the
            SMILES string.

        :rtype: (str, list)
        :return: SMILES string, and a list of new atom indices, which can be
            passed directly to build.reorder_atoms().
        """
        ct_handle = struct.handle
        stereo = self.stereo
        if self.unique:
            return self.smiles_generator.getUniqueSmilesAndMap(
                ct_handle, stereo)
        return self.smiles_generator.getSmilesAndMap(ct_handle, stereo)

    @staticmethod
    def _delete_unmarked_atoms(st):
        """
        Delete every atom of 'st' that does not carry the original-atom
        marker, and remove the marker from the atoms that do.
        """
        marker = SmilesGenerator._ORIG_ATOM_PROP
        delete_atoms = []
        for atom in st.atom:
            if atom.property.get(marker):
                del atom.property[marker]
            else:
                delete_atoms.append(atom.index)
        st.deleteAtoms(delete_atoms)

    def getStandardizedSmiles(self, struc):
        """
        Get a SMILES string representing the standardized version of a
        structure by neutralizing the structure first. This ensures
        different ionization states of the same compound produce the same
        SMILES. Different tautomers will still generate different SMILES.

        To check whether different input structures are tautomers of each
        other, analyze.generate_tautomer_code() can be used.

        The input structure is modified temporarily: its atoms are tagged
        and hydrogens are added to it. The added atoms and the tags are
        removed again before this method returns, also when adding
        hydrogens or neutralizing fails.

        :param struc: Structure to get the standardized SMILES string for.
        :type struc: schrodinger.structure.Structure

        :return: Standardized SMILES string
        :rtype: str
        """
        # Tag the atoms that are present now, so anything added below can be
        # recognized by the absence of the tag. This is done outside of the
        # try block on purpose: cleaning up a partially tagged structure
        # would delete original atoms.
        for atom in struc.atom:
            atom.property[self._ORIG_ATOM_PROP] = True

        try:
            build.add_hydrogens(struc)
            neut_st = build.neutralize_structure(struc)
        finally:
            # Always restore the caller's structure, even on failure.
            self._delete_unmarked_atoms(struc)

        # We need to remove the hydrogens added prior to neutralization
        # before generating SMILES because the added hydrogens may cause
        # us to lose information about which stereo centers were defined
        # and which weren't.
        self._delete_unmarked_atoms(neut_st)

        return self.getSmiles(neut_st)

    def getUniqueOrder(self, struct):
        """
        Returns a canonicalized ordering of atoms in the given structure.

        NOTE: Structure MUST contain all hydrogens.

        NOTE: Uniqueness process does not consider atom coordinates, so
        symmetrically equivalent atoms will have arbitrarily assigned
        ordering. In other words, identical conformers are likely to have a
        >0 RMSD after renumbering.

        :type struct: `structure.Structure`
        :param struct: The Structure object for which to determine the
            canonical atom ordering.

        :rtype: list(int)
        :return: List of canonically ordered atom indices, which can be
            passed directly to build.reorder_atoms().
        """
        order = self.smiles_generator.getUniqueOrder(struct.handle)
        return list(order)

    def canonicalize(self, pattern):
        """
        Return canonicalized (unique) version of the specified SMILES
        string.

        :param pattern: SMILES string to canonicalize.

        :return: The result of the underlying generator's canonicalize()
            call for 'pattern'.
        """
        return self.smiles_generator.canonicalize(pattern)
#EOF