# Source code for schrodinger.application.canvas.fingerprint

"""

Higher-level wrappers to the Canvas Fingerprint generation and manipulation
classes.

Copyright Schrodinger, LLC. All rights reserved.

"""

# Contributors: Quentin McDonald

from textwrap import dedent

from schrodinger.infra import canvas

############# Canvas classes begin here ##################################


class CanvasFingerprintGenerator(object):
    """
    A class to encapsulate canvas finger print generation. Fingerprints may
    be generated and returned as Fingerprint objects or may be written to
    a file.

    The underlying canvas fingerprinter object is created lazily and is
    discarded whenever the fingerprint type, precision or atom typing
    scheme changes. The fingerprinter is also the object on which `open()`
    is called, so callers should `close()` an open file before changing
    any of those settings.
    """

    # Long (display) names and the matching short (command line) names.
    # The two lists are index-aligned.
    FINGERPRINT_TYPES = [
        'Linear', 'Radial', 'MolPrint2D', 'Atom Pairs', 'Atom Triplets',
        'Topological Torsions', 'Dendritic'
    ]
    SHORT_FINGERPRINT_TYPES = [
        'linear', 'radial', 'molprint2D', 'pairwise', 'triplet', 'torsion',
        'dendritic'
    ]

    # Human readable descriptions of the atom typing schemes. Scheme number
    # N is stored at index N - 1.
    ATOM_TYPING_SCHEMES = [
        " 1. All atoms equivalent; all bonds equivalent.",
        " 2. Atoms distinguished by HB acceptor/donor; all bonds equivalent.",
        " 3. Atoms distinguished by hybridization state; all bonds equivalent",
        " 4. Atoms distinguished by functional type: {H}, {C}, {F,Cl}, "
        "{Br,I}, {N,O}, {S}, {other}; bonds by hybridization.",
        " 5. Mol2 atom types; all bonds equivalent.",
        " 6. Atoms distinguished by whether terminal, halogen, "
        "HB acceptor/donor; bonds distinguished by bond order",
        " 7. Atomic number and bond order",
        " 8. Atoms distinguished by ring size, aromaticity, "
        "HB acceptor/donor, ionization potential, whether terminal, "
        "whether halogen; bonds distinguished by bond order",
        " 9. Carhart atom types (atom-pairs approach); all bonds equivalent.",
        "10. Daylight invariant atom types; bonds distinguished by bond "
        "order.",
        "11. Same as 7, but aromatic distinguished from non-aromatic",
        "12. Same as 10, but cyclic aliphatic distinguished from acyclic "
        "aliphatic"
    ]

    # Supported bit widths; the first entry is the initial precision.
    PRECISION = [32, 64]

    # Record the default atom typing scheme for each fingerprint type. Note
    # that dendritic defaults to the linear atom typing scheme.
    DEFAULT_ATOM_TYPING_SCHEMES = {
        "linear": canvas.DEFAULT_LINEAR_ATOM_TYPING_STYLE,
        "radial": canvas.DEFAULT_RADIAL_ATOM_TYPING_STYLE,
        "torsion": canvas.DEFAULT_TORSIONAL_ATOM_TYPING_STYLE,
        "pairwise": canvas.DEFAULT_PAIRWISE_ATOM_TYPING_STYLE,
        "triplet": canvas.DEFAULT_TRIPLET_ATOM_TYPING_STYLE,
        "molprint2D": canvas.DEFAULT_MOLPRINT2D_ATOM_TYPING_STYLE,
        "dendritic": canvas.DEFAULT_LINEAR_ATOM_TYPING_STYLE
    }

    # Short fingerprint type name -> name prefix of the canvas fingerprinter
    # class. The precision ("32" or "64") is appended to get the class name.
    _FINGERPRINTER_CLASS_PREFIXES = {
        "linear": "ChmLinearOut",
        "radial": "ChmRadialOut",
        "molprint2D": "ChmMolprint2D",
        "pairwise": "ChmPairwiseOut",
        "triplet": "ChmTripletOut",
        "torsion": "ChmTopologicalTorsionOut",
        "dendritic": "ChmDendriticOut",
    }

    def __del__(self):
        # getattr guards against a partially constructed instance, where
        # __init__ raised before _filename was assigned.
        if getattr(self, "_filename", None) is not None:
            self.close()

    def __init__(self, logger, default_type='Linear'):
        """
        Create a generator with 32-bit precision and the default atom typing
        scheme of the initial fingerprint type.

        :param logger: object providing a `debug(message)` method

        :type default_type: str
        :param default_type: initial fingerprint type, a long or short name
            from FINGERPRINT_TYPES / SHORT_FINGERPRINT_TYPES
        """
        self._fingerprinter = None
        self._filename = None
        self._current_type = default_type
        self._precision = int(self.PRECISION[0])
        self._current_file_name = None
        self._adaptor = canvas.ChmMmctAdaptor()
        self._logger = logger

        name_pairs = list(
            zip(self.FINGERPRINT_TYPES, self.SHORT_FINGERPRINT_TYPES))

        # Per-type [long, short] alias lists. Kept as separate attributes
        # so that existing code which reads them keeps working.
        (self._linear_names, self._radial_names, self._molprint2D_names,
         self._atom_pair_names, self._triplet_names, self._torsion_names,
         self._dendritic_names) = [list(pair) for pair in name_pairs]

        self._long_names_to_short = dict(name_pairs)

        # Assign the default atom/bond type. This needs to be done at
        # the end after initialization of long_names_to_short:
        self._current_atom_bond_type = self.getDefaultAtomTypingScheme()

    def debug(self, output):
        """
        Wrapper for debug logging, just to simplify logging
        """
        self._logger.debug(output)

    def getDefaultAtomTypingScheme(self):
        """
        Return the default atom typing scheme for the currently selected
        fingerprint type.

        :raise KeyError: if the current type is not a known fingerprint type
        """
        # The current type may be stored under its long name; the defaults
        # table is keyed by short name.
        short_name = self._long_names_to_short.get(self._current_type,
                                                   self._current_type)
        return self.DEFAULT_ATOM_TYPING_SCHEMES[short_name]

    def getDescription(self):
        """
        Returns a string representing a summary of the current fingerprint
        settings
        """
        scheme = self.ATOM_TYPING_SCHEMES[int(self._current_atom_bond_type) -
                                          1]
        return "%s with %s-bit precision\nAtom typing is\n '%s'" % (
            self._current_type, self._precision, scheme)

    def getCurrentType(self):
        """
        Returns the name of the fingerprint type currently set
        """
        return self._current_type

    def setType(self, fp_type):
        """
        Set the type of fingerprints to be generated by this generator.

        :type fp_type: str
        :param fp_type: one of the values in the class variables
            FINGERPRINT_TYPES or SHORT_FINGERPRINT_TYPES

        :raise Exception: if the type is not recognized
        """
        if (fp_type not in self.FINGERPRINT_TYPES and
                fp_type not in self.SHORT_FINGERPRINT_TYPES):
            raise Exception("Unknown fingerprint type %s" % fp_type)

        if fp_type != self._current_type:
            self.debug("FPGen - Setting type to '%s'" % fp_type)
            self._current_type = fp_type
            # We need to regenerate the fingerprinter if the fingerprint
            # type changes:
            self._fingerprinter = None

    def setPrecision(self, precision):
        """
        Set the number of bits to be used for fingerprint generation.

        :param precision: bit width, anything `int()` accepts; the value is
            stored as an int and must be one of PRECISION

        :raise Exception: if the bit width is not supported
        """
        # Normalize once so the stored value, the log message and the
        # `== 32` test in _getFingerprinter all see the same int.
        bits = int(precision)
        if bits not in self.PRECISION:
            raise Exception("Unknown bit width %d" % bits)

        if bits != self._precision:
            self._precision = bits
            # We need to regenerate the fingerprinter if the precision
            # changes:
            self._fingerprinter = None
        self.debug("FPGen - Setting num bits to '%d'" % bits)

    def getPrecision(self):
        """
        Returns the current number of bits used for fingerprinting
        """
        return self._precision

    def setAtomBondTyping(self, atom_bond_typing):
        """
        Set the atom typing scheme.

        :type atom_bond_typing: int
        :param atom_bond_typing: an integer from 1 to the number of atom
            typing schemes. The schemes are described in the class variable
            ATOM_TYPING_SCHEMES

        :raise Exception: if the value is not an int in the valid range
        """
        n_schemes = len(self.ATOM_TYPING_SCHEMES)
        # bool is a subclass of int but is not a meaningful scheme index.
        is_index = (isinstance(atom_bond_typing, int) and
                    not isinstance(atom_bond_typing, bool))
        if not is_index or not 1 <= atom_bond_typing <= n_schemes:
            # %s rather than %d: the offending value may not be numeric.
            raise Exception(
                "Unknown atom typing index: %s, must be between 1 and %d" %
                (atom_bond_typing, n_schemes))

        if atom_bond_typing != self._current_atom_bond_type:
            self._current_atom_bond_type = atom_bond_typing
            # We need to regenerate the fingerprinter if the atom typing
            # scheme changes:
            self._fingerprinter = None
        self.debug("FPGen - Setting atom/bond typing to '%d'" %
                   atom_bond_typing)

    def getCurrentAtomBondTyping(self):
        """
        Returns the current atom bond typing value
        """
        return self._current_atom_bond_type

    def _getFingerprinter(self):
        """
        A private method which will return a canvas fingerprinter object
        appropriate to the current type, precision and atom typing settings.
        The object is created on first use and then reused.

        :raise Exception: if the current fingerprint type is not recognized
        """
        if self._fingerprinter is not None:
            return self._fingerprinter

        short_name = self._long_names_to_short.get(self._current_type,
                                                   self._current_type)
        prefix = self._FINGERPRINTER_CLASS_PREFIXES.get(short_name)
        if prefix is None:
            raise Exception("Unknown fingerprint type: %s" %
                            self._current_type)

        # Any precision other than 32 selects the 64-bit class.
        class_name = prefix + ("32" if self._precision == 32 else "64")
        atype = self._current_atom_bond_type
        self.debug("FPGen - creating %s(%d)" % (class_name, atype))
        self._fingerprinter = getattr(canvas, class_name)(atype)
        return self._fingerprinter

    def generate(self,
                 st,
                 chmmol=False,
                 stereo=canvas.ChmMmctAdaptor.NoStereo):
        """
        Return a fingerprint object using the current settings for type,
        bit width and atom typing for the Structure object st

        :type st: schrodinger.structure.Structure or canvas.base.chmmol
            object
        :param st: structure to generate the fingerprint for

        :type chmmol: bool
        :param chmmol: True if the structure is a chmmol object, False if it
            is a Structure object

        :type stereo: canvas.ChmMmctAdaptor.StereoType
        :param stereo: stereo type that should be used when creating chmmol
            from a Structure object
        """
        mol = st if chmmol else self._adaptor.create(st.handle, stereo)
        return self._getFingerprinter().generate(mol)

    def open(self, filename):
        """
        Open a file to which fingerprints are to be written. A file that is
        already open is closed first.
        """
        if self._filename is not None:
            self.close()
        self._getFingerprinter().open(filename)
        self._filename = filename

    def write(self,
              st,
              fingerprint_id,
              chmmol=False,
              stereo=canvas.ChmMmctAdaptor.NoStereo):
        """
        Create a fingerprint from the structure 'st' and add it to the file
        with the ID 'fingerprint_id'.

        :type st: schrodinger.structure.Structure or canvas.base.chmmol
            object
        :param st: structure to generate the fingerprint for

        :param fingerprint_id: ID for the fingerprint; converted with str()

        :type chmmol: bool
        :param chmmol: True if the structure is a chmmol object, False if it
            is a Structure object

        :type stereo: canvas.ChmMmctAdaptor.StereoType
        :param stereo: stereo type that should be used when creating chmmol
            from a Structure object (same meaning as in `generate`)

        :raise Exception: if a file has not been opened
        """
        if self._filename is None:
            raise Exception("Attempt to write when no file has been opened")

        mol = st if chmmol else self._adaptor.create(st.handle, stereo)
        self._getFingerprinter().write(mol, str(fingerprint_id))

    def close(self):
        """
        Close the file which was previously open for finger print generation

        :raise Exception: if no file has been opened
        """
        if self._filename is None:
            raise Exception("Attempt to close when no file has been opened")

        # Reset the state first so a failing close() cannot leave the
        # generator looking "open" (which would retry from __del__).
        fprinter, self._fingerprinter = self._fingerprinter, None
        self._filename = None

        # The fingerprinter is None here if a settings change discarded it
        # while the file was open. Do not build a fresh one just to call
        # close() on an object that never opened anything.
        if fprinter is not None:
            fprinter.close()
############# Command line specific classes start here:
class CanvasFingerprintGeneratorCLI(CanvasFingerprintGenerator):
    """
    A subclass of the canvas fingerprint generator which is to be used
    from a program with a command line interface. This class has methods
    for defining options in an option parser and for applying those options
    once they've been parsed. The idea is to provide a standard command
    line interface for setting the fingerprint options
    """

    def __init__(self, logger, default_type='Linear'):
        """
        See `CanvasFingerprintGenerator.__init__`; the arguments are passed
        through unchanged.
        """
        super(CanvasFingerprintGeneratorCLI, self).__init__(
            logger, default_type)

    def addOptions(self, parser):
        """
        Add options for fingerprint type, atom typing scheme and number of
        bits to use.

        :type parser: argparse.ArgumentParser
        :param parser: parser to which -fp_type, -fp_ab_type and -fp_bits
            are added
        """
        # NOTE(review): the -fp_type default is always 'linear', regardless
        # of the default_type given to the constructor - confirm whether the
        # constructor default should be honoured here.
        parser.add_argument("-fp_type",
                            action="store",
                            type=str,
                            choices=self.SHORT_FINGERPRINT_TYPES,
                            metavar="<type name>",
                            default='linear',
                            help="Type of fingerprint")

        parser.add_argument(
            "-fp_ab_type",
            action="store",
            metavar="<index>",
            # Note set default to non-existent type so we use default
            # for fingerprint type if it's not specified:
            default=-1,
            type=int,
            help="Atom bond typing scheme (1-%d, see above)" %
            len(self.ATOM_TYPING_SCHEMES))

        parser.add_argument(
            "-fp_bits",
            action="store",
            default=32,
            type=int,
            choices=self.PRECISION,
            metavar="<bits>",
            help="Bit precision to use in fingerprint (32 or 64)")

    def parseOptions(self, options):
        """
        Examine the options and set the internal state to reflect them.

        :param options: parsed options with `fp_type`, `fp_ab_type` and
            `fp_bits` attributes, as defined by `addOptions`
        """
        # The type must be set first: the default atom typing scheme
        # depends on it.
        self.setType(options.fp_type)

        fp_ab_type = int(options.fp_ab_type)
        if fp_ab_type == -1:
            # Default - set it based on current fingerprint type
            fp_ab_type = self.getDefaultAtomTypingScheme()
        self.setAtomBondTyping(fp_ab_type)

        self.setPrecision(int(options.fp_bits))

    def getOptionDesc(self):
        """
        A method which returns a one-line summary of the options supported
        by the fingerprint generator
        """
        return "-fp_type <%s> -fp_ab_type <1-%d> -fp_bits <%s>" % (
            "|".join(self.SHORT_FINGERPRINT_TYPES),
            len(self.ATOM_TYPING_SCHEMES),
            "|".join(str(bits) for bits in self.PRECISION))

    def getAtomBondTypingSchemeDescription(self):
        """
        Return a string which contains a description of the atom and
        bond typing schemes available for fingerprint generation, followed
        by the default scheme of each fingerprint type
        """
        intro = dedent("""
            Atom and bond typing schemes are described by an integer from 1 to %d.
            The schemes are:
            """) % len(self.ATOM_TYPING_SCHEMES)

        schemes = "".join(
            "%s\n" % scheme for scheme in self.ATOM_TYPING_SCHEMES)

        defaults = dedent("""
            The default values depend on the fingerprint types:
                linear=%(linear)d
                radial=%(radial)d
                molprint2D=%(molprint2D)d
                pairwise=%(pairwise)d
                triplet=%(triplet)d
                torsion=%(torsion)d
                dendritic=%(dendritic)d
            """) % self.DEFAULT_ATOM_TYPING_SCHEMES

        return intro + schemes + defaults

    def getFingerprintDescription(self):
        """
        Return a string which lists the available fingerprint types, each
        as its long name followed by the short name accepted by -fp_type
        """
        header = "\nAvailable finger print types are (-fp_type arg) \n"
        entries = [
            "    %s (%s)\n" % names for names in zip(
                self.FINGERPRINT_TYPES, self.SHORT_FINGERPRINT_TYPES)
        ]
        return header + "".join(entries)