# Source code for schrodinger.application.combinatorial_diversity.diversity_fingerprinter

"""
This module contains the DiversityFingerprinter class, which generates
Canvas fingerprints and, optionally, a default set of physicochemical
properties for structures provided as SMILES strings.

Copyright Schrodinger LLC, All Rights Reserved.
"""

import os
from enum import Enum

from schrodinger import structure
from schrodinger.infra import canvas
from schrodinger.structutils import analyze

# Names of the supported fingerprint types.
DENDRITIC = "dendritic"
LINEAR = "linear"
MOLPRINT2D = "molprint2D"
RADIAL = "radial"

# Maps each fingerprint type name to the Canvas class that is instantiated
# to generate fingerprints of that type.
FP_DICT = {
    DENDRITIC: canvas.ChmDendriticOut32,
    LINEAR: canvas.ChmLinearOut32,
    MOLPRINT2D: canvas.ChmMolprint2D32,
    RADIAL: canvas.ChmRadialOut32,
}

# Parallel lists of the supported type names and their generator classes,
# in the same order as the entries of FP_DICT.
LEGAL_FP_TYPES = list(FP_DICT)
LEGAL_FP_CLASSES = list(FP_DICT.values())
PropertyType = Enum("PropertyType", "FLOAT INT STR MISSING")

# One row per default property: (name, description, value type). The
# descriptions may be displayed to an end-user to clarify what's actually
# being calculated. The three public lists below are derived from this
# table, so they are aligned index-for-index.
_DEFAULT_PROPERTIES = (
    ("AlogP", "Atom-based logP", PropertyType.FLOAT),
    ("MW", "Molecular weight", PropertyType.FLOAT),
    ("HBA", "Number of hydrogen bond acceptors", PropertyType.INT),
    ("HBD", "Number of hydrogen bond donors", PropertyType.INT),
    ("PSA", "Polar surface area", PropertyType.FLOAT),
    ("RB", "Number of rotatable bonds", PropertyType.INT),
    ("Rings", "Number of rings", PropertyType.INT),
    ("AromaticRings", "Number of aromatic rings", PropertyType.INT),
    ("ChiralCenters", "Number of chiral centers", PropertyType.INT),
)

PROPERTY_NAMES = [row[0] for row in _DEFAULT_PROPERTIES]
PROPERTY_DESCRIPTIONS = [row[1] for row in _DEFAULT_PROPERTIES]
PROPERTY_TYPES = [row[2] for row in _DEFAULT_PROPERTIES]


class DiversityFingerprinter:
    """
    Generates Canvas fingerprints and a default set of physicochemical
    properties for structures provided as SMILES.
    """

    def __init__(self,
                 fp_type,
                 want_props=False,
                 hba_file=None,
                 hbd_file=None):
        """
        Constructor taking a fingerprint type, whether to calculate default
        physicochemical properties, and custom rule files for assigning
        hydrogen bond acceptor and donor counts.

        :param fp_type: Fingerprint type (see LEGAL_FP_TYPES).
        :type fp_type: str

        :param want_props: Whether to calculate default properties.
        :type want_props: bool

        :param hba_file: File with customized hydrogen bond acceptor rules.
            Ignored if want_props is False.
        :type hba_file: str or NoneType

        :param hbd_file: File with customized hydrogen bond donor rules.
            Ignored if want_props is False.
        :type hbd_file: str or NoneType

        :raises KeyError: If fp_type is not supported.
        :raises FileNotFoundError: If hba_file or hbd_file can't be found.
        """
        if fp_type not in FP_DICT:
            raise KeyError(f'Fingerprint type "{fp_type}" not supported')
        self._fp_generator = FP_DICT[fp_type]()
        self._want_props = want_props
        # Empty strings are what gets handed to ChmHbond when no custom
        # rules file is supplied.
        self._hba_file = ""
        self._hbd_file = ""
        if want_props:
            if hba_file:
                if not os.path.isfile(hba_file):
                    mesg = (f'Hydrogen bond acceptor rules file "{hba_file}" '
                            'not found')
                    raise FileNotFoundError(mesg)
                self._hba_file = hba_file
            if hbd_file:
                if not os.path.isfile(hbd_file):
                    mesg = (f'Hydrogen bond donor rules file "{hbd_file}" '
                            'not found')
                    raise FileNotFoundError(mesg)
                self._hbd_file = hbd_file
            # NOTE(review): the calculators below are only used by compute()
            # when properties are requested, so they are created inside this
            # branch; the nesting was reconstructed from flattened source --
            # confirm.
            self._adaptor = canvas.ChmMmctAdaptor()
            self._AlogP = canvas.ChmAtomTyperAlogP()
            self._PSA = canvas.ChmAtomTyperPSA()
            # ChmHbond order is donor file, acceptor file.
            self._Hbond = canvas.ChmHbond(self._hbd_file, self._hba_file)

    def compute(self, smiles):
        """
        Computes fingerprints and default properties, if requested, for the
        provided SMILES. If properties are calculated, they are returned in
        the order indicated in PROPERTY_NAMES. An empty list of property
        values is returned if properties are not calculated.

        :param smiles: SMILES string for the structure.
        :type smiles: str

        :return: Fingerprint and list of property values.
        :rtype: canvas.ChmSparseBitset, list(float/int)
        """
        mol = canvas.ChmMol.fromSMILES(smiles)
        props = []
        if self._want_props:
            alogp = self._AlogP.calculateScalar(mol, True)
            mw = mol.getMW()
            hba, hbd = self._compute_hbond_counts(mol)
            psa = self._PSA.calculateScalar(mol)
            # The remaining properties are computed on a Structure built
            # from the Canvas molecule via the adaptor.
            st = structure.Structure(self._adaptor.create(mol))
            rb = analyze.get_num_rotatable_bonds(st)
            rings = len(st.ring)
            arings = sum(1 for ring in st.ring if ring.isAromatic())
            cc = len(analyze.get_chiral_atoms(st))
            props = [alogp, mw, hba, hbd, psa, rb, rings, arings, cc]
        return self._fp_generator.generate(mol), props

    def _compute_hbond_counts(self, mol):
        """
        Computes hydrogen bond acceptor and donor counts for the provided
        structure.

        :param mol: The structure.
        :type mol: canvas.ChmMol

        :return: Count of acceptors, followed by count of donors.
        :rtype: int, int
        """
        self._Hbond.assignAcceptors(mol)
        self._Hbond.assignDonors(mol)
        acceptor_count = 0
        # Indices of the heavy atoms that carry at least one donor atom.
        donor_heavy_atoms = set()
        # NOTE(review): presumably True includes hydrogens in the atom
        # count -- confirm against the Canvas API.
        for i in range(mol.getAtomCount(True)):
            if self._Hbond.isAcceptor(i):
                acceptor_count += 1
            if self._Hbond.isDonor(i):
                # Count only one donor per heavy atom: record the first
                # heavy neighbor of the donor atom, so several donor atoms
                # on the same heavy atom collapse into a single entry.
                # Assumes every donor atom has a heavy neighbor.
                heavy_atom = mol.getAtom(i).getHeavyNeighbors()[0]
                donor_heavy_atoms.add(heavy_atom.getMolIndex())
        return acceptor_count, len(donor_heavy_atoms)