Source code for schrodinger.application.canvas.topo_descriptors

"""
Module which computes constitutional and topological descriptors, walk and
path counts, and connectivity indices.

Details of the descriptors implemented here are taken from:
"Handbook of Molecular Descriptors" by Mannhols, Kubinyi and Timmerman
"Molecular Descriptors for Chemoinformatics, Vol. I", by Todeschini and Consonni

Copyright Schrodinger, LLC. All rights reserved.
"""

import bisect
import math
from collections import Counter
from collections import defaultdict
from collections import namedtuple
from functools import partial
from itertools import combinations_with_replacement
from past.utils import old_div

import networkx
import numpy
from decorator import decorator

import schrodinger.application.canvas.base as canvas_base
from schrodinger.infra import canvas
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.utils import log

logger = log.get_output_logger(__file__)

# Descriptor container
Descriptor = namedtuple("Descriptor", ["func", "cast_type", "label", "key"])
TOPO_DESCRIPTORS_PRODUCT = "desc"
HEAVY_ATOM_MAX = 150
# CANVAS-27492: Round floats to avoid platform differences
FLOAT_TRUNCATION = 8


[docs]class TopoError(Exception): """ Error raised when unable to calculate topological descriptor """
[docs]@decorator def cached_topo_descriptor(func, self, *args, **kwargs): """ Decorator to extract previously calculated descriptor value from cached descriptors dictionary. If not available, the descriptor is calculated and stored for future reference. """ key = tuple([func.__name__]) + args if key in self._cached_property: return self._cached_property[key] value = func(self, *args, **kwargs) self._cached_property[key] = value return value
[docs]class TopologicalDescriptors(object):
[docs] def __init__(self, default_descriptor_value=None): """ Initializes valid topological descriptors, and creates an empty cached property dictionary. """ self._default_value = default_descriptor_value self._cached_property = {} self._available_descriptors = self._getDescriptorEntries()
[docs] def allDescriptorFullLabels(self): """ Returns all topological descriptor labels as label-key string :return: list of all descriptors as "Label (Key)" strings :rtype: list of str """ return [ self._makeFullLabel(desc) for desc in self._available_descriptors ]
[docs] def calculateTopologicalDescriptors(self, st, descriptor_strings): """ Calculates a set of topological descriptors for a given structure. :param st: structure to consider :type st: `structure.Structure` :param descriptor_strings: list of descriptors to calculate :type descriptor_strings: list of str :return: Structure copy appended with successful descriptor properties :rtype: `structure.Structure` """ # Clear any previously cached properties self._cached_property.clear() # Create structure copy to which new properties can be appended working_st = self._extractSingleStructure(st) final_st = st.copy() # Ignore H or molecules with more 150 heavy atoms natom = self._getAtomCount(working_st) if natom == 0 or natom > HEAVY_ATOM_MAX: logger.warning("Molecule \"%s\" rejected:" % st.title) logger.warning("%d heavy atoms in the given molecule" % natom) return final_st for desc_str in descriptor_strings: try: desc_entry = self._findDescriptorEntry(desc_str) property_name = self.m2ioPropertyName(desc_entry) value = self._evaluateDescriptor(desc_entry, working_st) final_st.property[property_name] = value except TopoError as e: logger.info("-- unable to compute %s: %s" % (desc_str, e)) return final_st
[docs] def m2ioPropertyName(self, descriptor): """ Create descriptor m2io property name for a Descriptor. :param descriptor: descriptor entry :type descriptor: `Descriptor` :return: m2io formatted descriptor property name :rtype: str """ m2io_mapping = {float: "r", int: "i", bool: "b", str: "s"} # Create property name from label without spaces formatted_label = descriptor.label.strip().replace(" ", "_") property_name = TOPO_DESCRIPTORS_PRODUCT + "_" + formatted_label return m2io_mapping[descriptor.cast_type] + "_" + property_name
def _extractSingleStructure(self, st): """ Extracts a single structure if more than one is present. :param st: Original structure :type st: `structure.Structure` :return: Structure consisting of single largest molecule :rtype: `structure.Structure` """ # If there is more than one molecule in the structure, use the largest if len(st.molecule) > 1: logger.warning("Molecule \"%s\" modified:" % st.title) logger.warning("More than one structure; using largest") mol_size = [len(mol) for mol in st.molecule] largest_mol_index = mol_size.index(max(mol_size)) + 1 return st.molecule[largest_mol_index].extractStructure() # Otherwise the structure is fine as-is return st def _findDescriptorEntry(self, desc_str): """ Locates the corresponding Descriptor in the class tuple of Descriptors. Looks for Descriptor based on matching the passed string to any of the key, label, or "label (key)" strings. :param desc_str: input descriptor string :type desc_str: str :return: Descriptor namedtuple :rtype: `Descriptor` :raise TopoError: If the descriptor entry could not be found """ for entry in self._available_descriptors: if desc_str in [entry.key, entry.label, self._makeFullLabel(entry)]: return entry # Descriptor not found raise TopoError("\"%s\" unrecognized" % desc_str) def _makeFullLabel(self, descriptor): """ Create complete descriptor label-key string :param descriptor: Descriptor namedtuple :type descriptor: `Descriptor` :return: full descriptor label :rtype: str """ return "%s (%s)" % (descriptor.label, descriptor.key) def _evaluateDescriptor(self, descriptor, st): """ Evalulates a descriptor for a given structure. If successful, the value is cast to the expected type before returning. :param descriptor: Descriptor namedtuple :type descriptor: `Descriptor` :return: descriptor value, cast to the appropriate return type :rtype: one of {float, int, bool, str} :raise TopoError: If the descriptor cast-type was unrecognized """ # Evaluate descriptor, supplying try: value = descriptor.func(st) except TopoError as e: if self._default_value is not None: value = self._default_value else: raise TopoError(e) # Cast function call result try: if descriptor.cast_type is float: value = round(float(value), FLOAT_TRUNCATION) elif descriptor.cast_type is int: value = int(value) return value except TypeError as e: raise TopoError("Unable to cast to %s" % descriptor.cast_type) # Unknown cast_type raise TopoError("Unknown cast value %s" % descriptor.cast_type) # ========================================================================= # Descriptor methods # ========================================================================= def _getDescriptorEntries(self): """ Returns set of valid descriptor tuples in the form of: (descriptor key, descriptor label, function mapping) :return: set of available topological descriptor mappings :rtype: set of tuples """ entries = [] label = "First Zagreb" func = self._getFirstZagrebIndex entries.append(Descriptor(func, int, label, "ZM1")) label = "First Zagreb index by valence vertex degrees" func = self._getFirstZagrebValenceIndex entries.append(Descriptor(func, float, label, "ZM1V")) label = "Second Zagreb" func = self._getSecondZagrebIndex entries.append(Descriptor(func, int, label, "ZM2")) label = "Second Zagreb index by valence vertex degrees" func = self._getSecondZagrebValenceIndex entries.append(Descriptor(func, float, label, "ZM2V")) label = "Polarity" func = self._getPolarityNumber entries.append(Descriptor(func, int, label, "Pol")) label = "Narumi Simple Topological" func = self._getSimpleTopologicalIndex entries.append(Descriptor(func, float, label, "NST")) label = "Narumi Harmonic Topological" func = self._getHarmonicTopologicalIndex entries.append(Descriptor(func, float, label, "NHT")) label = "Narumi Geometric Topological" func = self._getGeometricTopologicalIndex entries.append(Descriptor(func, float, label, "NGT")) label = "Total structure connectivity" func = self._getTotalStructureConnectivityIndex entries.append(Descriptor(func, float, label, "TSC")) label = "Wiener" func = self._getWienerIndex entries.append(Descriptor(func, int, label, "W")) label = "Mean Wiener" func = self._getMeanWienerIndex entries.append(Descriptor(func, float, label, "MW")) label = "Xu" func = self._getXuIndex entries.append(Descriptor(func, float, label, "Xu")) label = "Quadratic" func = self._getQuadraticIndex entries.append(Descriptor(func, int, label, "QIndex")) label = "Radial centric" func = self._getRadialCentricInformationIndex entries.append(Descriptor(func, float, label, "RC")) label = "Mean Square Distance Balaban" func = self._getMeanSquareDistanceIndex entries.append(Descriptor(func, float, label, "MSDB")) label = "Superpendentic" func = self._getSuperpendenticIndex entries.append(Descriptor(func, float, label, "SP")) label = "Harary" func = self._getHararyIndex entries.append(Descriptor(func, float, label, "Har")) label = "Log of product of row sums" func = self._getLogPRSIndex entries.append(Descriptor(func, float, label, "LPRS")) label = "Pogliani" func = self._getPoglianiIndex entries.append(Descriptor(func, float, label, "Pog")) label = "Schultz Molecular Topological" func = self._getSchultzMolecularTopologicalIndex entries.append(Descriptor(func, int, label, "SMT")) label = "Schultz Molecular Topological by valence vertex degrees" func = self._getSchultzMolecularTopologicalValenceIndex entries.append(Descriptor(func, float, label, "SMTV")) label = "Mean Distance Degree Deviation" func = self._getMeanDistanceDegreeDeviation entries.append(Descriptor(func, float, label, "MDDD")) label = "Ramification" func = self._getRamificationIndex entries.append(Descriptor(func, int, label, "Ram")) label = "Gutman Molecular Topological" func = self._getGutmanMolecularTopologicalIndex entries.append(Descriptor(func, int, label, "GMT")) label = "Gutman MTI by valence vertex degrees" func = self._getGutmanMolecularTopologicalValenceIndex entries.append(Descriptor(func, float, label, "GMTV")) label = "Average vertex distance degree" func = self._getAverageDistanceDegree entries.append(Descriptor(func, float, label, "AVDD")) label = "Unipolarity" func = self._getUnipolarity entries.append(Descriptor(func, int, label, "UP")) label = "Centralization" func = self._getCentralization entries.append(Descriptor(func, int, label, "CENT")) label = "Variation" func = self._getVariation entries.append(Descriptor(func, int, label, "VAR")) label = "Molecular electrotopological variation" func = self._getMolecularElectrotopologicalVariation entries.append(Descriptor(func, float, label, "MEV")) label = "Maximal electrotopological positive variation" func = self._getMaximalElectrotopologicalPositiveVariation entries.append(Descriptor(func, float, label, "MEPV")) label = "Maximal electrotopological negative variation" func = self._getMaximalElectrotopologicalNegativeVariation entries.append(Descriptor(func, float, label, "MENV")) label = "Eccentric connectivity" func = self._getEccentricConnectivityIndex entries.append(Descriptor(func, int, label, "ECCc")) label = "Eccentricity" func = self._getEccentricity entries.append(Descriptor(func, int, label, "ECC")) label = "Average eccentricity" func = self._getAverageEccentricity entries.append(Descriptor(func, float, label, "AECC")) label = "Eccentric" func = self._getEccentric entries.append(Descriptor(func, float, label, "DECC")) for i in range(6): label = "Valence connectivity index chi-%d" % i func = partial(self._getValenceConnectivityIndex, m=i) entries.append(Descriptor(func, float, label, "vX%d" % i)) for i in range(6): label = "Average valence connectivity index chi-%d" % i func = partial(self._getMeanValenceConnectivityIndex, m=i) entries.append(Descriptor(func, float, label, "AvX%d" % i)) label = "Quasi Wiener" func = self._getQuasiWienerIndex entries.append(Descriptor(func, float, label, "QW")) label = "First Mohar" func = self._getFirstMoharIndex entries.append(Descriptor(func, float, label, "FM")) label = "Second Mohar" func = self._getSecondMoharIndex entries.append(Descriptor(func, float, label, "SM")) label = "Spanning tree number" func = self._getSpanningTreeNumber entries.append(Descriptor(func, float, label, "STN")) label = "Kier benzene-likeliness index" func = self._getBenzeneLikelinessIndex entries.append(Descriptor(func, float, label, "KBLI")) for i in range(1, 11): label = "Topological charge index of order %d" % i func = partial(self._getTopologicalChargeIndex, k=i) entries.append(Descriptor(func, float, label, "TCI%d" % i)) for i in range(1, 11): label = "Mean topological charge index of order %d" % i func = partial(self._getMeanTopologicalChargeIndex, k=i) entries.append(Descriptor(func, float, label, "MTCI%d" % i)) label = "Global topological charge" func = self._getGlobalTopologicalChargeIndex entries.append(Descriptor(func, float, label, "GTC")) label = "Hyper-distance-path index" func = self._getHyperDistancePathIndex entries.append(Descriptor(func, float, label, "HDPI")) label = "Reciprocal hyper-distance-path index" func = self._getReciprocalHyperDistancePathIndex entries.append(Descriptor(func, float, label, "RHDPI")) label = "Square reciprocal distance sum" func = self._getSquareReciprocalDistanceSumIndex entries.append(Descriptor(func, float, label, "SRDS")) label = "Modified Randic connectivity" func = self._getModifiedRandicIndex entries.append(Descriptor(func, float, label, "MRC")) label = "Balaban centric" func = self._getBalabanCentricIndex entries.append(Descriptor(func, float, label, "BC")) label = "Lopping centric" func = self._getLoppingCentricInformationIndex entries.append(Descriptor(func, float, label, "LC")) label = "Kier Hall electronegativity" func = self._getKierHallElectronegativitySum entries.append(Descriptor(func, float, label, "KHE")) elements = ["N", "O", "S", "P", "F", "Cl", "Br", "I"] for a, b in combinations_with_replacement(elements, 2): label = "Sum of topological distances between %s..%s" % (a, b) func = partial(self._getTopologicalDistanceSum, elem1=a, elem2=b) key = "STD(%s,%s)" % (a, b) entries.append(Descriptor(func, int, label, key)) label = "Wiener-type index from Z weighted distance matrix - Barysz matrix" func = self._getBaryszWeinerTypeIndex entries.append(Descriptor(func, float, label, "WhetZ")) label = "Wiener-type index from electronegativity weighted distance matrix" func = self._getElectronegativityWienerTypeIndex entries.append(Descriptor(func, float, label, "Whete")) label = "Wiener-type index from mass weighted distance matrix" func = self._getMassWienerTypeIndex entries.append(Descriptor(func, float, label, "Whetm")) label = "Wiener-type index from van der waals weighted distance matrix" func = self._getVDWVolumeWienerTypeIndex entries.append(Descriptor(func, float, label, "Whetv")) label = "Wiener-type index from polarizability weighted distance matrix" func = self._getPolarizabilityWienerTypeIndex entries.append(Descriptor(func, float, label, "Whetp")) label = "Balaban-type index from Z weighted distance matrix - Barysz matrix" func = self._getBaryszIndex entries.append(Descriptor(func, float, label, "JhetZ")) label = "Balaban-type index from electronegativity weighted distance matrix" func = self._getElectronegativityBalabanTypeIndex entries.append(Descriptor(func, float, label, "Jhete")) label = "Balaban-type index from mass weighted distance matrix" func = self._getMassBalabanTypeIndex entries.append(Descriptor(func, float, label, "Jhetm")) label = "Balaban-type index from van der waals weighted distance matrix" func = self._getVDWVolumeBalabanTypeIndex entries.append(Descriptor(func, float, label, "Jhetv")) label = "Balaban-type index from polarizability weighted distance matrix" func = self._getPolarizabilityBalabanTypeIndex entries.append(Descriptor(func, float, label, "Jhetp")) label = "Topological diameter" func = self._getTopologicalDiameter entries.append(Descriptor(func, int, label, "TD")) label = "Topological radius" func = self._getTopologicalRadius entries.append(Descriptor(func, int, label, "TR")) label = "Petitjean 2D shape" func = self._getPetitjeanShapeIndex entries.append(Descriptor(func, float, label, "PJ2DS")) label = "Balaban distance connectivity index" func = self._getBalabanDistanceConnectivityIndex entries.append(Descriptor(func, float, label, "J")) for i in range(6): label = "Solvation connectivity index chi-%d" % i func = partial(self._getSolvationConnectivityIndex, m=i) entries.append(Descriptor(func, float, label, "SCIX%d" % i)) for i in range(6): label = "Connectivity index chi-%d" % i if i == 1: label = "Connectivity chi-1 [Randic connectivity]" func = partial(self._getConnectivityIndex, m=i) entries.append(Descriptor(func, float, label, "CIX%d" % i)) for i in range(6): label = "Average connectivity index chi-%d" % i func = partial(self._getMeanConnectivityIndex, m=i) entries.append(Descriptor(func, float, label, "ACIX%d" % i)) label = "reciprocal distance Randic-type index" func = self._getRDCHIIndex entries.append(Descriptor(func, float, label, "RDR")) label = "reciprocal distance square Randic-type index" func = self._getRDSQIndex entries.append(Descriptor(func, float, label, "RDSR")) for i in range(1, 4): label = "%d-path Kier alpha-modified shape index" % i func = partial(self._getKierAlphaModifiedShapeIndex, m=i) entries.append(Descriptor(func, float, label, "KAMS%d" % i)) label = "Kier flexibility" func = self._getKierMolecularFlexibilityIndex entries.append(Descriptor(func, float, label, "KF")) for i in range(2, 6): label = "path/walk %d - Randic shape index" % i func = partial(self._getMolecularPathWalkIndex, m=i) entries.append(Descriptor(func, float, label, "RSIpw%d" % i)) label = "E-state topological parameter" func = self._getEStateIndex entries.append(Descriptor(func, float, label, "ETP")) label = "Chirality count" func = self._getChiralAtomCount entries.append(Descriptor(func, int, label, "CHRCOUNT")) for i in range(3, 21): label = "Ring Count %d" % i func = partial(self._getRingCount, n=i) entries.append(Descriptor(func, int, label, "RNGCNT%d" % i)) label = "Atom Count" func = self._getAtomCount entries.append(Descriptor(func, int, label, "ATMCNT")) label = "Bond Count" func = self._getBondCount entries.append(Descriptor(func, int, label, "BNDCNT")) label = "Atoms in Ring System" func = self._getRingAtomCount entries.append(Descriptor(func, int, label, "ATMRNGCNT")) label = "Bonds in Ring System" func = self._getRingBondCount entries.append(Descriptor(func, int, label, "BNDRNGCNT")) label = "Cyclomatic number" func = self._getCyclomaticNumber entries.append(Descriptor(func, int, label, "CYCLONUM")) label = "Number of ring systems" func = self._getNumberRingSystems entries.append(Descriptor(func, int, label, "NRS")) label = "Normalized number of ring systems" func = self._getNormalizedNumberRingSystems entries.append(Descriptor(func, float, label, "NNRS")) label = "Ring Fusion degree" func = self._getRingFusionDegree entries.append(Descriptor(func, float, label, "RFD")) label = "Total ring size" func = self._getTotalRingSize entries.append(Descriptor(func, int, label, "TRS")) label = "Ring perimeter" func = self._getRingPerimeter entries.append(Descriptor(func, int, label, "RNGPERM")) label = "Ring bridge count" func = self._getRingBridges entries.append(Descriptor(func, int, label, "RNGBDGE")) label = "Molecule cyclized degree" func = self._getMolecularCyclizedDegree entries.append(Descriptor(func, float, label, "MCD")) label = "Ring Fusion density" func = self._getRingFusionDensity entries.append(Descriptor(func, float, label, "RFDELTA")) label = "Ring complexity index" func = self._getRingComplexityIndex entries.append(Descriptor(func, float, label, "RCI")) label = "Van der Waals surface area" func = self._getTotalVDWSurfaceArea entries.append(Descriptor(func, float, label, "VSA")) for i in range(1, 9): label = "MR%d" % i func = partial(self._getPVSAMolarRefractivity, n=i) entries.append(Descriptor(func, float, label, "MR%d" % i)) for i in range(1, 11): label = "ALOGP%d" % i func = partial(self._getPVSAlogP, n=i) entries.append(Descriptor(func, float, label, "ALOGP%d" % i)) for i in range(1, 15): label = "PEOE%d" % i func = partial(self._getPVSAPartialCharges, n=i) entries.append(Descriptor(func, float, label, "PEOE%d" % i)) return tuple(entries) # ========================================================================= # Common descriptor asserts # ========================================================================= def _assertMultipleHeavyAtoms(self, st): """ Raises a TopoError if the structure has only one heavy atom """ if self._getAtomCount(st) == 1: raise TopoError("Requires more than one atom") def _assertPathLength(self, st, m): """ Raises a TopoError if the structure does not have any paths of length m """ if m not in self._getConnectivityPaths(st): raise TopoError("No paths of length %d present in molecule" % m) # ========================================================================= # Descriptor methods # ========================================================================= @cached_topo_descriptor def _getFirstZagrebIndex(self, st): """ Handbook p. 509: First Zagreb index (M_1) -- topological index based on atomic vertex degrees. The first index in strictly related to zero-order connectivity index. Also called the Gutman index. """ vertex_func = self._getVertexDegree # NOTE: vertex_func(i)^2 instead of _generalizedConnectivityIndex return sum( [pow(vertex_func(st, i), 2) for i in self._getAtomIndices(st)]) @cached_topo_descriptor def _getFirstZagrebValenceIndex(self, st): """ Handbook p. 509: First valence Zagreb index (M^v_1) -- topological index based on atomic valence vertex degrees. The first index in strictly related to zero-order connectivity index. """ vertex_func = self._getValenceVertexDegree # NOTE: vertex_func(i)^2 instead of _generalizedConnectivityIndex return sum( [pow(vertex_func(st, i), 2) for i in self._getAtomIndices(st)]) @cached_topo_descriptor def _getSecondZagrebIndex(self, st): """ Handbook p. 509: Second Zagreb index (M_2) -- topological index based on atomic vertex degrees. The second index in strictly related to first-order connectivity index; it is part of the Schuttz molecular topological index. """ vertex_func = self._getVertexDegree return self._generalizedConnectivityIndex(st, 1, vertex_func, 1.0) @cached_topo_descriptor def _getSecondZagrebValenceIndex(self, st): """ Handbook p. 509: Second valence Zagreb index (M^v_2) -- topological index based on atomic valence vertex degrees. The second index in strictly related to first-order connectivity index. """ vertex_func = self._getValenceVertexDegree return self._generalizedConnectivityIndex(st, 1, vertex_func, 1.0) @cached_topo_descriptor def _getPolarityNumber(self, st): """ Handbook p. 114: polarity number (p_2) -- also known as Wiener polarity number; the number of pairs of graph vertices which are separated by three edges. It is usually assumed that the polarity number accounts for the flexibility of acyclic structures, p being equal to the number of bonds around which free rotations can take place. """ return self._getDistanceCount(st, 3) @cached_topo_descriptor def _getSimpleTopologicalIndex(self, st): """ Handbook p. 476: simple topological index (S) -- descriptor related to molecular branching, proposed as the product of the vertex degrees for each atom [Narumi, 1987]. NOTE: Take the natural log to avoid overflow. """ self._assertMultipleHeavyAtoms(st) # prevent log(0) return sum([ math.log(self._getVertexDegree(st, i)) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getHarmonicTopologicalIndex(self, st): """ Handbook p. 476: harmonic topological index (H) -- descriptor related to the simple topological index [Narumi, 1987]. """ self._assertMultipleHeavyAtoms(st) # prevent divide by zero inv_vertex = [ old_div(1.0, self._getVertexDegree(st, i)) for i in self._getAtomIndices(st) ] return old_div(self._getAtomCount(st), sum(inv_vertex)) @cached_topo_descriptor def _getGeometricTopologicalIndex(self, st): """ Handbook p. 476: geometric topological index (G) -- descriptor related to the simple topological index [Narumi, 1987]. """ exponent = old_div(1.0, self._getAtomCount(st)) return pow(math.exp(self._getSimpleTopologicalIndex(st)), exponent) @cached_topo_descriptor def _getTotalStructureConnectivityIndex(self, st): """ Handbook p. 86: total structure connectivity index -- connectivity index contemporarily accounting for all the atoms in the graph. This is the square root of the simple topological index proposed by Narumi for measuring molecular branching. (Handbook misprint: inverse square root) """ # NOTE: DRAGON uses the log of the Narumi Simple Topological index return pow(math.exp(self._getSimpleTopologicalIndex(st)), -0.5) def _wienerOperator(self, matrix): """ Handbook p. 9, Wiener operator W(M) -- half the sum of the off-diagonal entries of the matrix M; name taken from the Wiener index. """ return 0.5 * numpy.sum(matrix) @cached_topo_descriptor def _getWienerIndex(self, st): """ Handbook p. 497: Wiener index (W) -- the sum over all bonds of the product of the number of vertices on each side of the bond; ie. the sum of all topological distancesinthe H-depleted molecular graph. Also known as the Weiner number. """ return self._wienerOperator(self._getDistanceMatrix(st)) @cached_topo_descriptor def _getMeanWienerIndex(self, st): """ Handbook p. 497: mean Wiener index (W_bar) -- defined from the Wiener index as 2 * W / (A * (A - 1)) """ self._assertMultipleHeavyAtoms(st) # prevent divide by zero n = self._getAtomCount(st) return 2.0 * self._getWienerIndex(st) / (n * (n - 1)) @cached_topo_descriptor def _getXuIndex(self, st): """ Handbook p. 507: Xu index -- descriptor accounting for molecular size and branching. Defined as sqrt(A) * ln(L), where L represents the valence average topological distance calculated by vertex degree and vertex distance degree of all atoms. NOTE: Use natural log (comparable to E-DRAGON) """ self._assertMultipleHeavyAtoms(st) indices = self._getAtomIndices(st) delta = [self._getVertexDegree(st, i) for i in indices] sigma = [self._getVertexDistanceDegree(st, i) for i in indices] L = numpy.dot(numpy.multiply(delta, sigma), sigma) L /= float(numpy.dot(delta, sigma)) return math.sqrt(self._getAtomCount(st)) * math.log(L) @cached_topo_descriptor def _getQuadraticIndex(self, st): """ Handbook p. 44: quadratic index (Q) -- obtained by normalization of the first Zagreb index. Also called normalized quadratic index. """ M1 = self._getFirstZagrebIndex(st) return 3 - (2 * self._getAtomCount(st)) + (old_div(M1, 2.0)) @cached_topo_descriptor def _getRadialCentricInformationIndex(self, st): """ Handbook p. 44: radial centric information index (I^V_C,R) -- defined as the lopping centric information index, but where ng is the number of graph vertices having the same atom eccentricity. """ n = float(self._getAtomCount(st)) info_func = lambda x: -(old_div(x, n)) * math.log((old_div(x, n)), 2) return self._centricIndexSum(st, self._getAtomEccentricity, info_func) @cached_topo_descriptor def _getMeanSquareDistanceIndex(self, st): """ Handbook p. 113: mean square distance index (MSD) -- calculated from the second order distance distribution moment [Balaban, 1983a]. NOTE: Handbook misprint -- the square root should encompass [A * (A-1)] """ self._assertMultipleHeavyAtoms(st) # prevent divide by zero n = float(self._getAtomCount(st)) sq_dist_sum = numpy.sum(numpy.square(self._getDistanceMatrix(st))) return pow(old_div(sq_dist_sum, (n * (n - 1))), 0.5) @cached_topo_descriptor def _getSuperpendenticIndex(self, st): """ Handbook p. 431: superpendentic index -- calculated from the pendent matrix, which is a submatrix of the distance matrix which is to the number of atoms by the number of terminal vertices. The superpendentic index is calculated as the square root of the sum of the products of the nonzero row elements in the pendent matrix. NOTE: As with DRAGON, instead of returning sqrt of sum of ow products: pow(sum(numpy.prod(pendent_matrix, axis=1)), 0.5) we avoid overflow by returning the sqrt of sum of log(row products) """ terminal_vertices = [ index for index in self._getAtomIndices(st) if self._getVertexDegree(st, index) == 1 ] if not terminal_vertices: raise TopoError("No terminal vertices for superpendentic") # Instead of forming a new matrix, modify the distance matrix pendent_matrix = self._getDistanceMatrix(st).copy() # Ignore zeros on the diagonal numpy.fill_diagonal(pendent_matrix, 1) # Ignore columns that are not terminal vertices for index in self._getAtomIndices(st): if index not in terminal_vertices: pendent_matrix[:, index - 1].fill(1) # Return sqrt of sum of log(row products) return pow(numpy.sum(numpy.log(pendent_matrix)), 0.5) @cached_topo_descriptor def _getHararyIndex(self, st): """ Handbook p. 209: Harary index (H) -- topological index derived from the reciprocal distance matrix by the Wiener operator (Harary number) """ return self._wienerOperator(self._getReciprocalDistanceMatrix(st)) @cached_topo_descriptor def _getLogPRSIndex(self, st): """ Handbook p. 116: Log of PRS index -- log(Product of Row Sums index) defined as the lof of the product of the vertex distance degrees. Taking the log is preferred due to the large values that can be reached by the PRS index. NOTE: As per Handbook, log is base 10 """ self._assertMultipleHeavyAtoms(st) # prevent log(0) # Distribute log to avoid overflow return sum([ math.log10(self._getVertexDistanceDegree(st, i)) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getPoglianiIndex(self, st): """ Handbook p. 116: Pogliani index (D^nu) -- sum of the ratio of the number of valence electrons to the principal quantum number for each atom. """ pogliani = 0.0 for atom in self._getHeavyStructure(st).atom: Zv_i = mm.mmelement_get_valence_count(atom.atomic_number) L_i = mm.mmelement_get_period(atom.atomic_number) pogliani += old_div(Zv_i, float(L_i)) return pogliani def _baseSchultzIndex(self, st, vertex_func): """ Handbook p. 381: Schultz molecular topological index (MTI) -- index derived from the adjacency and distance matrices, defined as the sum over (A+D).v intricacy numbers. NOTE: Handbook decomposes the index into two parts, a sum of square vertex degrees and vertex degree-vertex distance degree products, and erroneously labels the former as "the second Zagreb index". The sum for M2 is over only bonded pair i-j, whereas here is over ALL i-j pairs. :param vertex_func: vertex degree function :type vertex_func: f(st, index) """ MTI = 0.0 for i in self._getAtomIndices(st): vertex_degree = vertex_func(st, i) MTI += pow(vertex_degree, 2) MTI += vertex_degree * self._getVertexDistanceDegree(st, i) return MTI @cached_topo_descriptor def _getSchultzMolecularTopologicalIndex(self, st): """ Handbook p. 381: Schultz molecular topological index (MTI) -- molecular topological index calculated using the vertex degree. """ return self._baseSchultzIndex(st, self._getVertexDegree) @cached_topo_descriptor def _getSchultzMolecularTopologicalValenceIndex(self, st): """ Handbook p. 382: Schultz molecular topological valence index (MTI^v) -- a vertex-valency-weighted analogue to the Schultz molecular topological index calculated using the valence vertex degree. """ return self._baseSchultzIndex(st, self._getValenceVertexDegree) @cached_topo_descriptor def _getMeanDistanceDegreeDeviation(self, st): """ Handbook p. 114: mean distance degree deviation (delta sigma) -- the mean devaiationo f the row sum of the distance matrix from its mean. """ average = self._getAverageDistanceDegree(st) return numpy.mean([ abs(self._getVertexDistanceDegree(st, i) - average) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getRamificationIndex(self, st): """ Handbook p. 475: ramification index (r) -- a simple ramification index proposed for acyclic graphs, where the sum runs over all the vertex degrees greater than two. """ delta = [self._getVertexDegree(st, i) for i in self._getAtomIndices(st)] return sum(di - 2 for di in delta if di > 2) def _baseGutmanIndex(self, st, vertex_func): """ Handbook p. 382: Gutman molecular topological index (S_G) -- sum of the topological distance between the vertices vi and vj weighted by the product of the endpoint vertex degrees. :param vertex_func: vertex degree function :type vertex_func: f(st, index) """ dist_matrix = self._getDistanceMatrix(st) SG = 0 for i, j in self._getAtomPairs(st): vi, vj = vertex_func(st, i), vertex_func(st, j) SG += dist_matrix[i - 1][j - 1] * vi * vj return SG @cached_topo_descriptor def _getGutmanMolecularTopologicalIndex(self, st): """ Handbook p. 382: Gutman molecular topological index (S_G) -- the vertex degree weighted form of S_G. """ return self._baseGutmanIndex(st, self._getVertexDegree) @cached_topo_descriptor def _getGutmanMolecularTopologicalValenceIndex(self, st): """ Handbook p. 382: Gutman molecular topological valence index (S_G) -- a vertex-valency-weighted analogue of S_G, where the weighting factor is multiplicative. """ return self._baseGutmanIndex(st, self._getValenceVertexDegree) @cached_topo_descriptor def _getAverageDistanceDegree(self, st): """ Handbook p. 114: average distance degree (sigma^bar) -- average row sum of the distance matrix. Note this is also 2 * (Wiener index) / A. """ return numpy.mean([ self._getVertexDistanceDegree(st, i) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getUnipolarity(self, st): """ Handbook p. 114: unipolarity (simga*) -- minimum value of the vertex distance degrees. """ return min([ self._getVertexDistanceDegree(st, i) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getCentralization(self, st): """ Handbook p. 114: centralization (delta sigma^*) -- molecular invariant immediately derived from the distance matrix. """ n = self._getAtomCount(st) return 2 * self._getWienerIndex(st) - n * self._getUnipolarity(st) @cached_topo_descriptor def _getVariation(self, st): """ Handbook p. 114: variation (delta sigma^+) -- molecular invariant immediately derived from the distance matrix. """ unipolarity = self._getUnipolarity(st) return max([ self._getVertexDistanceDegree(st, i) - unipolarity for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getMolecularElectrotopologicalVariation(self, st): """ Adopted from DRAGON: molecular electrotopological variation """ return sum(numpy.fabs(self._getFieldEffects(st))) @cached_topo_descriptor def _getMaximalElectrotopologicalPositiveVariation(self, st): """ Adopted from DRAGON: maximal electrotopological positive variation """ positive_fe = [fe for fe in self._getFieldEffects(st) if fe > 0.0] if not positive_fe: raise TopoError("No positive field effect values") return max(positive_fe) @cached_topo_descriptor def _getMaximalElectrotopologicalNegativeVariation(self, st): """ Adopted from DRAGON: maximal electrotopological negative variation """ positive_fe = [abs(fe) for fe in self._getFieldEffects(st) if fe < 0.0] if not positive_fe: raise TopoError("No negative field effect values") return max(positive_fe) @cached_topo_descriptor def _getEccentricConnectivityIndex(self, st): """ Handbook p. 124: eccentric connectivity index (xi^C) -- the sum of the products between eccentricity and vertex degree over all atoms. """ xi_sum = 0.0 for i in self._getAtomIndices(st): nu_i = self._getAtomEccentricity(st, i) delta_i = self._getVertexDegree(st, i) xi_sum += nu_i * delta_i return xi_sum @cached_topo_descriptor def _getEccentricity(self, st): """ Handbook p. 112: eccentricity (nu) -- sum of atom eccentricities. """ return sum([ self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getAverageEccentricity(self, st): """ Handbook p. 112: average atom eccentricity (nu^bar) -- average of atom eccentricities. """ return old_div(float(self._getEccentricity(st)), self._getAtomCount(st)) @cached_topo_descriptor def _getEccentric(self, st): """ Handbook p. 112: eccentricity (Delta nu) -- mean deviation from average of atom eccentricities. """ average = self._getAverageEccentricity(st) dev_sum = sum([ abs(self._getAtomEccentricity(st, i) - average) for i in self._getAtomIndices(st) ]) return old_div(float(dev_sum), self._getAtomCount(st)) @cached_topo_descriptor def _getValenceConnectivityIndex(self, st, m): """ Handbook p. 85: connectivity indices of mth order -- usually known as Kier-Hall connectivity indices, defined a general scheme based on the Randic index to also calculate zero-order and higher-order descriptors. :param m: index order of the connectivity index :type m: int """ vertex_func = self._getValenceVertexDegree return self._generalizedConnectivityIndex(st, m, vertex_func, -0.5) @cached_topo_descriptor def _getMeanValenceConnectivityIndex(self, st, m): """ Handbook p. 86: mean valence connectivity indices of mth order -- again, replacing the vertex degree by the valence vertex degree in the similar mean connectivity indices. :param m: index order of the connectivity index :type m: int """ connectivity_index = float(self._getValenceConnectivityIndex(st, m)) return old_div(connectivity_index, len(self._getConnectivityPaths(st)[m])) @cached_topo_descriptor def _getQuasiWienerIndex(self, st): """ Handbook p. 253: quasi-Wiener index (W^*) -- sum of the reciprocal A - 1 positive eigenvalues was proposed as a molecular descriptor. For acyclic graphs, the quasi-Wiener index coincides with the Wiener index, while for cycle-containing graphs the two descriptors differ. Moreover, it has been demonstrated that the quasi-Wiener index coincides with the Kirchhoff number for any graph. """ eigenvalues = self._getLaplacianEigenvalues(st)[:-1] # A-1 values quasi_sum = sum( old_div(1.0, eigenvalues[i]) for i in range(len(eigenvalues))) return self._getAtomCount(st) * quasi_sum @cached_topo_descriptor def _getFirstMoharIndex(self, st): """ Handbook p. 254: first Mohar indices (TI)_1 -- index derived from the Laplacian matrix. """ self._assertMultipleHeavyAtoms(st) # prevent log(0) bond_atom_ratio = old_div(float(self._getBondCount(st)), self._getAtomCount(st)) return 2 * math.log10(bond_atom_ratio) * self._getQuasiWienerIndex(st) @cached_topo_descriptor def _getSecondMoharIndex(self, st): """ Handbook p. 254: second Mohar indices (TI)_2 -- index derived from the Laplacian matrix. """ first_non_zero_eigen = self._getLaplacianEigenvalues(st)[-2] return old_div(4.0, (self._getAtomCount(st) * first_non_zero_eigen)) @cached_topo_descriptor def _getSpanningTreeNumber(self, st): """ Handbook p. 253: log of spanning tree number (T^*) -- log of the product of the positive A-1 eigenvalues of the Laplacian matrix. NOTE: Take the natural log to avoid overflow. """ eigenvalues = self._getLaplacianEigenvalues(st)[:-1] # A-1 values eigenvalue_logsum = sum(math.log(lamdda_i) for lamdda_i in eigenvalues) return eigenvalue_logsum - math.log(self._getAtomCount(st)) @cached_topo_descriptor def _getBenzeneLikelinessIndex(self, st): """ Handbook p. 379: benzene-likeliness index -- an aromaticity index based on the first-order valence connectivity index divided by the number of bonds and normalized on the benzene molecule. """ vX1 = self._getValenceConnectivityIndex(st, 1) return 3 * vX1 / self._getBondCount(st) @cached_topo_descriptor def _getTopologicalChargeIndex(self, st, k): """ Handbook p. 445: topological charge index (G_k) -- the half-sum of all charge terms corresponding to pair of vertices with topological distance = k and would represent the total charge transfer between atoms placed at topological distance k. :param k: path length :type k: int """ charge_term_matrix = self._getChargeTermMatrix(st) distance_matrix = self._getDistanceMatrix(st) charge_sum_k = 0.0 for i in range(distance_matrix.shape[0]): for j in range(i + 1, distance_matrix.shape[0]): # Distance matrix is symmetric, d_ij == d_ji if distance_matrix[i][j] == k: # Charge term matrix is asymmetric, CT_ij != CT_ji charge_sum_k += abs(charge_term_matrix[i][j]) charge_sum_k += abs(charge_term_matrix[j][i]) return 0.5 * charge_sum_k @cached_topo_descriptor def _getMeanTopologicalChargeIndex(self, st, k): """ Handbook p. 445: mean topological charge index (J_k) -- topological charge index divided by the number of edges in an acyclic molecule. Values are set to zero for k greater than the molecular diameter. :param k: path length :type k: int """ if k > self._getTopologicalDiameter(st): return 0.0 norm = self._getDistanceCount(st, k) return old_div(self._getTopologicalChargeIndex(st, k), float(norm)) @cached_topo_descriptor def _getGlobalTopologicalChargeIndex(self, st): """ Handbook p. 445: global topological charge index (J) -- sum over mean topological charge indinces, with the superior limit equal to 5. The value was proposed by the authors to obtain uniform length descriptors. NOTE: Past implementation, as well as DRAGON, sum up to k=10 """ return sum( self._getMeanTopologicalChargeIndex(st, i) for i in range(1, 11)) @cached_topo_descriptor def _getHyperDistancePathIndex(self, st): """ Handbook p. 118: hyper-distance-path index (D_p) -- defined as applying the Wiener operator to the distance-path matrix, where entry i-j of the matrix is calculated from the distance matrix D as all the possible combinations of two elements taken from d_ij + 1 elements (binomial coefficient). """ dist_matrix = self._getDistanceMatrix(st) dist_path_matrix = old_div((numpy.power(dist_matrix, 2) + dist_matrix), 2.0) return self._wienerOperator(dist_path_matrix) @cached_topo_descriptor def _getReciprocalHyperDistancePathIndex(self, st): """ Handbook p. 118: reciprocal hyper-distance-pathindex (D_p^-1) -- defined in the same was as the hyper-distance-path index, but where elements of the distance-path matrix are reciprocal. """ dist_matrix = self._getDistanceMatrix(st) dist_path_matrix = old_div((numpy.power(dist_matrix, 2) + dist_matrix), 2.0) return self._wienerOperator(matrix_reciprocal(dist_path_matrix)) @cached_topo_descriptor def _getSquareReciprocalDistanceSumIndex(self, st): """ Handbook p. 210: square reciprocal distance sum index (Har2) -- the Harary index calculated with the reciprocal squared distance matrix. """ return self._wienerOperator(self._getReciprocalSquareDistanceMatrix(st)) @cached_topo_descriptor def _getModifiedRandicIndex(self, st): """ Handbook p. 88: modified Randic index (chi^1_mod) -- sum of atomic properties, accounting for valence electrons and extended connectivities using a Randic connectivity index-type formula. 0.5 * sum_atoms[ sum_atomi_bonds[ Z_i * (delta_i * delta_j) ^ -0.5 ]] """ randic_sum = 0.0 for atom in self._getHeavyStructure(st).atom: for bond in atom.bond: di = self._getVertexDegree(st, bond.atom1.index) dj = self._getVertexDegree(st, bond.atom2.index) randic_sum += old_div(float(atom.atomic_number), pow(di * dj, 0.5)) return 0.5 * randic_sum @cached_topo_descriptor def _getBalabanCentricIndex(self, st): """ Handbook p. 42: Balaban centric index (B) -- topological index defined for acyclic graphs based on the pruning of the graph, a stepwise procedure for removing all the terminal vertices. """ if self._getCyclomaticNumber(st): raise TopoError("Cycles present, only defined for acyclic graphs") square_func = lambda x: x * x return self._centricIndexSum(st, self._getVertexDegree, square_func) @cached_topo_descriptor def _getLoppingCentricInformationIndex(self, st): """ Handbook p. 42: lopping centric information index (I_B) -- index defined as the mean information content derived from the pruning of acyclic graphs based on the pruning of the graph, a stepwise procedure for removing all the terminal vertices. """ if self._getCyclomaticNumber(st): raise TopoError("Cycles present, only defined for acyclic graphs") n = float(self._getAtomCount(st)) info_func = lambda x: -(old_div(x, n)) * math.log((old_div(x, n)), 2) return self._centricIndexSum(st, self._getVertexDegree, info_func) @cached_topo_descriptor def _getKierHallElectronegativitySum(self, st): """ Handbook p. 475: Kier-Hall electronegativity index (KHE) -- sum of Kier-Hall electrotopological states. """ KHE_sum = 0.0 for i, atom in enumerate(self._getHeavyStructure(st).atom, start=1): delta_vi = self._getValenceVertexDegree(st, i) delta_i = self._getVertexDegree(st, i) L_i = mm.mmelement_get_period(atom.atomic_number) KHE_sum += old_div(float(delta_vi - delta_i), pow(L_i, 2)) return KHE_sum @cached_topo_descriptor def _getTopologicalDistanceSum(self, st, elem1, elem2): """ Adopted from DRAGON: sum of topological distances between all pairs of given atom types. """ distance_matrix = self._getDistanceMatrix(st) elements = {elem1, elem2} top_dist = 0 heavy_st = self._getHeavyStructure(st) ele_map = {int(atom): atom.element for atom in heavy_st.atom} for i, j in self._getAtomPairs(st): if {ele_map[i], ele_map[j]} == elements: top_dist += distance_matrix[i - 1, j - 1] return top_dist @cached_topo_descriptor def _getBaryszWeinerTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Wiener operator to the Barysz distance matrix. """ return self._wienerOperator(self._getBaryszDistanceMatrix(st)) @cached_topo_descriptor def _getElectronegativityWienerTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Wiener operator to the electronegativities weighted distance matrix. """ distance_matrix = self._getElectronegativityDistanceMatrix(st) return self._wienerOperator(distance_matrix) @cached_topo_descriptor def _getMassWienerTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Wiener operator to the atomic mass weighted distance matrix. """ return self._wienerOperator(self._getMassDistanceMatrix(st)) @cached_topo_descriptor def _getVDWVolumeWienerTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Wiener operator to the van der Waals volume weighted distance matrix. """ return self._wienerOperator(self._getVDWVolumeDistanceMatrix(st)) @cached_topo_descriptor def _getPolarizabilityWienerTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Wiener operator to the polarizability weighted distance matrix. """ distance_matrix = self._getPolarizabilityDistanceMatrix(st) return self._wienerOperator(distance_matrix) @cached_topo_descriptor def _getBaryszIndex(self, st): """ Handbook p. 489: Barysz index (J_het) -- a generalization of the Balaban distance connectivity index calculated by applying the Ivanciuc-Balaban operator to the Barysz distance matrix. """ return self._balbanOperator(st, self._getBaryszDistanceMatrix(st)) @cached_topo_descriptor def _getElectronegativityBalabanTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Ivanciuc-Balaban operator to the electronegativities weighted distance matrix. """ distance_matrix = self._getElectronegativityDistanceMatrix(st) return self._balbanOperator(st, distance_matrix) @cached_topo_descriptor def _getMassBalabanTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Ivanciuc-Balaban operator to the atomic mass weighted distance matrix. """ return self._balbanOperator(st, self._getMassDistanceMatrix(st)) @cached_topo_descriptor def _getVDWVolumeBalabanTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Ivanciuc-Balaban operator to the van der Waals volume weighted distance matrix. """ return self._balbanOperator(st, self._getVDWVolumeDistanceMatrix(st)) @cached_topo_descriptor def _getPolarizabilityBalabanTypeIndex(self, st): """ Handbook p. 489: index calculated by applying the Ivanciuc-Balaban operator to the polarizability weighted distance matrix. """ distance_matrix = self._getPolarizabilityDistanceMatrix(st) return self._balbanOperator(st, distance_matrix) @cached_topo_descriptor def _getTopologicalDiameter(self, st): """ Handbook p. 112: topological diameter (D) -- defined as the maximum atom eccentricity """ return max([ self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getTopologicalRadius(self, st): """ Handbook p. 112: topological radius (R) -- defined as the minimum atom eccentricity """ return min([ self._getAtomEccentricity(st, i) for i in self._getAtomIndices(st) ]) @cached_topo_descriptor def _getPetitjeanShapeIndex(self, st): """ Handbook p. 390: Petitjean shape index (I_2) -- a topological anisometry descriptor, also called a graph-theoretical shape coefficient. """ self._assertMultipleHeavyAtoms(st) # prevent divide by zero diameter = self._getTopologicalDiameter(st) radius = self._getTopologicalRadius(st) return old_div((diameter - radius), float(radius)) @cached_topo_descriptor def _getBalabanDistanceConnectivityIndex(self, st): """ Handbook p. 21: Balaban distance connectivity index (J) -- defined in terms of sums over each i th row of the distance matrix as:: B/(C+1) * sum_bonds[ (vertex dist_i * vertex dist_j) ^ -0.5 ] It is also called distance connectivity index or average distance sum connectivity. """ return self._balbanOperator(st, self._getDistanceMatrix(st)) @cached_topo_descriptor def _getSolvationConnectivityIndex(self, st, m): """ Handbook p. 88: solvation connectivity indices (m chi^s_q) -- descriptor defined in order to model solvation entropy and describe dispersion interactions in solution. """ self._assertMultipleHeavyAtoms(st) # prevent pow(0, exponent) self._assertPathLength(st, m) connectivity_index = 0.0 atoms = self._getHeavyStructure(st).atom for path in self._getConnectivityPaths(st)[m]: L = [mm.mmelement_get_period(atoms[i].atomic_number) for i in path] delta = [self._getVertexDegree(st, i) for i in path] connectivity_index += old_div(numpy.prod(L), pow(numpy.prod(delta), 0.5)) return (old_div(1.0, pow(2, m + 1))) * connectivity_index @cached_topo_descriptor def _getConnectivityIndex(self, st, m): """ Handbook p. 85: connectivity indices of mth order -- usually known as Kier-Hall connectivity indices, defined a general scheme based on the Randic index to also calculate zero-order and higher-order descriptors. :param m: index order of the connectivity index :type m: int """ vertex_func = self._getVertexDegree return self._generalizedConnectivityIndex(st, m, vertex_func, -0.5) @cached_topo_descriptor def _getMeanConnectivityIndex(self, st, m): """ Handbook p. 85: mean connectivity indices of mth order -- the Handbook only describes the mean Randic connectivity index (m=1), defined as the Randic connectivity index divided by the number of bonds. Here, we extrapolate to zero and higher orders :param m: index order of the connectivity index :type m: int """ connectivity_index = float(self._getConnectivityIndex(st, m)) return old_div(connectivity_index, len(self._getConnectivityPaths(st)[m])) @cached_topo_descriptor def _getRDCHIIndex(self, st): """ Handbook p. 116: RDCHI index -- topological index based on a Randic-type formula, which increases with molecular size but decreases with molecular branching. """ RDCHI = 0.0 for i, j in self._getBondIndices(st): RDS_i = self._getReciprocalDistanceSum(st, i) RDS_j = self._getReciprocalDistanceSum(st, j) RDCHI += pow(RDS_i * RDS_j, -0.5) return RDCHI @cached_topo_descriptor def _getRDSQIndex(self, st): """ Handbook p. 116: RDSQ index -- topological index based on a Randic-type formula, which increases with both molecular size and molecular branching. """ RDSQ = 0.0 for i, j in self._getBondIndices(st): RDS_i = self._getReciprocalDistanceSum(st, i) RDS_j = self._getReciprocalDistanceSum(st, j) RDSQ += pow(RDS_i * RDS_j, 0.5) return RDSQ @cached_topo_descriptor def _getKierAlphaModifiedShapeIndex(self, st, m): """ Handbook p. 249: Kier alpha-modified shape descriptor (m kappa_alpha) -- descriptor defined in terms of the number of graph vertices A and the number of paths with length (m = 1,2,3). The alpha-modified version takes into account the different shape contribution of heteroatoms and hybridization states. """ self._assertPathLength(st, m) # alpha values indexed by atomic number and hybridization state # Handbook p. 250, Table K-1 R_Csp3 = 0.77 alpha_dict = { (6, canvas.Sp3): (old_div(R_Csp3, R_Csp3)) - 1.0, (6, canvas.Sp2): (old_div(0.67, R_Csp3)) - 1.0, (6, canvas.Sp): (old_div(0.60, R_Csp3)) - 1.0, (7, canvas.Sp3): (old_div(0.74, R_Csp3)) - 1.0, (7, canvas.Sp2): (old_div(0.62, R_Csp3)) - 1.0, (7, canvas.Sp): (old_div(0.55, R_Csp3)) - 1.0, (8, canvas.Sp3): (old_div(0.74, R_Csp3)) - 1.0, (8, canvas.Sp2): (old_div(0.62, R_Csp3)) - 1.0, (9, canvas.Sp3): (old_div(0.72, R_Csp3)) - 1.0, (15, canvas.Sp3): (old_div(1.10, R_Csp3)) - 1.0, (15, canvas.Sp2): (old_div(1.00, R_Csp3)) - 1.0, (16, canvas.Sp3): (old_div(1.04, R_Csp3)) - 1.0, (16, canvas.Sp2): (old_div(0.94, R_Csp3)) - 1.0, (17, canvas.Sp3): (old_div(0.99, R_Csp3)) - 1.0, (35, canvas.Sp3): (old_div(1.14, R_Csp3)) - 1.0, (53, canvas.Sp3): (old_div(1.33, R_Csp3)) - 1.0, } # parameter derived from the ration of the covalent raidus relative # to the sp3 carbon atom alpha = 0.0 for a in self._getChmMol(st).getHeavyAtoms(): atom_key = (a.getAtomicNumber(), a.getHybridization()) try: alpha += alpha_dict[atom_key] except KeyError: raise TopoError("No alpha defined for atom %d %d" % atom_key) A = self._getAtomCount(st) A_alpha = A + alpha if m == 1: minmax_path_count = A_alpha * pow(A_alpha - 1, 2) elif m == 2: minmax_path_count = (A_alpha - 1) * pow(A_alpha - 2, 2) elif m == 3 and A % 2 == 0: # even number of atoms minmax_path_count = (A_alpha - 3) * pow(A_alpha - 2, 2) elif m == 3 and A % 2 != 0: # odd number of atoms minmax_path_count = (A_alpha - 1) * pow(A_alpha - 3, 2) else: raise TopoError("Unknown m value %d" % m) num_paths = len(self._getConnectivityPaths(st)[m]) + alpha return old_div(float(minmax_path_count), pow(num_paths, 2)) @cached_topo_descriptor def _getKierMolecularFlexibilityIndex(self, st): """ Handbook p. 178: Kier molecular flexibility index (fi) -- a measure of molecular flexibility derived from the Kier alpha-modified shape descriptors, kappa1 encodes information about the count of atoms and relative cyclicity of molecules, while kappa2 encodes information about branching or relative spatial density of molecules. """ kappa1 = self._getKierAlphaModifiedShapeIndex(st, 1) kappa2 = self._getKierAlphaModifiedShapeIndex(st, 2) return kappa1 * kappa2 / self._getAtomCount(st) @cached_topo_descriptor def _getMolecularPathWalkIndex(self, st, m): """ Handbook p. 393: Molecular path/walk indices -- the average sum of atomic path/walk indices of equal length. Obtained by separately summing all the paths and walks of the same length, and then calculating the ratio between their counts. """ self._assertPathLength(st, m) paths = self._getConnectivityPaths(st)[m] awc = self._getAtomicWalkCounts(st, m) path_walk_sum = 0.0 for i in self._getAtomIndices(st): # Number of paths of length m from atom i atom_path_count = len([p for p in paths if i in (p[0], p[m])]) path_walk_sum += old_div(atom_path_count, float(awc[i - 1])) return old_div(path_walk_sum, self._getAtomCount(st)) @cached_topo_descriptor def _getEStateIndex(self, st): """ Molecular Descriptors p. 42: E-state topological parameter (TI^E) -- derived by applying the Ivanciuc-Balaban operator to the E-state index values. It has to be pointed out that the proposed formula for the E-state topological parameter cannot be used for every molecule because it presents two drawbacks: (1) it cannot be calculated when there exists one atom in the molecule with negative E-state value; (2) it assumes very large values even when one S value tends to zero. To overcome these drawbacks of the original formula, an alternative formula (adopted in the DRAGON descriptors) is used here. """ B = self._getBondCount(st) C = self._getCyclomaticNumber(st) # exp(E-state index) = exp(I_i + Delta I_i) exp_S = [ math.exp(sum(x)) for x in zip(self._getFieldEffects(st), self._getIntrinsicStates(st)) ] dist_sum = 0.0 for i, j in self._getBondIndices(st): S_i, S_j = exp_S[i - 1], exp_S[j - 1] dist_sum += pow(1.0 + (S_i * S_j), -0.5) return (old_div(float(B), (C + 1))) * dist_sum @cached_topo_descriptor def _getChiralAtomCount(self, st): """ Unknown source: number of chiral atoms -- count chiral atoms labeled with either R or S (ignoring ANR and ANS). _getFullStructure needs to be avoided here, since H atom indexes would be altered, and Chirality labels will be invalidaded for 1D structures. """ st_copy = st.copy() build.add_hydrogens(st_copy) chiral_atoms = analyze.get_chiral_atoms(st_copy) return len([c for c in chiral_atoms.values() if c in ("R", "S")]) @cached_topo_descriptor def _getRingCount(self, st, n): """ Adopted from DRAGON: number of rings of heavy atom count n """ ring_lengths = [len(ring) for ring in self._getHeavyStructure(st).ring] return ring_lengths.count(n) @cached_topo_descriptor def _getAtomCount(self, st): """ Handbook p. 16: atom number (A) -- defined as the total number of atoms in a molecule, which refers only to non-hydrogen atoms (atom count). """ return self._getHeavyStructure(st).atom_total @cached_topo_descriptor def _getBondCount(self, st): """ Handbook p. 28: bond number (B) -- defined as the number of bonds in the molecule (edge counting; bond count). """ return len(self._getHeavyStructure(st).bond) @cached_topo_descriptor def _getRingAtomCount(self, st): """ Molecular Descriptors p. 655: (A_R) -- the total number of atoms belonging to rings """ ring_atoms = [ index for ring in self._getHeavyStructure(st).ring for index in ring.getAtomIndices() ] return len(set(ring_atoms)) @cached_topo_descriptor def _getRingBondCount(self, st): """ Molecular Descriptors p. 655: (B_R) -- the total number of bonds belonging to rings """ ring_bonds = [ sorted([bond_edge.atom1.index, bond_edge.atom2.index]) for ring in self._getHeavyStructure(st).ring for bond_edge in ring.edge ] return len(set([tuple(bond) for bond in ring_bonds])) @cached_topo_descriptor def _getNumberRingSystems(self, st): """ Molecular Descriptors p. 655: number of ring systems (NRS) """ return (self._getBondCount(st) - self._getRingBondCount(st)) - \ (self._getAtomCount(st) - self._getRingAtomCount(st)) + 1 @cached_topo_descriptor def _getNormalizedNumberRingSystems(self, st): """ Molecular Descriptors p. 655: normalized number of ring systems (NRS*) by the cyclomatic number. """ C = self._getCyclomaticNumber(st) if not C: raise TopoError("No ring systems present") return old_div(self._getNumberRingSystems(st), float(C)) @cached_topo_descriptor def _getRingFusionDegree(self, st): """ Molecular Descriptors p. 655: ring fusion degree (RFD) -- the reciprocal of the normalized number of ring systems. """ return old_div(1.0, self._getNormalizedNumberRingSystems(st)) @cached_topo_descriptor def _getTotalRingSize(self, st): """ Molecular Descriptors p. 656: total ring size (R) -- the sum of the ring size of all the single cycles of all the ring systems; in this case, the atoms belonging to fused connections are counted twice. """ ring_atoms = [ index for ring in self._getHeavyStructure(st).ring for index in ring.getAtomIndices() ] return len(ring_atoms) @cached_topo_descriptor def _getRingPerimeter(self, st): """ Molecular Descriptors p. 656: ring perimeter (R_P) -- represents the perimeter of all the rings present in the molecule. """ return 2 * self._getRingBondCount(st) - self._getTotalRingSize(st) @cached_topo_descriptor def _getRingBridges(self, st): """ Molecular Descriptors p. 656: ring bridges (R_B) -- represents the number of bridge edges. """ return self._getTotalRingSize(st) - self._getRingBondCount(st) @cached_topo_descriptor def _getMolecularCyclizedDegree(self, st): """ Molecular Descriptors p. 656: molecular cyclized degree (MCD) -- ratio of ring atoms to total atoms. """ return old_div(self._getRingAtomCount(st), float(self._getAtomCount(st))) @cached_topo_descriptor def _getRingFusionDensity(self, st): """ Molecular Descriptors p. 656: ring fusion density (RF Delta) """ ring_atom_count = self._getRingAtomCount(st) if not ring_atom_count: raise TopoError("No ring systems present") return 2.0 * self._getRingBridges(st) / ring_atom_count @cached_topo_descriptor def _getRingComplexityIndex(self, st): """ Molecular Descriptors p. 656: ring complexity index (C_R) """ ring_atom_count = self._getRingAtomCount(st) if not ring_atom_count: raise TopoError("No rings present") return old_div(self._getTotalRingSize(st), float(self._getRingAtomCount(st))) @cached_topo_descriptor def _getTotalVDWSurfaceArea(self, st): """ Molecular Descriptors p. 609: Total VSA for all atoms """ return sum(self._getVDWSurfaceArea(st)) @cached_topo_descriptor def _getPVSAMolarRefractivity(self, st, n): """ Molecular Descriptors p. 611: P_VSA based on molar refractivity. Property interval boundaries extracted from Table P16, p. 611 """ check_canvas_license() mr_typer = canvas_base.ChmAtomTyperMR() value_iterator = mr_typer.valueType(self._getChmMol(st), False) mr_values = [value.getValue() for value in value_iterator] intervals = [0.11, 0.26, 0.35, 0.39, 0.44, 0.485, 0.56] return self._propertyRangeVDWSurfaceArea(st, mr_values, intervals, n) @cached_topo_descriptor def _getPVSAlogP(self, st, n): """ Molecular Descriptors p. 611: P_VSA based on log P values, to capture hydrophobic and hydrophillic effect. Property interval boundaries extracted from Table P16, p. 611 """ check_canvas_license() alogp_typer = canvas_base.ChmAtomTyperAlogP() value_iterator = alogp_typer.valueType(self._getChmMol(st), False) alogp_values = [value.getValue() for value in value_iterator] intervals = [-0.4, -0.2, 0.0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4] return self._propertyRangeVDWSurfaceArea(st, alogp_values, intervals, n) @cached_topo_descriptor def _getPVSAPartialCharges(self, st, n): """ Molecular Descriptors p. 611: P_VSA based on Gasteiger charges, ie. partial equalization of orbital electronegativity (PEOE). Property interval boundaries extracted from Table P16, p. 611 """ check_canvas_license() gasteiger = canvas_base.ChmGasteigerCharge() try: gasteiger.calcGasteigerCharges(self._getChmMol(st)) # TODO: Currently throws RuntimeError, need to wrap ChmException except RuntimeError as e: raise TopoError(str(e).strip()) peoe_values = gasteiger.getCharges() intervals = [ -0.3, -0.25, -0.2, -0.15, -0.1, -0.05, 0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3 ] return self._propertyRangeVDWSurfaceArea(st, peoe_values, intervals, n) # ========================================================================= # Intermediate descriptor methods # ========================================================================= @cached_topo_descriptor def _getHeavyStructure(self, st): """ Handbook p. 313: Hydrogen-depleted molecule. """ st_copy = st.copy() build.delete_hydrogens(st_copy) return st_copy @cached_topo_descriptor def _getFullStructure(self, st): """ Fully hydrogenated molecule. """ st_copy = self._getHeavyStructure(st).copy() build.add_hydrogens(st_copy) return st_copy @cached_topo_descriptor def _getChmMol(self, st): check_canvas_license() adaptor = canvas_base.ChmMmctAdaptor() return adaptor.create(int(self._getFullStructure(st))) @cached_topo_descriptor def _getHydrogenCount(self, st, heavy_atom_index): """ Handbook p. 474: (h_i) -- the number of hydrogen atoms bonded to a given heavy atom. :param heavy_atom_index: 1-based index of the heavy atom structure :type heavy_atom_index: int """ # This works because hydrogens are added to the HeavyStructure, which # appends them and does not change the heavy atom indices full_st_atom = self._getFullStructure(st).atom[heavy_atom_index] return sum(bond.atom2.atomic_number == 1 for bond in full_st_atom.bond) @cached_topo_descriptor def _getAtomIndices(self, st): """ Returns list of atom indices, which are 1-based. """ return list(range(1, self._getHeavyStructure(st).atom_total + 1)) @cached_topo_descriptor def _getAtomPairs(self, st): """ Returns list of all pairs of atom indices, which are 1-based. """ n = self._getAtomCount(st) return [(i, j) for i in range(1, n + 1) for j in range(i + 1, n + 1)] @cached_topo_descriptor def _getBondIndices(self, st): """ Returns list of bond index tuples, which are 1-based. Sorts indices (for comparision purposes) """ return [ tuple(sorted([bond.atom1.index, bond.atom2.index])) for bond in self._getHeavyStructure(st).bond ] @cached_topo_descriptor def _getAdjacencyMatrix(self, st): """ Handbook p. 2: adjacency matrix (A) -- represents the whole set of connections between adjacent pairs of atoms. The entries a_ij of the matrix equal one if vertices vi and vj are bonded, and zero otherwise. The adjacency matrix is symmetric with dimension A x A , where A is the number of atoms and it is derived from an H-depleted molecular graph. """ n = self._getAtomCount(st) adjmat = numpy.zeros((n, n), dtype=int) for i, j in self._getBondIndices(st): adjmat[i - 1][j - 1] = adjmat[j - 1][i - 1] = 1 return adjmat @cached_topo_descriptor def _getVertexDegree(self, st, atom_index): """ Handbook p. 474, 2: vertex degree (delta_i) -- the ith row sum of the adjacency matrix. :param atom_index: 1-based atomic index :type atom_index: int """ return sum(self._getAdjacencyMatrix(st)[atom_index - 1]) @cached_topo_descriptor def _getSimpleValenceVertexDegree(self, st, atom_index): """ Handbook p. 474: valence vertex degree (delta^v_i) -- vertex degree taking into account all valence electrons of the ith atom. A simplified version of the equation below. :param atom_index: 1-based atomic index :type atom_index: int """ atom = self._getHeavyStructure(st).atom[atom_index] Zv_i = mm.mmelement_get_valence_count(atom.atomic_number) h_i = self._getHydrogenCount(st, atom_index) return float(Zv_i - h_i) @cached_topo_descriptor def _getValenceVertexDegree(self, st, atom_index): """ Handbook p. 474: valence vertex degree (delta^v_i) -- vertex degree taking into account all valence electrons of the ith atom. It encodes the electronic identity of the atom in terms of both valence electron and core electron counts. Given as: (Z^v_i - h_i) / (Z_i - Z^v_i - 1) For atoms of higher principal quantum levels (P, S, Cl, Br, I), Kier and Hall proposed to account for both valence and nonvalence electrons Z^v_i = number of valence electrons of ith atom h_i = number of hydrogens bonded to atom Z_i = total number of electrons of ith atom (atomic number) :param atom_index: 1-based atomic index :type atom_index: int """ atom = self._getHeavyStructure(st).atom[atom_index] Zv_i = mm.mmelement_get_valence_count(atom.atomic_number) h_i = self._getHydrogenCount(st, atom_index) return old_div(float(Zv_i - h_i), (atom.atomic_number - Zv_i - 1)) @cached_topo_descriptor def _getConnectivityGraph(self, st): """ Build undirected graph corresponding to molecular connectivity. """ graph = networkx.Graph() graph.add_edges_from(self._getBondIndices(st)) return graph @cached_topo_descriptor def _getConnectivityGraphWithWeights(self, st, carbon_weight, atom_weights): """ Build a graph representation of a ligand structure, weighted according to the rules specified for building the multigraph distance matrix (inverse bond order). :param st: a ligand structure :type st: structure.Structure :param carbon_weight: weight assigned to carbon atoms :type carbon_weight: float :param atom_weights: weight assigned to each heavy atom in the ligand; should have length equal to the number of heavy atoms in the ligand :type atom_weights: tuple[float] :return: a weighted graph representation of the ligand `st` :rtype: networkx.Graph """ graph = networkx.Graph() heavy_st = self._getHeavyStructure(st) weight_c2 = carbon_weight * carbon_weight for idx1, idx2 in self._getBondIndices(st): weight1, weight2 = atom_weights[idx1 - 1], atom_weights[idx2 - 1] bond_order = self._getConventionalBondOrder(st, idx1, idx2) weight = weight_c2 / (bond_order * weight1 * weight2) graph.add_edge(idx1, idx2, weight=weight) return graph @cached_topo_descriptor def _getDistanceMatrix(self, st): """ Handbook p. 112: distance matrix (D) -- matrix of all topological distances between all the atom pairs. Also known as the vertex distance matrix. The topological distance d_ij the number of edges in the shortest path between the vertices vi and vi; the off-diagonal entries of the distance matrix equal one if vertices vi and v, are adjacent (i.e. the atoms i and j are bonded) and are greater than one otherwise. The diagonal elements are of course equal to zero. The distance matrix is symmetric with dimension A x A, where A is the number of atoms, and is derived from the H-depleted molecular graph. """ n = self._getAtomCount(st) dist_mat = numpy.zeros((n, n), dtype=int) graph = self._getConnectivityGraph(st) for i, j in self._getAtomPairs(st): path_length = networkx.shortest_path_length(graph, i, j) dist_mat[i - 1][j - 1] = dist_mat[j - 1][i - 1] = path_length return dist_mat @cached_topo_descriptor def _getReciprocalDistanceMatrix(self, st): """ Handbook p. 112: reciprocal distance matrix (D^-1) -- a square symmetric A x A matrix derived from the distance matrix where each off-diagonal element is the reciprocal of the topological distance d between the considered vertices (and 0 for d_ii). """ return matrix_reciprocal(self._getDistanceMatrix(st)) @cached_topo_descriptor def _getReciprocalSquareDistanceMatrix(self, st): """ Handbook p. 112: reciprocal square distance matrix (D^-2) -- a square symmetric A x A matrix derived from the distance matrix where each off-diagonal element is the reciprocal of the topological distance d between the considered vertices (and 0 for d_ii). """ return numpy.power(self._getReciprocalDistanceMatrix(st), 2) @cached_topo_descriptor def _getVertexDistanceDegree(self, st, atom_index): """ Handbook p. 113, vertex distance degree (sigma_i) -- the ith row sum of the distance matrix. Also known as the distance number, distance index, distance rank, vertex distance sum, or distance of a vertex. :param atom_index: 1-based atomic index :type atom_index: int """ return sum(self._getDistanceMatrix(st)[atom_index - 1]) @cached_topo_descriptor def _getAtomEccentricity(self, st, atom_index): """ Handbook p. 112, atom eccentricity (nu_i) -- maximum value entry in the ith row of the distance matrix, i.e. the maximum distance from theith vertex to any other vertices (vertex eccentricity). :param atom_index: 1-based atomic index :type atom_index: int """ return max(self._getDistanceMatrix(st)[atom_index - 1]) @cached_topo_descriptor def _getDistanceCount(self, st, distance): """ Handbook p. 113, graph distance count (f^k) -- total number of distances in the graph equal to k :param distance: bonded distance (kth-order) :type distance: int """ distance_count = Counter(self._getDistanceMatrix(st).flatten()) return 0.5 * distance_count[distance] @cached_topo_descriptor def _getReciprocalDistanceSum(self, st, atom_index): """ Handbook p. 116: reciprocal distance sum (RDS_i) -- sum of the reciprocal distance matrix elements in the ith row. :param atom_index: 1-based atomic index :type atom_index: int """ return sum(self._getReciprocalDistanceMatrix(st)[atom_index - 1]) @cached_topo_descriptor def _getConnectivityPaths(self, st): """ Handbook p. 85, all shortest paths keyed by path lengths. The paths are equivalent to mth order subgraphs in the molecular structure. """ connectivity_paths = defaultdict(list) # Zero-th order paths are the atoms to themselves: [index, index] connectivity_paths[0] = [[i] for i in self._getAtomIndices(st)] # Higher order paths are taken from shortest paths graph = self._getConnectivityGraph(st) for i, j in self._getAtomPairs(st): for path_ij in networkx.all_simple_paths(graph, i, j, cutoff=5): connectivity_paths[len(path_ij) - 1].append(path_ij) return connectivity_paths @cached_topo_descriptor def _getCyclomaticNumber(self, st): """ Handbook p. 94, cyclomatic number -- the number of independent cycles (or rings), and, more exactly, the number of non-overlapping cycles. Should be equivalent to len(st.ring) """ return self._getBondCount(st) - self._getAtomCount(st) + 1 @cached_topo_descriptor def _getIntrinsicStates(self, st): """ Handbook p. 159: intrinsic state (I_i) -- calculated from the principal quantum number, the number of valence electrons, and the number of sigma electrons of the ith atom in the H-depleted molecular graph. NOTE: Molecular Descriptors p. 284, Table E11 and DRAGON use the simple valence vertex degree when calculating the instrisic state. """ self._assertMultipleHeavyAtoms(st) # prevent divide by zero intrinsic_states = [] heavy_st = self._getHeavyStructure(st) for i in self._getAtomIndices(st): atom = heavy_st.atom[i] L_i = mm.mmelement_get_period(atom.atomic_number) v_i = self._getSimpleValenceVertexDegree(st, i) d_i = self._getVertexDegree(st, i) intrinsic_states.append( old_div((pow(old_div(2.0, L_i), 2) * v_i + 1), d_i)) return intrinsic_states @cached_topo_descriptor def _getFieldEffects(self, st, k=2): """ Handbook p. 159: field effect (Delta I_i) -- calculated as perturbation of the intrinsic state of ith atom by all other atoms in the molecule. The exponent k is a parameter to modify the influence of distant or nearby atoms for particular studies. Usually it is taken as k = 2. """ intrinsic_states = self._getIntrinsicStates(st) field_effect = [] atom_count = self._getAtomCount(st) dist_matrix = self._getDistanceMatrix(st) for i in range(atom_count): field_effect_i = 0.0 for j in range(atom_count): I_diff = intrinsic_states[i] - intrinsic_states[j] d_ij = dist_matrix[i][j] field_effect_i += old_div(float(I_diff), pow(d_ij + 1.0, k)) field_effect.append(field_effect_i) return field_effect @cached_topo_descriptor def _getLaplacianEigenvalues(self, st): """ Handbook p. 253: laplacian eigenvalues (lambda_i) -- from the Laplacian matrix, defined as the difference between the vertex degree matrix and the adjacency matrix. The diagonalization of the Laplacian matrix gives A real eigenvalues hi which constitute the Laplacian spectrum. All eignevalues are (a) non-negative numbers, (b) the last value is always zero, and (c) the second to last is great than zero if the graph is connected. """ # Construct Laplacian matrix; L = V - A lap_matrix = -1 * self._getAdjacencyMatrix(st) for i in self._getAtomIndices(st): lap_matrix[i - 1, i - 1] = self._getVertexDegree(st, i) # Diagonalize the matrix and return sorted eigenvalues eigenvalues = list(numpy.linalg.eigh(lap_matrix)[0]) # Check that A-1 eignevalues exist if not eigenvalues[:-1]: raise TopoError("No Laplacian eigenvalues available") return sorted(eigenvalues, reverse=True) @cached_topo_descriptor def _getChargeTermMatrix(self, st): """ Handbook p. 445: charge term matrix (CT) -- an unsymmetric matrix of charge terms that are graph invariants related to the charge transfer between the pair of considered vertices. The diagonal entries of the CT matrix represent the topological valence of the atoms; the off-diagonal entries CT_ij represent a measure of the net charge transferred from the atom j to the atom i. """ # Galvez matrix; M = A * D^-2 galvex_matrix = numpy.dot(self._getAdjacencyMatrix(st), self._getReciprocalSquareDistanceMatrix(st)) # CT = diag(M) + [M - M^T] return numpy.diag(numpy.diag(galvex_matrix)) + ( galvex_matrix - numpy.transpose(galvex_matrix)) @cached_topo_descriptor def _getAromaticBonds(self, st): """ Collects all bonds in the heavy atom structure that are aromatic. :return: tuple of aromatic bonds :rtype: tuple of `structure.StructureBond` """ adaptor = canvas.ChmMmctAdaptor() heavy_atom_st = self._getHeavyStructure(st) aromatic_info = adaptor.getAromaticityInfo(heavy_atom_st.handle) aromatic_bonds = [] for i, j in self._getBondIndices(st): if aromatic_info[i - 1] and aromatic_info[j - 1]: aromatic_bonds.append(tuple(sorted([i, j]))) return tuple(aromatic_bonds) @cached_topo_descriptor def _getConventionalBondOrder(self, st, i, j): """ Handbook p. 28: conventional bond order (sigma^*) -- within the framework of the graph theory, specifically the multigraph, this is defined as being equal to 1, 2, 3, and 1.5 for single, double, triple and aromatic bonds, respectively. Define zero-order bonds as 1, as to be valid with multigraph distance weight and vdW surface area correction. """ search_indices = tuple(sorted((i, j))) for bond in self._getHeavyStructure(st).bond: indices = tuple(sorted([bond.atom1.index, bond.atom2.index])) if indices == search_indices: # Assume aromatic bond are 1.5, as per handbook if indices in self._getAromaticBonds(st): return 1.5 # Return 1 for zero-order bonds if not bond.order: return 1 # Otherwise use the intenger bond order return bond.order raise TopoError("No bond exists between atoms %d and %d" % (i, j)) @cached_topo_descriptor def _getMultigraphDistanceMatrix(self, st, carbon_weight, atom_weights): """ Handbook p. 488: multigraph distance matrix (``*D``) -- a weighted distance matrix where the distance from vertex vi to vertex vj is obtained by counting the edges in the shortest path between them, where each edge counts as the inverse of the conventional bond order, i.e. the relative topological distance, and therefore contributes 1 / bond order to the overall path length. :param carbon_weight: weight for the carbon atom :type carbon_weight: float :param atom_weights: tuple of weights for each atom :type atom_weights: tuple of float """ atom_count = self._getAtomCount(st) dist_mat = numpy.zeros((atom_count, atom_count), dtype=float) # Diagonal elements are defined as 1 - (X_carbon / X_i) for i, weight_i in enumerate(atom_weights): dist_mat[i][i] = 1.0 - (carbon_weight / float(weight_i)) # Off-diagonal elements are the shortest weighted path length between # atoms graph = self._getConnectivityGraphWithWeights(st, carbon_weight, atom_weights) dist_map = dict(networkx.shortest_path_length(G=graph, weight='weight')) for idx1, idx2 in self._getAtomPairs(st): dist_mat[idx1 - 1][idx2 - 1] = dist_map[idx1][idx2] dist_mat[idx2 - 1][idx1 - 1] = dist_map[idx2][idx1] return dist_mat @cached_topo_descriptor def _getBaryszDistanceMatrix(self, st): """ Handbook p. 488: Barysz distance matrix (D^Z) -- a weighted distance matrix accounting simultaneously for the presence of heteroatoms and multiple bonds in the molecule. """ Z_carbon = 6 Z_weights = [a.atomic_number for a in self._getHeavyStructure(st).atom] return self._getMultigraphDistanceMatrix(st, Z_carbon, tuple(Z_weights)) @cached_topo_descriptor def _getElectronegativityDistanceMatrix(self, st): """ Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance matrix with Sanderson electronegativities See: Handbook p. 488, Barysz distance matrix """ # Sanderson electronegativity (chi^SA) indexed by atomic number # MMolecular Descriptors p. 22, Table A3 electronegativity = { 5: 2.275, 6: 2.746, 7: 3.194, 8: 3.654, 9: 4.000, 13: 1.714, 14: 2.138, 15: 2.515, 16: 2.957, 17: 3.475, 26: 2.000, 27: 2.000, 28: 2.000, 29: 2.033, 30: 2.223, 35: 3.219, 50: 2.298, 53: 2.778, } w_carbon = electronegativity[6] try: w_weights = [ electronegativity[atom.atomic_number] for atom in self._getHeavyStructure(st).atom ] except KeyError as e: raise TopoError("Electronegativity unavailable: %s" % e) return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights)) @cached_topo_descriptor def _getMassDistanceMatrix(self, st): """ Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance matrix with atomic masses See: Handbook p. 488, Barysz distance matrix """ w_carbon = 12.01115 w_weights = (a.atomic_weight for a in self._getFullStructure(st).atom \ if a.atomic_number != 1) return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights)) @cached_topo_descriptor def _getVDWVolumeDistanceMatrix(self, st): """ Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance matrix with van der Waals volumes See: Handbook p. 488, Barysz distance matrix """ # van der Waals volume (V^vdw) indexed by atomic number # Molecular Descriptors p. 22, Table A3 vdw_volume = { 5: 17.875, 6: 22.449, 7: 15.599, 8: 11.494, 9: 9.203, 13: 36.511, 14: 31.976, 15: 26.522, 16: 24.429, 17: 23.228, 26: 41.052, 27: 35.041, 28: 17.157, 29: 11.494, 30: 38.351, 35: 31.059, 50: 45.830, 53: 38.792, } w_carbon = vdw_volume[6] try: w_weights = [ vdw_volume[atom.atomic_number] for atom in self._getHeavyStructure(st).atom ] except KeyError as e: raise TopoError("van der Waals volume unavailable: %s" % e) return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights)) @cached_topo_descriptor def _getPolarizabilityDistanceMatrix(self, st): """ Molecular Descriptors p. 909: generalized Ivanciuc weighting from the Barysz distance matrix with polarizabilities See: Handbook p. 488, Barysz distance matrix """ # polarizability (alpha) indexed by atomic number # Molecular Descriptors p. 22, Table A3 polarizability = { 5: 3.030, 6: 1.760, 7: 1.100, 8: 0.802, 9: 0.557, 13: 6.800, 14: 5.380, 15: 3.630, 16: 2.900, 17: 2.180, 26: 8.400, 27: 7.500, 28: 6.800, 29: 6.100, 30: 7.100, 35: 3.050, 50: 7.700, 53: 5.350, } w_carbon = polarizability[6] try: w_weights = [ polarizability[atom.atomic_number] for atom in self._getHeavyStructure(st).atom ] except KeyError as e: raise TopoError("Polarizability unavailable: %s" % e) return self._getMultigraphDistanceMatrix(st, w_carbon, tuple(w_weights)) @cached_topo_descriptor def _getAtomicWalkCounts(self, st, k): """ Handbook p. 480: atomic walk count (awc) -- the total number of equipoise walks of length k starting from vertex vi. """ # Get A^k adjacency_k = self._getAdjacencyMatrix(st) for i in range(k - 1): adjacency_k = numpy.dot(adjacency_k, self._getAdjacencyMatrix(st)) return numpy.sum(adjacency_k, axis=0) @cached_topo_descriptor def _getVDWSurfaceArea(self, st): """ Molecular Descriptors p. 609: van der Waals surface area (VSA) -- calculated from the atomic van der Waals radius, summing over contributions from atoms in the adjacency matrix. """ # Correction term depending on the bond order: # 0 for single, 0.1 for aromatic, 0.2 for double, 0.3 for triple correction = {1: 0, 1.5: 0.1, 2: 0.20, 3: 0.30} vdw_surface_areas = [] full_st = self._getFullStructure(st) for i, atom_i in enumerate(full_st.atom, start=1): R_i = atom_i.radius contribution_sum = 0.0 for bond in atom_i.bond: j = bond.atom2.index R_j = bond.atom2.radius # Hydrogens are single bonded, otherwise get bond order if 1 in (atom_i.atomic_number, bond.atom2.atomic_number): c_ij = correction[1] else: c_ij = correction[self._getConventionalBondOrder(st, i, j)] b_ij = mm.mmideal_get_stretch(full_st, (i, j)) - c_ij g_ij = min(max(abs(R_i - R_j), b_ij), R_i + R_j) contribution_sum += old_div((pow(R_j, 2) - pow(R_i - g_ij, 2)), g_ij) # VSA_i = (4 * PI * R_i^2) - (PI * P_I * ij_contributions) vsa_i = math.pi * R_i * (4 * R_i - contribution_sum) vdw_surface_areas.append(vsa_i) return vdw_surface_areas # ============================================================================= # Non-cached helper functions # ============================================================================= def _balbanOperator(self, st, matrix): """ Handbook p. 7, Ivanciuc-Balaban operator IB(M) -- a modified Randic operator summing over all bonded pairs for row sums of each participant:: B/(C+1) * sum_bonds[ (R(i) * R(j)) ^ -0.5 ] C is the cyclomatic number, B is the number of bonds, and R is the row sum operator. """ B = self._getBondCount(st) C = self._getCyclomaticNumber(st) row_sum = [sum(row) for row in matrix] dist_sum = 0.0 for i, j in self._getBondIndices(st): dist_sum += pow(row_sum[i - 1] * row_sum[j - 1], -0.5) return (old_div(float(B), (C + 1))) * dist_sum def _centricIndexSum(self, st, value_function, centric_function): """ Calculates atom centric index based on an atomic property function and a base function of the centric index sum. :param value_function: function to extract atomic property :type value_function: f(st, atom_index) :param centric_function: base function for component value of count :type centric_function: f(count) """ values = [value_function(st, i) for i in self._getAtomIndices(st)] value_count_tuples = Counter(values).most_common() return sum(centric_function(count) for _, count in value_count_tuples) def _generalizedConnectivityIndex(self, st, m, vertex_func, exponent): """ Handbook p. 86: formula for generalized connectivity indices, used for several descriptors. :param m: index order of the connectivity index :type m: int :param vertex_func: vertex degree function :type vertex_func: f(st, index) :param exponent: power to raise vertex degree product :type exponent: float """ self._assertMultipleHeavyAtoms(st) # prevent pow(0, exponent) self._assertPathLength(st, m) connectivity_index = 0.0 for path in self._getConnectivityPaths(st)[m]: vertex_degrees = [vertex_func(st, i) for i in path] connectivity_index += pow(numpy.prod(vertex_degrees), exponent) return connectivity_index def _propertyRangeVDWSurfaceArea(self, st, properties, intervals, n): """ Molecular Descriptors p. 609: P_VSA -- the amount of van der Waals surface area (VSA) having a property value in a certain range. These descriptors correspond to a partition of the molecular surface area conditioned by the atomic values of the property P. :param properties: 1-based atomic index :type properties: list :param intervals: 1-based atomic index :type intervals: list :param n: 1-based interval :type n: int """ surface_areas = self._getVDWSurfaceArea(st) if len(properties) > len(surface_areas): raise TopoError("Mismatch between atom values and number of atoms") surface_area_sum = 0.0 for value, vsai in zip(properties, surface_areas): # Find bin corresponding to a_i <= value < a_i+1 if bisect.bisect_right(intervals, value) + 1 == n: surface_area_sum += vsai return surface_area_sum
# ============================================================================= # External helper functions # =============================================================================
[docs]def check_canvas_license(): """ Check that a CANVAS_FULL license is available to use """ CANVAS_LICENSE = canvas_base.ChmLicenseShared() if not CANVAS_LICENSE.isValid(): raise TopoError("VSA descriptors require Canvas license")
[docs]def matrix_reciprocal(matrix): """ Returns a matrix whose elements are the reciprocal of each of the elements of the original matrix. The diagonal of the matrix must be zeros, and the routine assumes that no other zeros exist in the matrix. """ if numpy.sum(numpy.diag(matrix)) != 0.0: raise TopoError("Diagonal of matrix must be zeros.") inv_matrix = matrix.copy() # Non-zero number for inversion numpy.fill_diagonal(inv_matrix, -1) inv_matrix = old_div(1.0, inv_matrix) # Assign zeros back to diagonal numpy.fill_diagonal(inv_matrix, 0.0) return inv_matrix