Source code for schrodinger.application.phase.shape_screen_reporter.prop_utils

"""
APIs for calculating and filtering shape screen report properties.

Copyright Schrodinger LLC, All Rights Reserved.
"""

import csv
import os
import sys
from typing import Tuple
from typing import Union

from schrodinger import adapter
from schrodinger import structure
from schrodinger.infra import phase

# Update search path to allow library analysis modules to be imported.
mmshare_dir = os.path.split(os.path.split(os.environ['MMSHARE_EXEC'])[0])[0]
sys.path.append(os.path.join(mmshare_dir, 'python', 'scripts'))

# These imports must happen after updating the search path.
from library_analysis_dir.utils import cpd_profiler  # isort:skip
from library_analysis_dir.utils import hit_to_lead  # isort:skip
from library_analysis_dir.utils.hit_to_lead import HTL_FUNCTIONS  # isort:skip

# Properties that are automatically calculated and stored in the database:
ALOGP_VSDB = 'r_vsdb_AlogP'
MW_VSDB = 'r_vsdb_MW'
PSA_VSDB = 'r_vsdb_PSA'
HBA_VSDB = 'i_vsdb_HBA'
HBD_VSDB = 'i_vsdb_HBD'
TOTAL_RINGS_VSDB = 'i_vsdb_Total_Rings'
STEREO_CENTERS_VSDB = 'i_vsdb_Stereo_Centers'
PROPERTY_CLASS_VSDB = 's_vsdb_Property_Class'
ION_CLASS_VSDB = 's_vsdb_Ion_Class'  # Calculated only if Epik was run

# The corresponding property names in the MPO profiling modules:
ALOGP_MPO = 'AlogP'
MW_MPO = 'MW'
PSA_MPO = 'PSA'
HBA_MPO = 'HBA'
HBD_MPO = 'HBD'
TOTAL_RINGS_MPO = 'Total Rings'
STEREO_CENTERS_MPO = 'Stereo Centers'
PROPERTY_CLASS_MPO = 'Class'
ION_CLASS_MPO = 'Ion Class'

DEFAULT_VSDB = [
    ALOGP_VSDB, MW_VSDB, PSA_VSDB, HBA_VSDB, HBD_VSDB, TOTAL_RINGS_VSDB,
    STEREO_CENTERS_VSDB, PROPERTY_CLASS_VSDB
]

DEFAULT_MPO = [
    ALOGP_MPO, MW_MPO, PSA_MPO, HBA_MPO, HBD_MPO, TOTAL_RINGS_MPO,
    STEREO_CENTERS_MPO, PROPERTY_CLASS_MPO
]

DEFAULT_MPO_VSDB_DICT = dict(zip(DEFAULT_MPO, DEFAULT_VSDB))

# Properties we need from the hit_to_lead module:
HTL_PROPS_SUBSET = [ALOGP_MPO, PSA_MPO, HBD_MPO, 'HAC']
HTL_FUNCTIONS_SUBSET = {key: HTL_FUNCTIONS[key] for key in HTL_PROPS_SUBSET}

# Used to detect whether Epik has been run on a structure:
EPIK_IONIZATION_PENALTY = 'r_epik_Ionization_Penalty'

# Default cutoff on the number of atoms for a ligand-size structure:
MAX_ATOMS_LIGAND = 150


[docs]def add_physicochemical_properties(st, max_atoms=MAX_ATOMS_LIGAND): """ Adds DEFAULT_VSDB properties to the provided structure if it appears to be a ligand. Adds ION_CLASS_VSDB only if the structure contains the property EPIK_IONIZATION_PENALTY. :param st: Structure to which properties are to be added :type st: structure.Structure :param max_atoms: Properties are not added if the number of atoms exceeds this value :type max_atoms: int :return: 1 if properties were successfully added; 0 if not. This convention facilitates counting the number of successful adds. :rtype: int """ if not is_ligand(st, max_atoms): return 0 # Trap RDKit sanitization errors. try: rdkit_mol = adapter.to_rdkit(st) except ValueError as e: # RDKit already prints a description of the error, so just output # structure name. print(f'Sanitization failed for "{st.title}" - skipping', flush=True) return 0 profile = {} cpd_profiler.add_rdkit_property(rdkit_mol, profile) mpo_profile = hit_to_lead.get_mpo_from_mol(rdkit_mol, HTL_FUNCTIONS_SUBSET) for prop in HTL_PROPS_SUBSET: profile[prop] = mpo_profile[prop] profile[PROPERTY_CLASS_MPO] = cpd_profiler.get_cpd_class(profile) for mpo_prop, vsdb_prop in DEFAULT_MPO_VSDB_DICT.items(): st.property[vsdb_prop] = profile[mpo_prop] if st.property.get(EPIK_IONIZATION_PENALTY) is not None: ion_class = cpd_profiler.get_ion_class(st)[ION_CLASS_MPO] st.property[ION_CLASS_VSDB] = ion_class return 1
[docs]def add_physicochemical_properties_to_hits(hits_file_in, hits_file_out, max_atoms=MAX_ATOMS_LIGAND): """ Calls add_physicochemical_properties for each structure in hits_file_in and writes the resulting structures to hits_file_out. :param hits_file_in: Input Maestro/SD file of hits :type hits_file_in: str :param hits_file_out: Output Maestro/SD file :type hits_file_out: str :param max_atoms: Properties are not added if the number of atoms exceeds this value :type max_atoms: int :return: tuple of total structures read, total structures to which properties were successfully added :rtype: int, int """ structs_read = 0 props_added = 0 with structure.StructureReader(hits_file_in) as reader, \ structure.StructureWriter(hits_file_out) as writer: for st in reader: structs_read += 1 props_added += add_physicochemical_properties(st, max_atoms) writer.append(st) return structs_read, props_added
[docs]def is_ligand(st, max_atoms): """ Returns True if the number of atoms in the provided structure is less than or equal to max_atoms and the structure is not a pharmacophore hypothesis. :param st: The structure in question :type st: structure.Structure :param max_atoms: Cutoff on the total number of atoms :type max_atoms: int """ if st.atom_total <= max_atoms: if st.property.get(phase.PHASE_SITE_TYPES) is None: return True return False
[docs]def read_property_filters(source, delim=','): """ Reads property filters and returns a phase.PhpDbPropertyQuery object that can be used to query a phase.PhpDbPropertyTable. Each line/element of source must contain a single property filter in the following format, assuming comma delimiters: <property>,<operator>,<value1>[,<value2>...] Where, <property> is an m2io-style property name <operator> is '=', '<', '>', '<=', '>=', '<>', 'BETWEEN', 'LIKE' or 'IN' <value1>[,<value2>...] is a list of the appropriate number of values. Must be exactly 2 values in the case of BETWEEN, 1 or more values for IN and exactly 1 value for all other operators. For example: i_vsdb_Stereo_Centers,<,2 r_vsdb_AlogP,BETWEEN,2.0,5.0 s_vsdb_Ion_Class,LIKE,%Basic s_vsdb_Property_Class,IN,Druglike,Near-Druglike,Leadlike :param source: Name of file containing the property filters or an iterable object of property filter strings :type filename: str or iterable of str :param delim: Delimiter that separates values in each string :type delim: str :return: Property filter query :rtype: phase.PhpDbPropertyQuery :raise: ValueError if source is formatted incorrectly, or if any filter is invalid """ if type(source) == str: with open(source) as fh: return read_property_filters_from_iterable(fh, delim) else: return read_property_filters_from_iterable(source, delim)
[docs]def read_property_filters_from_iterable(source, delim=','): """ Reads property filters from an iterable of strings and returns a phase.PhpDbPropertyQuery. :param source: Iterable that contains property filters as delimited strings. Blank strings and strings that start with or contain only the comment character '#' are skipped. :type source: Iterable of str :param delim: Delimiter that separates values in each string :type delim: str :return: Property filter query :rtype: phase.PhpDbPropertyQuery :raise: ValueError if a filter is formatted incorrectly, or if a filter is invalid """ query = phase.PhpDbPropertyQuery(phase.VSDB_PROPERTY_TABLE, phase.PROPERTY_TABLE_ID_COL, phase.PhpFilterLogic_AND) for i, line in enumerate(source, 1): line = line.strip() if not line or line.startswith('# ') or line == '#': continue tokens = next(csv.reader([line], delimiter=delim)) if len(tokens) < 3: msg = f'Not enough values on line {i} of filter' raise ValueError(msg) property, operator, *values = tokens try: col_type = phase.PhpDbPropertyTable.getColumnType(property) op = phase.PhpDbPropertyQuery.getFilterOperatorEnum(operator) if col_type == phase.PhpDbColumnType_INTEGER: query.addIntegerFilter(property, op, list(map(int, values))) elif col_type == phase.PhpDbColumnType_REAL: query.addRealFilter(property, op, list(map(float, values))) else: query.addStringFilter(property, op, values) except phase.PhpException as exc: raise ValueError(str(exc)) return query
[docs]def summarize_properties(property_table, newline=False): """ Returns a string containing the names and ranges of the properties in the provided property table. :param property_table: Table of integer, real and string properties :type property_table: phase.PhpDbPropertyTable :param newline: Whether to add a newline to the end of the summary :type newline: bool :return: Summary of the properties :rtype: str """ COLUMN_LIMITS_DICT = { phase.PhpDbColumnType_INTEGER: property_table.getIntegerColumnLimits, phase.PhpDbColumnType_REAL: property_table.getRealColumnLimits, phase.PhpDbColumnType_TEXT: property_table.getStringColumnLimits } s = ['Name,Min,Max'] for prop in property_table.getColumnNames()[1:]: prop_type = phase.PhpDbPropertyTable.getColumnType(prop) limits = COLUMN_LIMITS_DICT[prop_type](prop) s += [f'{prop},{limits[0]},{limits[1]}'] if newline: s += [] return '\n'.join(s)
[docs]def get_numeric_property_range( property_name: str, reporter: phase.PhpShapeScreenReporter) -> Tuple[Union[int, float]]: """ Return the value range of a numeric property. :param property_name: The property name to evaluate :param reporter: The shape screen reporter containing property data """ prop_table = reporter.getPropertyTable() column_type = prop_table.getColumnType(property_name) if column_type == phase.PhpDbColumnType_INTEGER: values = prop_table.getIntegerColumnLimits(property_name) elif column_type == phase.PhpDbColumnType_REAL: values = prop_table.getRealColumnLimits(property_name) else: raise ValueError(f'Expected the supplied property to have a numerical ' f'data type, but instead got {property_name}') return values