Source code for schrodinger.application.hitexpander

import os
import re

from schrodinger import structure
from schrodinger.infra import canvas
from schrodinger.infra import phase
from schrodinger.job import jobcontrol
from schrodinger.utils import fileutils
from schrodinger.utils import log
from schrodinger.utils import multifpfile
from schrodinger.utils.multifpfile import FP_NAME_TO_TYPE
from schrodinger.utils.multifpfile import FP_TYPE_TO_NAME
from schrodinger.utils.multifpfile import make_fp_generator

# Name of the output logger that `structure_file_reader` falls back to when
# the caller does not pass a logger.
LOGGER_NAME = 'hitexpander'

# Structure file formats that `check_fp_availability` accepts as input.
SUPPORTED_STRUCTURE_FILE_FORMATS = (fileutils.SD, fileutils.SMILES,
                                    fileutils.MAESTRO, fileutils.SMILESCSV)

#------------------------------------------------------------------------------#


def fp_types():
    '''
    Lists the names of the available fingerprint types.

    :return: Fingerprint type names (the values of `FP_TYPE_TO_NAME`)
        in ascending order.
    :rtype: list
    '''
    names = list(FP_TYPE_TO_NAME.values())
    names.sort()
    return names
#------------------------------------------------------------------------------#
def make_fp_generator_from_canvas_fpfile(filename):
    '''
    Builds a fingerprint generator from the traits stored in a Canvas FP
    file.

    :param filename: Canvas FP file name.
    :type filename: str

    :return: Fingerprint generator.
    :rtype: `canvas.ChmFPOut32`

    :raises RuntimeError: If the fingerprint traits cannot be determined,
        or the fingerprints are not 32-bit.
    '''
    traits = canvas.getFPInfo(filename)

    if not traits.isValid():
        raise RuntimeError("could not determine fingerprint traits in '%s'" %
                           filename)

    if not traits.is32Bit():
        raise RuntimeError("unexpected fingerprints precision in '%s'" %
                           filename)

    return traits.toFPOut()
#------------------------------------------------------------------------------#
def is_phdb_path(path):
    '''
    Tells whether `path` looks like a path to a Phase DB.

    :param path: Path to examine.
    :type path: str

    :return: `True` if `path` is absolute and ends with ".phdb"
        (compared case-insensitively), `False` otherwise.
    :rtype: bool
    '''
    if not os.path.isabs(path):
        return False
    return path.lower().endswith('.phdb')
#------------------------------------------------------------------------------#
class UnsupportedFormat(ValueError):
    '''
    Raised by this module when a structure file or Phase DB cannot be
    read because its format is not supported.
    '''
#------------------------------------------------------------------------------#


def _raise_unsupported_format(fn):
    '''
    Raises `UnsupportedFormat` for the structure file `fn`.

    :param fn: Name of the offending file (used in the error message).
    :type fn: str

    :raises UnsupportedFormat: Always.
    '''
    raise UnsupportedFormat("'%s': unsupported structure file format." % fn)


#------------------------------------------------------------------------------#


def _apply_isub(db, molids=None, isub=''):
    '''
    Filters out unavailable/undesired/disallowed molecule IDs.

    :param db: Phase DB.
    :type db: `schrodinger.infra.phase.PhpDatabaseFp`

    :param molids: Iterable over desired molecule IDs. `None` means "all";
        an empty iterable selects nothing.
    :type molids: iterable over int or `None`

    :param isub: Phase DB subset file name. Empty means "no subset".
    :type isub: str

    :return: Subset of `molids` allowed by `isub` available from `db`.
        When neither `molids` nor `isub` is given, the IDs are returned
        exactly as `db.getAllIDs()` provides them.
    :rtype: set(int) or list(int)
    '''
    available_ids = db.getAllIDs()

    # Convert to a set whenever an intersection follows. The test must be
    # `molids is not None` rather than truthiness: an empty `molids` would
    # otherwise skip the conversion and the final `&` would be applied to
    # whatever container `getAllIDs()` returned.
    if isub or molids is not None:
        available_ids = set(available_ids)

    if isub:
        available_ids &= set(phase.read_phase_subset(isub))

    if molids is None:
        return available_ids
    return available_ids & set(molids)


#------------------------------------------------------------------------------#
def count_ligands(path, isub=''):
    '''
    Counts the ligands available from a Phase DB or a structure file.

    :param path: Path to a Phase DB (see `is_phdb_path`) or name of a
        structure file.
    :type path: str

    :param isub: Phase DB subset file name; only used for Phase DB input.
    :type isub: str

    :return: Number of ligands.
    :rtype: int

    :raises UnsupportedFormat: If the Phase DB raises `phase.PhpException`,
        or if counting the structures in the file fails for any reason.
    '''
    if not is_phdb_path(path):
        try:
            return structure.count_structures(path)
        except Exception:
            # always raises
            _raise_unsupported_format(path)

    try:
        db = phase.PhpDatabaseFp(path)
        return len(_apply_isub(db, molids=None, isub=isub))
    except phase.PhpException as err:
        raise UnsupportedFormat(err)
#------------------------------------------------------------------------------#
def structure_file_reader(filename, logger=None, molids=None, keepsmiles=False):
    '''
    Generator that yields `(mol_id, st)` tuples for the structures in the
    file identified by `filename`. The file can be in Maestro/SD/SMILES/
    SMILES-CSV format.

    :param filename: File name.
    :type filename: str

    :param logger: Logger for warnings. Defaults to the output logger
        named `LOGGER_NAME`.
    :type logger: `logging.Logger`

    :param molids: IDs (1-based indices) of the desired structures.
        `None` means "all".
    :type molids: iterable over integers

    :param keepsmiles: Only relevant for SMILES input. If true, the SMILES
        text itself is yielded; otherwise it is converted into a
        `schrodinger.structure.Structure` instance (entries that fail to
        convert are logged as warnings and skipped).
    :type keepsmiles: bool

    :raises UnsupportedFormat: If the file format is not one of the above.
    '''
    # `file_format`, not `format`: avoid shadowing the builtin.
    file_format = fileutils.get_structure_file_format(filename)

    if file_format == fileutils.MAESTRO:
        reader = structure.MaestroReader(filename)
    elif file_format == fileutils.SD:
        reader = structure.SDReader(filename)
    elif file_format == fileutils.SMILES:
        reader = structure.SmilesReader(filename)
    elif file_format == fileutils.SMILESCSV:
        reader = structure.SmilesCsvReader(filename)
    else:
        _raise_unsupported_format(filename)

    is_smiles = file_format in (fileutils.SMILES, fileutils.SMILESCSV)
    if is_smiles:
        adaptor = canvas.ChmMmctAdaptor()

    if not logger:
        logger = log.get_output_logger(LOGGER_NAME)

    # Build the lookup set once so the per-structure test is O(1).
    molids_set = set(molids) if molids is not None else None

    for (i, st) in enumerate(reader, 1):
        if molids_set is not None and i not in molids_set:
            continue

        if not is_smiles:
            yield (i, st)
        elif keepsmiles:
            yield (i, st.smiles)
        else:
            try:
                mol = canvas.ChmMol.fromSMILES(st.smiles)
                # canvas2d depends on OpenGL, therefore
                # 1 == canvas2d.ChmAtomOption.H_ExplicitOnly
                canvas.CHM_FORCE2D(mol, True, 1)
                # we need to have 2D coordinates or else going
                # to disappoint mmstereo and/or rdkit_adapter
            except RuntimeError as e:
                # `Logger.warn` is a deprecated alias of `Logger.warning`.
                logger.warning(str(e))
                continue
            # 5 == canvas2d.optionMDL.H_Visible
            yield (i, structure.Structure(adaptor.create(mol, True, 5)))
#------------------------------------------------------------------------------#
def phdb_mol_reader(path, molids=None, isub=''):
    '''
    Generator that yields `(mol_id, st)` tuples for the ligands in the
    Phase DB, in ascending order of molecule ID.

    :param path: Path to the Phase DB.
    :type path: str

    :param molids: IDs of the desired structures. `None` means "all".
    :type molids: iterable over integers

    :param isub: Subset file name (ENUM-285).
    :type isub: str

    :raises UnsupportedFormat: If opening or querying the Phase DB raises
        `phase.PhpException`.
    '''
    try:
        db = phase.PhpDatabaseFp(path)
        selected = _apply_isub(db, molids, isub)
    except phase.PhpException as err:
        raise UnsupportedFormat(err)

    for mol_id in sorted(selected):
        ct = db.getCt(mol_id)
        yield (mol_id, structure.Structure(ct))
#------------------------------------------------------------------------------#
class UnavailableFingerprintType(KeyError):
    '''
    Raised by this module when fingerprints of a requested type are not
    available from the given source.
    '''
#------------------------------------------------------------------------------#


def _unavailable_fptype(path, kind):
    '''
    Raises `UnavailableFingerprintType` for fingerprints of type `kind`
    requested from `path`.

    :param path: Name of the fingerprint source (used in the message).
    :type path: str

    :param kind: Name of the fingerprint type (used in the message).
    :type kind: str

    :raises UnavailableFingerprintType: Always.
    '''
    message = "fingerprints of type '%s' are not available from '%s'" % (
        kind, path)
    raise UnavailableFingerprintType(message)


#------------------------------------------------------------------------------#
def phdb_fpreader(path, kind, molids=None, isub=''):
    '''
    Generator yielding `(mol_id, fp)` tuples for fingerprints of the
    desired `kind` for the molecules selected via `molids` (in order of
    sorted `molids`) from Phase DB pointed to by `path`.

    Being a generator, it raises the exceptions below on first iteration,
    not when it is called.

    :param path: Path to Phase DB.
    :type path: str

    :param kind: Name of the desired fingerprints type.
    :type kind: str

    :param molids: Identificators of the molecules for which fingerprints
        are to be loaded. `None` means "all".
    :type molids: iterable over int

    :param isub: Subset file name (ENUM-285).
    :type isub: str

    :raises UnsupportedFormat: If opening or querying the Phase DB raises
        `phase.PhpException`. This is a `ValueError` subclass.
    :raises UnavailableFingerprintType: If `kind` is not a known
        fingerprint type name or is not stored in the DB. This is a
        `KeyError` subclass.
    '''
    try:
        db = phase.PhpDatabaseFp(path)
        available_ids = _apply_isub(db, molids, isub)
    except phase.PhpException as e:
        # Same exception type as the other Phase DB readers in this module;
        # still a ValueError, so existing `except ValueError` keeps working.
        raise UnsupportedFormat(str(e)) from e

    # An unknown type name is reported like a type that is not stored,
    # instead of leaking a bare KeyError from the lookup table.
    fptype = FP_NAME_TO_TYPE.get(kind)
    if fptype is None or not db.isStored(fptype):
        _unavailable_fptype(path, kind)

    for i in sorted(available_ids):
        yield (i, db.getFp(fptype, i, True))
#------------------------------------------------------------------------------#
def multifpfile_fpreader(path, kind, molids=None):
    '''
    Generator yielding `(mol_id, fp)` tuples for fingerprints of the
    desired `kind` read from the multi-fingerprint file pointed to by
    `path`. The selection via `molids` is delegated to
    `MultiFPFile.iter_fingerprints`.

    :param path: Path to the multi-fingerprint file.
    :type path: str

    :param kind: Name of the desired fingerprints type.
    :type kind: str

    :param molids: Identificators of the molecules for which fingerprints
        are to be loaded.
    :type molids: int containment checkable or `None`

    :raises UnavailableFingerprintType: If the file does not provide
        fingerprints of type `kind`.
    '''
    with multifpfile.MultiFPFile(path) as fpfile:
        available = fpfile.get_typenames()
        if kind not in available:
            _unavailable_fptype(path, kind)

        records = fpfile.iter_fingerprints(typenames=[kind], molids=molids)
        for mol_id, fingerprint in records:
            yield (mol_id, fingerprint)
#------------------------------------------------------------------------------#
def structure_fpreader(path, kind, molids=None, logger=None):
    '''
    Generator yielding `(mol_id, fp)` tuples for fingerprints of the
    desired `kind`, computed on the fly for the structures in the
    structure file pointed to by `path` (in file order).

    :param path: Path to the structure file.
    :type path: str

    :param kind: Name of the desired fingerprints type.
    :type kind: str

    :param molids: Identificators of the molecules for which fingerprints
        are to be generated. A false value (`None` or empty) means "all".
    :type molids: int containment checkable or `None`

    :param logger: Logger passed on to `structure_file_reader`.
    :type logger: `logging.Logger`
    '''
    # generate on-the-fly
    adaptor = canvas.ChmMmctAdaptor()
    generator = make_fp_generator(kind)

    for mol_id, st in structure_file_reader(path, logger):
        if not molids or mol_id in molids:
            bits = generator.generate(adaptor.create(st))
            yield (mol_id, multifpfile.bitset_to_list(bits))
#------------------------------------------------------------------------------#
def canvasfp_fpreader(path, molids=None, logger=None):
    '''
    Generator yielding `(mol_id, fp)` tuples for fingerprints of the
    molecules selected via `molids` (in order of sorted `molids`) from
    Canvas FP file pointed to by `path`.

    :param path: Path to the Canvas FP file.
    :type path: str

    :param molids: Identificators (positions in the file) of the molecules
        for which fingerprints are to be loaded. A false value (`None` or
        empty) means "all".
    :type molids: iterable over int

    :param logger: Not used by this function.
    :type logger: `logging.Logger`

    :raises ValueError: If the fingerprints are neither 32- nor 64-bit.
    '''
    traits = canvas.getFPInfo(path)
    if traits.is32Bit():
        source = canvas.ChmFPIn32(path)
    elif traits.is64Bit():
        source = canvas.ChmFPIn64(path)
    else:
        raise ValueError("unexpected fingerprints precision in '%s'" % path)

    source.rewind()

    if not molids:
        # sequential pass over the whole file
        while source.hasNext():
            position = source.getPos()  # 1-based
            fingerprint = next(source)  # fingerprint at position
            yield (position, multifpfile.bitset_to_list(fingerprint))
        return

    for position in sorted(molids):
        source.setPos(position)
        fingerprint = next(source)  # fingerprint at position
        yield (position, multifpfile.bitset_to_list(fingerprint))
#------------------------------------------------------------------------------#
def check_fp_availability(path, kinds):
    """
    Check whether fingerprints of the desired types are available, raise
    exception if they are not.

    For a Phase DB each type must be stored in the DB; for a structure
    file each name must be a known fingerprint type; otherwise `path` is
    treated as a multi-fingerprint file and each type must be listed in it.

    :param path: Name of the structure/multi-fingerprint file or Phase DB.
    :type path: str

    :param kinds: Iterable over desired fingerprint kinds.
    :type kinds: iterable over str

    :raises UnsupportedFormat: If the Phase DB cannot be opened or the
        structure file format is not supported.
    :raises UnavailableFingerprintType: For the first kind (in iteration
        order) that is not available.
    """
    if is_phdb_path(path):
        try:
            db = phase.PhpDatabaseFp(path)
        except phase.PhpException as e:
            raise UnsupportedFormat(e)
        for name in kinds:
            fp_type = FP_NAME_TO_TYPE.get(name)
            if fp_type is None or not db.isStored(fp_type):
                _unavailable_fptype(path, name)
        return

    # `file_format`, not `format`: avoid shadowing the builtin.
    file_format = fileutils.get_structure_file_format(path)
    if file_format is not None:
        if file_format not in SUPPORTED_STRUCTURE_FILE_FORMATS:
            _raise_unsupported_format(path)
        for name in kinds:
            if name not in FP_NAME_TO_TYPE:
                _unavailable_fptype(path, name)
        return

    types = multifpfile.get_fingerprint_types(path)
    # With no types at all, the first requested kind is reported. Iterating
    # (rather than indexing `kinds[0]`) keeps this working for sets and
    # other non-subscriptable iterables.
    for name in kinds:
        if types is None or name not in types:
            _unavailable_fptype(path, name)
#------------------------------------------------------------------------------#
class LogFormatter(object):
    '''
    Minimal log record formatter: messages at `log.WARNING` level or
    above are prefixed with the level name, others are emitted as is.
    '''

    def format(self, rec):
        '''
        Formats the log record `rec`; also stores the rendered message
        in `rec.message`.

        :param rec: Log record.
        :type rec: `logging.LogRecord`

        :return: Text to be logged.
        :rtype: str
        '''
        rec.message = rec.getMessage()
        if rec.levelno < log.WARNING:
            return rec.message
        return rec.levelname + ': ' + rec.message
#------------------------------------------------------------------------------#
def prog_for_argparse(scriptfile):
    '''
    Returns strings like "$SCHRODINGER/run script.py".

    The directory part of `scriptfile` is dropped, and a trailing ".py"
    followed by exactly one more character (e.g. ".pyc") is reduced to
    ".py".

    :param scriptfile: Name of a script file.
    :type scriptfile: str

    :return: String to be used to launch the script.
    :rtype: str
    '''
    script_name = os.path.basename(scriptfile)
    script_name = re.sub(r'\.py.$', r'.py', script_name, flags=re.IGNORECASE)
    return ' '.join((fileutils.SCHRODINGER_RUN_STR, script_name))
#------------------------------------------------------------------------------#
def add_isub_option(parser):
    '''
    Registers the -isub option with the `parser` (ENUM-285).

    :param parser: Command line argument parser.
    :type parser: `argparse.ArgumentParser`
    '''
    help_text = ('Process only Phase DB subset defined in '
                 'the <subset>_phase.inp file.')
    parser.add_argument('-isub', metavar='<subset>', help=help_text)
#------------------------------------------------------------------------------#
def massage_isub_option(parser, args):
    '''
    Validates and resolves the -isub option (ENUM-285).

    Does nothing when no subset was given. Otherwise appends
    `phase.PHASE_SUBSET_EXT` to the subset name unless already present,
    and stores the runtime path of the resulting file in `args.isub`.

    Reports via `parser.error` when -isub is used without a Phase DB
    (neither `args.database` nor `args.phase_db` is set), or when the
    subset file does not exist.

    :param parser: Command line argument parser.
    :type parser: `argparse.ArgumentParser`

    :param args: Namespace holding command line arguments.
    :type args: `argparse.Namespace`
    '''
    using_phdb = (getattr(args, 'database', False) or
                  getattr(args, 'phase_db', False))
    subset = getattr(args, 'isub', None)

    if not subset:
        return

    if not using_phdb:
        parser.error('-isub is not applicable for input other than Phase DB.')

    suffix = phase.PHASE_SUBSET_EXT
    subset_file = subset if subset.endswith(suffix) else subset + suffix

    args.isub = jobcontrol.get_runtime_path(subset_file)
    if not os.path.isfile(args.isub):
        parser.error(f'missing subset file: "{args.isub}"')
#------------------------------------------------------------------------------#