Source code for schrodinger.application.glide.ensemble_selection

r"""
Classes for representing receptor ensembles and for choosing the best
ensembles of a specified size from a cross-docking experiment.

Example::

    from schrodinger.application.glide import ensemble_selection
    es = ensemble_selection.EnsembleSelection(fname='xdock.csv',
                                              exp_dg_fname='exp.txt')

    # get the 10 best 3-member ensembles

    print "Best by count"
    for ens in es.best_ensembles_by_count(3, 10):
        print "%d\t%.3f\t%s" % (ens.count, ens.rmsd, ens)

    print "\nBest by rmsd"
    for ens in es.best_ensembles_by_rmsd(3, 10):
        print "%d\t%.3f\t%s" % (ens.count, ens.rmsd, ens)

For more details, see the documentation for the EnsembleSelection class.
"""
#Contributors: Ivan Tubert-Brohman, Matt Repasky

import heapq

from schrodinger.utils import log

logger = log.get_output_logger(__file__)


[docs]def lignum_reader(reader, lignum_prop="i_i_glide_lignum", ligoffset=0, ignore_dups=False): """ A generator that returns the structures in fname that have lignum_prop > ligoffset. 'reader' is an iterator that generates Structure objects (e.g., a StructureReader). IMPORTANT NOTES: 1) This requires that the structures are sorted by lignum (as in a raw file from Glide), and that each lignum appears once at most. An exception will be raised otherwise. However, the no-duplicate requirement can be waived by passing a True value for ignore_dups. 2) If a ligand is missing in the file, the generator returns None for that ligand. For example, if lig2 is not in the file, we get (lig1, None, lig3...) """ ilig = ligoffset + 1 last_lignum = None for ct in reader: if ct.property.get('b_glide_receptor'): continue lignum = ct.property[lignum_prop] if last_lignum is not None and lignum <= last_lignum: msg = ("Ligand file is not sorted by lignum in strictly" f" monotonically increasing order. {lignum=}") try: msg += "; filename=" + reader.reader.getFilename() except AttributeError: # In case `reader` is not really a StructureReader but just # some iterator of Structure. pass if lignum == last_lignum and ignore_dups: logger.warning(msg) continue else: raise ValueError(msg) last_lignum = lignum while ilig < lignum: ilig += 1 yield None if lignum == ilig: ilig += 1 yield ct else: # lignum < ilig pass # just skip this structure until we reach the one we want
[docs]class Ensemble_Priority_Queue(object): """ Class implementing a priority queue for the purpose of maintaining lists of ensembles in memory during ensemble selection """
[docs] def __init__(self, nslots): self.heap = [] self.nslots = nslots
[docs] def add(self, ensemble_tuple): if ensemble_tuple.ensemble not in self.ensembles: heapq.heappush(self.heap, ensemble_tuple)
[docs] def pop(self): ensemble_tuple = heapq.heappop(self.heap) return ensemble_tuple
[docs] def best_scoring_element(self): return max(self.heap)
[docs] def worst_saved_score(self): return self.heap[0].score
[docs] def pushpop(self, ensemble_tuple): if (self.length < self.nslots): self.add(ensemble_tuple) elif (ensemble_tuple.score > self.worst_saved_score()): length_before_add = self.length self.add(ensemble_tuple) if (self.length > length_before_add): self.pop()
@property def length(self): return len(self.heap) @property def ensembles(self): return [element.ensemble for element in self.heap]