Source code for schrodinger.pipeline.stages.glide

# python module
"""
Glide Docking & Gridgen stages.

    DockingStage - stage for running Glide docking jobs

    GridgenStage - stage for generating Glide grids (not functional)
        HBOND_CONSTRAINTS: List of constraints in format: "label <atomnum>"
        METAL_CONSTRAINTS: List of constraints in format: "label <atomnum>"
        POSIT_CONSTRAINTS: List of constraints in format: "label <x> <y> <z> <radius>"
        NOE_CONSTRAINTS: List of constraints in format: "label <x> <y> <z> <min> <max>"

Copyright Schrodinger, LLC. All rights reserved.

"""

import csv
import gzip  # For compress()
import os
import re
import shutil
import sys
import tarfile  # For cleanup
import time

import schrodinger.utils.subprocess as subprocess
from schrodinger import structure
from schrodinger.application.glide import glide
from schrodinger.application.glide import glideanalysis
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import pipeutils
from schrodinger.pipeline import stage
from schrodinger.utils import csv_unicode
from schrodinger.utils import fileutils

glide_path = 'glide'
GLIDE_MERGE = os.path.join(os.environ['SCHRODINGER'], 'utilities',
                           'glide_merge')


def compress(origFile):
    """
    Compresses the specified file and removes the original
    """
    if origFile.endswith('.mae'):
        compFile = origFile + 'gz'  # To produce .maegz
    else:
        compFile = origFile + '.gz'

    # Copy in binary mode so the data reaches gzip unchanged:
    orig_handle = open(origFile, 'rb')
    gzip_handle = gzip.GzipFile(compFile, mode='wb')
    for line in orig_handle:
        gzip_handle.write(line)
    orig_handle.close()
    gzip_handle.close()

    fileutils.force_remove(origFile)
    return compFile
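# Example of the naming convention used by compress() above (file names are
# hypothetical): compress("subjob1_pv.mae") writes "subjob1_pv.maegz" and
# removes the original, while compress("subjob1.log") writes "subjob1.log.gz";
# in both cases the compressed file name is returned.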
class DockingStage(stage.Stage):
    """
    VSW stage for performing Glide docking
    """
    def __init__(self, *args, **kwargs):
        # For these specs, None represents MMIM default
        # Use the specification that $SCHRODINGER/glide uses, except append
        # a few VSW-specific keywords:
        docking_specs = "\n".join(glide.validation_specs('docking'))
        specs = docking_specs + """
        RECOMBINE = boolean(default=True) # Whether to recombine ligand files
        UNIQUEFIELD = string(default="s_m_title")
        OUTVARIANTFIELD = string(default=None) # Field for generated variant codes
        NUM_TO_KEEP = integer(default=0) # Keep at most this many output ligands
        PERCENT_TO_KEEP = float(default=0.0) # Keep only this percentage of the (input) ligands
        COMPRESS_IN_FILES = boolean(default=True) # Whether to compress subjob *_in files
        BEST_BY_TITLE = boolean(default=False) # Whether to keep only the best scoring pose per compound at the end.
        LVDW = float(default=0.8) # Old name for LIG_VSCALE. glide.py will recognize it.
        LIGCCUT = float(default=0.15) # Old name for LIG_CCUT. glide.py will recognize it.
        LIGAND_CONFS = string(default=None) # Old name for DOCKING_METHOD. glide.py will recognize it.
        DIELECTRIC = float(default=None) # Old name for GLIDE_DIELCO (ignored)

        # Metal and H-bond constraint options:
        [CONSTRAINT_GROUP:1]
        USE_CONS = string()
        NREQUIRED_CONS = string()
        [CONSTRAINT_GROUP:2]
        USE_CONS = string()
        NREQUIRED_CONS = string()
        [CONSTRAINT_GROUP:3]
        USE_CONS = string()
        NREQUIRED_CONS = string()
        [CONSTRAINT_GROUP:4]
        USE_CONS = string()
        NREQUIRED_CONS = string()

        [FEATURE:1]
        __many__ = string()
        [FEATURE:2]
        __many__ = string()
        [FEATURE:3]
        __many__ = string()
        [FEATURE:4]
        __many__ = string()
        [FEATURE:5]
        __many__ = string()
        """

        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        self.setMainProduct('glide')

        self.addExpectedInput(1, "structures", True)
        self.addExpectedInput(2, "grid", True)

        self.addExpectedOutput(1, "structures", True)
        # Input ligands after GENCODES
        self.addExpectedOutput(2, "structures", False)

        # BEST_BY_TITLE - GUI match:
        # 'Only best scoring pose':'YES'
        # 'All good scoring poses':'NO'
        # 'All states':'NO'

        self.recombined_ligands = []
        self.docked_ligands = {}  # Key: jobname, value: output ligand file
        self.jobdj = None

        # Whether to continue with jobs in the event of subjob failure. The
        # default is True and can be overridden from the command line using
        # the noforce option. See VSW-886
        self._force_jobs = True

        self.status = "NOT STARTED"
        self.input_compound_count = None
        self.subjob_inposes = {}
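    # Illustrative DockingStage keyword block (values are hypothetical),
    # combining standard Glide docking keywords with the VSW-specific ones
    # declared above; here SP docking keeps only the best-scoring pose of the
    # top 10% of input compounds:
    #
    #     PRECISION        SP
    #     DOCKING_METHOD   confgen
    #     POSES_PER_LIG    1
    #     PERCENT_TO_KEEP  10.0
    #     BEST_BY_TITLE    True
    #     UNIQUEFIELD      s_m_title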
    def checkInputs(self):
        in2 = self.getInput(2)
        in2.check()
        if not in2.getPath():
            raise RuntimeError("Docking Stage: No grid in Input Object 2!")
        return True
    def recombineInputLigands(self):
        # Run modes:
        # 1. Recombining - and generating codes
        # 2. Recombining - and not generating codes
        # 3. Not Recombining - and not generating codes

        # These keywords are ignored if not recombining:
        self.uniquefield = self['UNIQUEFIELD']
        variantfield = self['OUTVARIANTFIELD']
        self.gencodes = (variantfield is not None)

        recombine = self['RECOMBINE']
        if self.gencodes and not recombine:
            # Ev:76572
            self.info(
                "WARNING: Since variant codes need to be generated, will recombine"
            )
            recombine = True

        if not recombine:  # No recombine (Ev:65662)
            if self.gencodes:
                raise RuntimeError(
                    "ERROR: OUTVARIANTFIELD is not supported if RECOMBINE is FALSE"
                )
            # VSW-813
            if self.uniquefield.upper() == 'NONE':
                raise RuntimeError(
                    "ERROR: UNIQUEFIELD can't be set to NONE if RECOMBINE is FALSE"
                )
            # This is because the docking subjob may produce more than one
            # pose per input compound. Since no unique-field is specified, it
            # is not possible to know how to group the poses into compounds.

            self.info("Will NOT recombine or retitle input ligands")
            self.recombined_ligands = self.getInput(1).getFiles()
            self.info("There will be %i subjobs." %
                      len(self.recombined_ligands))
            self.info("Will identify ligands based on unique field: %s" %
                      self.uniquefield)

            self.info(
                "Counting numbers of structures and compounds in input ligands..."
            )
            self.subjob_inposes = {}  # number of poses in each input job
            filenum = 0
            st_num = 0
            # Count the number of poses in each input file and total number
            # of compounds: Ev:105233
            compound_ids = set()
            for filename in self.recombined_ligands:
                filenum += 1
                # Count the number of structures & compounds in this file:
                file_count = 0
                file_st_num = 0
                for st in pipeutils.get_reader(filename, astext=True):
                    st_num += 1
                    file_st_num += 1
                    file_count += 1
                    # Write progress period:
                    if st_num % 1000 == 0:
                        sys.stdout.write(".")
                        sys.stdout.flush()
                    # Get compound code:
                    try:
                        # Ev:96648 & Ev: 104439
                        compound_id = pipeutils.read_unique_field(
                            st, self.uniquefield)
                        compound_ids.add(compound_id)
                    except KeyError:
                        err = "No field %s in structure #%i (file %s)!" % (
                            self.uniquefield, file_st_num, filename)
                        raise RuntimeError(err)
                self.subjob_inposes[filenum] = file_count

            number_of_compounds = len(compound_ids)
            # Otherwise, the number_of_compounds was already manually
            # incremented
            self.input_compound_count = number_of_compounds

            self.info('')  # To have a new line after the progress periods
            self.info("Number of input compounds: %i" %
                      self.input_compound_count)
            self.info("Number of input structures: %i" % st_num)

            msg = "\nInput number of poses for each subjob:"
            msg += "\n%s" % str(self.subjob_inposes)
            self.info(msg)

            # Ev:118766
            self.info(
                "\nNOTE: %i jobs will be launched, as that's the number of input files. (The option to 'use input files directly for subjobs' was used.)\n"
                % len(self.subjob_inposes))
            return

        # IF GOT HERE THEN RECOMBINING

        if self.gencodes:
            if self.uniquefield.upper() == "NONE":
                self.info(
                    "Will treat each input structure as its own compound (renumbering ligands)."
                )
            else:
                self.info("Will identify ligands based on unique field: %s" %
                          self.uniquefield)
            self.info("Will save generated variant codes into field: %s" %
                      variantfield)
        else:
            self.info("Will identify ligands based on field: %s" %
                      self.uniquefield)

        method = self['DOCKING_METHOD'].lower()
        if method not in ['confgen', 'rigid', 'mininplace', 'inplace']:
            raise RuntimeError("ERROR: Invalid confs option:%s" % method)

        # The limits are set so that smallest job will take 5-15 minutes.
        # The limits are set so that largest job will take 20-30 hours.
        # Bigger numbers mean faster calculation time
        min_job_size, max_job_size = None, None

        precision = self['PRECISION'].upper()
        if precision not in ['HTVS', 'SP', 'XP']:
            raise RuntimeError("Invalid docking PRECISION: %s" % precision)

        # Determine the approximate size of a 1 hour job:
        if precision == "HTVS":
            if method == "inplace" or method == "mininplace":
                msg = "ERROR: %s unavailable for %s" % (method, precision)
                raise RuntimeError(msg)
            elif method == "rigid":
                min_job_size = 8000
            elif method == "confgen":
                min_job_size = 4000
        elif precision == "SP":
            if method == "inplace":
                # ~25min (Try to keep file sizes under 10K)
                min_job_size = 10000
            elif method == "mininplace":
                min_job_size = 5000  # 8,000 ~ 1 hour; 5,000 ~ 40min
            elif method == "rigid":
                min_job_size = 2000
            elif method == "confgen":
                min_job_size = 300
        elif precision == "XP":
            if method == "inplace":
                min_job_size = 4000
            elif method == "mininplace":
                min_job_size = 100
            elif method == "rigid":
                min_job_size = 200
            elif method == "confgen":
                min_job_size = 20

        if min_job_size is None:
            # Should never happen
            msg = "ERROR: Unknown precision/method combination: %s/%s" % (
                precision, method)
            raise RuntimeError(msg)

        max_job_size = min_job_size * 10  # about 10 hours

        input_files = self.getInput(1).getFiles()

        try:
            self.compress_in = self['COMPRESS_IN_FILES']
        except:
            # older input file
            self.compress_in = False

        initial_njobs, adjust_njobs = self.getNJobs()
        # ^--- whether or not to adjust the number of jobs for optimal job size

        self.info(" Separating ligands into %i subjobs." % initial_njobs)
        if adjust_njobs:
            self.info(" (will adjust if needed)")
        else:
            self.info(" (will not adjust)")

        # Algorithm if adjusting the number of subjobs:
        # 1. Write to first file until min_job_size is reached,
        # 2. Create another file and write to it until minimum is reached
        # 3. Repeat step #2 until NJOBS # of files are created.
        # 4. Add ligands to above files until max_job_size is reached
        # 5. After that, start creating additional files.

        self.subjob_compounds = {}  # Number of compounds in each input job
        self.subjob_inposes = {}  # Number of poses in each input job
        recombined_writers_dict = {}  # keys are filenums
        compound_id = None
        filenum = 0  # First structure will be written to filenum #1

        # The compound code of the previous structure:
        current_compound_id = None
        current_compound_id_variant = 0
        number_of_compounds = 0
        st_num = 0  # Processing ligand #

        reader = structure.MultiFileStructureReader(input_files)
        for st in reader:
            if st.property.get("b_glide_receptor"):
                # Skip receptors in PV files
                continue
            st_num += 1
            # Write progress period:
            if st_num % 1000 == 0:
                sys.stdout.write(".")
                sys.stdout.flush()

            if self.uniquefield.upper() == 'NONE':
                compound_id = str(st_num)
                st.property["i_vsw_st_num_backup"] = st_num
            else:
                # Get compound code:
                try:
                    # Ev:96648 & Ev: 104439
                    compound_id = pipeutils.read_unique_field(
                        st, self.uniquefield)
                except KeyError:
                    err = "No field %s in structure #%i (file %s)!" % (
                        self.uniquefield, reader.index_in_current_file,
                        reader.current_filename)
                    raise RuntimeError(err)

            if compound_id != current_compound_id or current_compound_id is None:
                # New compound:
                current_compound_id_variant = 1
                number_of_compounds += 1
                current_compound_id = compound_id
            else:
                # Another variant of the same compound:
                current_compound_id_variant += 1

            # Replacement for GenCodes (Ev:65971)
            if self.gencodes:
                # Regenerating titles & variant codes:
                # FIXME take this code out
                st.property["s_pipeline_title_backup"] = st.title
                st.title = str(compound_id)
                # add ligand variant property based on number of poses counted
                # for the current compound.
                s = "%s-%i" % (compound_id, current_compound_id_variant)
                st.property[variantfield] = s

            # If the compound_id (compound code) for this ligand is the same
            # as for the previous ligand, write them to the same subjob file.
            if current_compound_id_variant == 1:
                # This compound is different from a previous compound.
                # (will also happen for the first structure)
                # Consider writing it to a different file.

                # Determine what the output file for next ligand will be:
                if filenum == 0:
                    # Adjusting or not; start with first file:
                    filenum = 1
                elif not adjust_njobs:
                    # Do not adjust njobs, mix the ligands:
                    filenum += 1
                    if filenum > initial_njobs:
                        filenum = 1
                else:
                    # Adjusting NJOBS (and not the first structure)
                    # Stay in first file if in under-min zone
                    # Go to next EXISTING file if in min-max zone
                    # Create a NEW file if in over-max zone
                    curr_poses = self.subjob_inposes[filenum]
                    if curr_poses >= min_job_size:
                        # min-max or over-max
                        if curr_poses >= max_job_size:
                            # Already wrote maximum number of structures to
                            # this file. Create a new file:
                            filenum += 1
                        else:
                            # Go to the next EXISTING file:
                            filenum += 1
                            if filenum > initial_njobs:
                                filenum = 1
                    else:
                        # Keep writing poses to the first input file
                        pass

                # Either way, another compound was added to <filenum>
                try:
                    self.subjob_compounds[filenum] += 1
                except KeyError:
                    self.subjob_compounds[filenum] = 1
            else:
                # Same compound as previous structure
                pass  # Otherwise write to same file as previous structure

            # Write ligand to current file:
            if filenum not in recombined_writers_dict:
                # first ligand
                filename = self.genFileName(filenum=filenum,
                                            extension="_in.mae")
                if self.compress_in:
                    filename += 'gz'
                writer = structure.StructureWriter(filename)
                writer.filename = filename
                recombined_writers_dict[filenum] = writer
            recombined_writers_dict[filenum].append(st)
            try:
                self.subjob_inposes[filenum] += 1
            except KeyError:
                self.subjob_inposes[filenum] = 1

        self.input_compound_count = number_of_compounds

        self.info('')  # To have a new line after the progress periods
        self.info("Number of input compounds: %i" % self.input_compound_count)
        self.info("Number of input structures: %i" % st_num)

        if len(recombined_writers_dict) != initial_njobs:
            if adjust_njobs:
                self.info("Number of subjobs was adjusted to %i." %
                          len(recombined_writers_dict))
            else:
                self.info("Number of output subjobs: %i" %
                          len(recombined_writers_dict))

        num_list = []
        for i, numposes in self.subjob_inposes.items():
            num_cmpds = self.subjob_compounds[i]
            # NUM:(COMPOUNDS, STRUCTURES)
            num_list.append("%i:(%i, %i)" % (i, num_cmpds, numposes))
        msg = "\nInput number of (compounds, structures) for each subjob:"
        msg += "\n%s" % ", ".join(num_list)
        self.info(msg)

        # Close file handles:
        recombined_files = []
        for writer in recombined_writers_dict.values():
            recombined_files.append(writer.filename)
            writer.close()

        self.recombined_ligands = recombined_files

        self.info('')
        self.checkFiles(self.recombined_ligands)
    def setupJobs(self):
        """
        Docks the structures in ligfiles to the grid <gridprefix>.
        """
        self.jobdj = self.getJobDJ()
        self.glide_jobnames = []

        gridfile = self.getInput(2).getPath()

        output_type = self.get('POSE_OUTTYPE', 'pv')
        poses_per_lig = self.get('POSES_PER_LIG')
        if poses_per_lig is None:
            poses_per_lig = 1

        if output_type.lower() in ['pv', 'poseviewer']:
            self.out_pv = True
        elif output_type.lower() in ['lib', 'ligandlib']:
            self.out_pv = False
        else:
            raise RuntimeError("Bad output type: %s" % output_type)

        # Default for this keyword in glide.py is None (Use MMIM default)
        # MMIM default is True.
        self.compress_poses = self['COMPRESS_POSES']
        if self.compress_poses is None:
            # Keyword not specified Ev:87161:
            self.compress_poses = True

        for i, ligfile in enumerate(self.recombined_ligands):
            filenum = i + 1  # starts at 1
            # Just so that we keep all of the output.
            nreport = self.subjob_inposes[filenum] * poses_per_lig
            glide_jobname = self.genFileName(filenum=filenum)
            input_file = self.write_glide_input_file(ligfile, gridfile,
                                                     nreport, glide_jobname)
            cmd = [glide_path, '-RESTART', input_file]
            self.debug("CMD: %s" % cmd)
            self.jobdj.addJob(cmd)
            self.glide_jobnames.append(glide_jobname)
    def processJobOutputs(self):
        docked_ligand_files = []
        for filename in self.docked_ligands.values():
            if filename is not None:
                # If at least one ligand from this subjob docked:
                docked_ligand_files.append(filename)

        if not docked_ligand_files:
            msg = "Glide subjob(s) did not produce any poses"
            self.error(msg)
            #raise RuntimeError(msg)
            self.setOutput(1, pipeio.Structures([]))
            return

        sys.stdout.flush()

        num_comps_to_keep = self['NUM_TO_KEEP']
        percent_to_keep = self['PERCENT_TO_KEEP']

        if num_comps_to_keep and percent_to_keep:
            raise RuntimeError(
                '"NUM_TO_KEEP" and "PERCENT_TO_KEEP" are mutually exclusive')
        if not num_comps_to_keep and not percent_to_keep:
            percent_to_keep = 100.0

        if num_comps_to_keep:
            if num_comps_to_keep > self.input_compound_count:
                num_comps_to_keep = self.input_compound_count
            msg = " Keeping %i of %i compounds." % (num_comps_to_keep,
                                                    self.input_compound_count)
            self.info(msg)
            sys.stdout.flush()
        else:
            int_percent = int(percent_to_keep)
            num_comps_to_keep = int(percent_to_keep *
                                    self.input_compound_count / 100)
            if num_comps_to_keep < 1:
                num_comps_to_keep = 1
            if num_comps_to_keep > self.input_compound_count:
                num_comps_to_keep = self.input_compound_count
            if int_percent == percent_to_keep:
                self.info(" Keeping %i" % int_percent + "% of compounds.")
            else:
                self.info(" Keeping %f" % percent_to_keep + "% of compounds.")
            msg = " Actually keeping %i of %i compounds." % (
                num_comps_to_keep, self.input_compound_count)
            self.info(msg)
            sys.stdout.flush()

        combined_logfile = self.genFileName(extension="_combined.log")
        combined_logfile_fh = open(combined_logfile, 'w')

        best_by_compound = self['BEST_BY_TITLE']
        self.info(" Best by compound: " + str(best_by_compound))
        sys.stdout.flush()

        total_cpu_time = 0  # seconds
        myre = re.compile(r'Total\sCPU\stime\s=\s+(\d+?\.\d+?)')

        self.info(" Calculating CPU time and combining log files...")
        for glide_jobname, outfile in self.docked_ligands.items():
            logfile = glide_jobname + '.log'
            if outfile is None:
                # This subjob did not produce any poses
                continue
            # Merge the *.log file:
            if not os.path.isfile(logfile):
                self.error(" Glide log file is missing: %s" % logfile)
                combined_logfile_fh = None
            elif combined_logfile_fh:
                # log file exists and all OK till now:
                self.debug(" Combining %s..." % logfile)
                try:
                    with open(logfile) as fh:
                        for line in fh:
                            match = myre.search(line)
                            if match:
                                total_cpu_time += float(match.group(1))
                            combined_logfile_fh.write(line)
                except IOError:
                    self.error(" Error merging Glide log file")
                    combined_logfile_fh = None

        # Close the file handle if all files merged successfully; if there
        # was a problem, remove the file altogether:
        if combined_logfile_fh:
            combined_logfile_fh.close()
        elif os.path.isfile(combined_logfile):
            fileutils.force_remove(combined_logfile)
            total_cpu_time = None

        if total_cpu_time:
            sec = int(total_cpu_time * 10) / 10.0
            hrs = int(sec // 3600)
            msg = "\nAll Glide subjobs: Total CPU time = %i seconds ( %i hours)" % (
                sec, hrs)
            self.info(msg)
            total_cpu_time = int(total_cpu_time)
            sec = total_cpu_time % 60
            mins = ((total_cpu_time - sec) // 60) % 60
            hrs = (total_cpu_time - sec - mins * 60) // 3600
            msg = " Human readable = %i hours, %i minutes, %i seconds\n" % (
                hrs, mins, sec)
            self.info(msg)
            sys.stdout.flush()
        else:
            self.info(
                " have not calculated CPU time or generated combined logfile")
            sys.stdout.flush()

        merged_file, num_merged_sts = self.mergeGlideOutputs(
            docked_ligand_files, num_comps_to_keep, best_by_compound)

        self.info('')  # Line break

        if merged_file is None:
            # May happen in SIP mode, if no poses docked. VSW-926
            self.setOutput(1, pipeio.Structures([]))
        else:
            self.setOutput(1, pipeio.Structures([merged_file],
                                                num_merged_sts))

        if self.gencodes and self.outputRequested(2):
            self.setOutput(2, pipeio.Structures(self.recombined_ligands))

        self.addOutputFile(combined_logfile)
        self.info("Combined logfile: " + combined_logfile)
        sys.stdout.flush()
    def write_glide_input_file(self, ligfile, gridfile, nreport,
                               glide_jobname):
        """
        Write Glide docking job input file
        """
        self.checkFile(ligfile)

        precision = self['PRECISION']
        if (precision == "HTVS"):
            mode = 'htvs'
        elif (precision == "SP"):
            mode = 'sp'
        elif (precision == "XP"):
            mode = 'xp'
        else:
            raise RuntimeError("ERROR: Invalid PRECISION value: %s" %
                               precision)

        if self['WRITE_XP_DESC'] and mode == 'htvs':
            msg = "ERROR: Can't write XP descriptors for %s mode." % precision
            raise RuntimeError(msg)

        # Setup keywords to pass to glide.Dock:
        tmp_config = glide.Dock({})
        keywords = {}
        # Pass all non-VSW specific keywords to Glide:
        for item in glide.validation_specs('docking'):
            if not item:
                continue
            key = item.split()[0]
            value = self.get(key)
            if value != tmp_config.default_values[key]:
                # Add only non-default values VSW-888:
                keywords[key] = value

        # Overwrite some keywords:
        keywords['GRIDFILE'] = gridfile
        keywords['JOBNAME'] = glide_jobname
        keywords['LIGANDFILE'] = ligfile
        keywords['PRECISION'] = precision
        keywords['NREPORT'] = nreport

        # Overwrite POSE_OUTPUT keyword:
        if self.out_pv:
            keywords['POSE_OUTTYPE'] = 'poseviewer'
        else:
            keywords['POSE_OUTTYPE'] = 'ligandlib'

        if keywords.get('GSCORE_CUTOFF') == 'None':
            # older versions of vswinput.py wrote "GSCORE_CUTOFF None" to the
            # input file, and "None" is not a valid float (note that this is
            # actually the string "None", not the Python None object).
            del keywords['GSCORE_CUTOFF']

        # Pass lists of HBOND & METAL constraints to $SCHRODINGER/glide:
        constraints = self['USE_CONS']
        # USE_CONS specified (no constraint groups):
        if constraints:
            nrequired_str = self['NREQUIRED_CONS']
            keywords['CONSTRAINT_GROUP:1'] = \
                {'USE_CONS': constraints,
                 'NREQUIRED_CONS': nrequired_str}

        # Pass CONSTRAINT_GROUP & FEATURE keywords to glide.py:
        for name, section in self.items():
            if name.startswith('CONSTRAINT_GROUP') and section:
                keywords[name] = dict(section)
            if name.startswith('FEATURE') and section:
                keywords[name] = dict(section)

        msg = '*******************\nPassing to glide.py: %s\n*******************' % str(
            keywords)
        self.debug(msg)

        # Write the input file:
        input_file = glide.Dock(keywords).writeSimplified()

        # Make sure the file exists:
        self.checkFile(input_file)
        return input_file
    def mergeGlideOutputs(self, input_files, num_comps_to_keep,
                          best_by_compound):
        """
        Takes in a list of _lib.mae or _pv.mae files, and merges them together
        based on glidescore. Assumes that the files have already been sorted
        with glide_sort (default in Glide).

        :param input_files: List of docked pose files (PV or LIB).
        :type input_files: list of str

        :param num_comps_to_keep: Stop merging when this number of compounds
            have been reached.
        :type num_comps_to_keep: int

        :param best_by_compound: whether to keep only the best scoring pose
            per compound (based on unique ID property).

        :return: Merged file path and number of structures in it (including
            rec). If none of the structures docked (SIP mode), will return
            (None, 0).
        :rtype: (str, int)
        """
        inputs_file = self.genFileName(extension="_files_to_merge.txt")
        fh = open(inputs_file, 'w')
        assert len(input_files) > 0
        for filename in input_files:
            fh.write(filename + '\n')
        fh.close()
        assert os.path.isfile(inputs_file)

        if self.out_pv:
            out_ext = '_pv.mae'
        else:
            out_ext = '_lib.mae'
        if self.compress_poses:
            out_ext += 'gz'
        merged_file = self.genOutputFileName(1, extension=out_ext)

        if self.uniquefield.upper() == 'NONE':
            id_prop = "i_vsw_st_num_backup"
        else:
            id_prop = self.uniquefield

        cmd = [
            GLIDE_MERGE, '-f', inputs_file, '-m', str(num_comps_to_keep),
            '-id_prop', id_prop, '-o', merged_file
        ]
        if best_by_compound:
            cmd += ['-p', '1']

        # FIXME
        # In case NREPORT keyword was specified for this docking stage:
        # nreport = self.get('NREPORT', 0)
        # if nreport:
        #     cmd += ['-n', str(nreport)]

        self.info("Merging the Glide output files...")
        self.info(subprocess.list2cmdline(cmd))
        gm_log = os.path.abspath('glide_merge.log')
        self.info("For progress, see: %s" % gm_log)

        with open(gm_log, 'w') as fh:
            p = subprocess.Popen(cmd,
                                 stderr=fh,
                                 stdout=fh,
                                 universal_newlines=True)
            ret = p.wait()

        self.info(pipeutils.get_last_20_lines(gm_log))
        if ret != 0:
            self.exit("glide_merge failed; see %s" % gm_log)

        with open(gm_log) as fh:
            stdout = fh.read()
        if "** NO ACCEPTABLE LIGAND POSES WERE FOUND **" in stdout:
            return None, 0
        else:
            self.info("glide_merge completed successfully")

        self.checkFile(merged_file)

        # Create the CSV file and count the number of compounds and sts:
        self.info("Reading merged structures...")

        # File that will need to be post-processed to generate the CSV:
        # This file is in format: propname1, value1, propname2, value2, ...
        pre_csv_file = self.genFileName(extension="_tmp.csv")

        csv_props = set()  # Set of props in all structures
        num_kept_structures = 0
        kept_compounds = set()
        with csv_unicode.writer_open(pre_csv_file) as precsvfh:
            precsvwriter = csv.writer(precsvfh)
            for st in pipeutils.get_reader(merged_file, astext=True):
                if st.property.get("b_glide_receptor"):
                    continue
                # Write the properties to the pre-csv file:
                row = []
                for propname, value in st.property.items():
                    row += [propname, value]
                    csv_props.add(propname)
                precsvwriter.writerow(row)
                num_kept_structures += 1
                kept_compounds.add(st.property[id_prop])
        assert num_kept_structures > 0
        num_kept_compounds = len(kept_compounds)
        assert os.path.isfile(pre_csv_file)

        if num_kept_compounds < num_comps_to_keep:
            self.warning("WARNING: Number of Glide output compounds is less "
                         "than the requested number of compounds to keep")
        self.info(" Number of compounds expected to keep: %i" %
                  num_comps_to_keep)
        self.info(" Number of compounds kept: %i" % num_kept_compounds)
        self.info(" Number of structures kept: %i" % num_kept_structures)

        self.generateCsvFile(pre_csv_file, csv_props)

        fileutils.force_remove(pre_csv_file)
        fileutils.force_remove(inputs_file)

        if self.out_pv:
            num_kept_structures += 1

        return merged_file, num_kept_structures
    def generateCsvFile(self, pre_csv_file, csv_props):
        self.info("Generating csv report file...")
        # Generate the CSV file from the pre-csv file:
        csv_file = self.genOutputFileName(1, extension='.csv')
        with csv_unicode.reader_open(pre_csv_file) as precsvfh, \
                csv_unicode.writer_open(csv_file) as csvfh:
            csvwriter = csv.writer(csvfh)
            csv_props_list = list(csv_props)
            csvwriter.writerow(csv_props_list)
            for predata in csv.reader(precsvfh):
                # Generate a dict from the properties for this ct:
                predict = {}
                key = None
                for item in predata:
                    if key:
                        predict[key] = item
                        key = None
                    else:
                        key = item
                # Convert this dict to a row in the actual CSV file:
                newdata = []
                for prop in csv_props_list:
                    value = predict.get(prop, '')
                    newdata.append(value)
                csvwriter.writerow(newdata)
        self.info('Properties CSV file: %s' % csv_file)
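    # Illustrative sketch of the conversion done by generateCsvFile()
    # (property names and values are hypothetical). Each pre-csv row stores
    # alternating property names and values for one structure:
    #
    #     s_m_title,lig-1,r_i_docking_score,-8.21,r_i_glide_emodel,-61.3
    #
    # The final CSV writes the union of property names once as the header and
    # one value column per property, left blank where a structure lacks that
    # property:
    #
    #     s_m_title,r_i_docking_score,r_i_glide_emodel
    #     lig-1,-8.21,-61.3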
    def check_subjob_output(self, glide_jobname):
        """
        Will return the output file for the specified subjob. If no valid
        poses were produced, None is returned. Raises a descriptive
        RuntimeError if logfile and/or output file is incomplete.
        """
        logfile = glide_jobname + '.log'
        if not os.path.isfile(logfile):
            msg = "Both output and log files are missing for job: %s" % glide_jobname
            raise RuntimeError(msg)

        outfile = self._find_subjob_output(glide_jobname)
        if outfile:
            return outfile

        # If we got here, then the sorted output file was missing for this
        # subjob

        # Parse the log file:
        no_out_line = False
        glide_sort_started = False
        glide_sort_command = None
        glide_sort_succeeded = False

        # Output file is missing but log file is in place:
        # Parse the log file to see if Glide produced any structures:
        for line in pipeutils.BackwardsReader(logfile):
            # Ev:92076 for the "neither output file" test:
            if 'no output lig structures' in line or \
                    'No Ligand Poses were written' in line or \
                    'No report data collected' in line or \
                    'glide_sort command succeeded, but neither output file' in line:
                no_out_line = True
            if 'Glide is executing the glide_sort command' in line:
                glide_sort_started = True
            if 'utilities/glide_sort' in line or r'utilities\glide_sort' in line:
                glide_sort_command = line
            if 'glide_sort command succeeded' in line:
                glide_sort_succeeded = True

        if no_out_line:
            # No output poses
            return None
        elif glide_sort_succeeded:
            msg = "Glide_sort succeeded, but could not locate output file"
            self.error(msg)
            raise RuntimeError(msg)
        elif glide_sort_started:
            # Poses were produced and glide_sort was launched
            msg = 'Glide produced structures but output file could not be located'
            self.error(msg)

            if not glide_sort_command:
                msg = "No glide_sort command in %s file" % logfile
                raise RuntimeError(msg)

            if not os.path.isfile(glide_jobname + '_raw.mae') \
                    and not os.path.isfile(glide_jobname + '_raw.maegz') \
                    and not os.path.isfile(glide_jobname + '_raw.mae.gz'):
                msg = "Raw file is missing! Cannot run glide_sort."
                raise RuntimeError(msg)

            # Try to re-run glide_sort:
            self.info("Running glide_sort to sort the output poses...")
            self.info('\nRUNNING CMD: %s' % glide_sort_command)
            os.system(glide_sort_command)

            # Check whether glide_sort produced a valid output file:
            outfile = self._find_subjob_output(glide_jobname)
            if outfile:
                self.info("\nSorted poses file: %s\n" % outfile)
                return outfile
            msg = "glide_sort failed for subjob: %s" % glide_jobname
            raise RuntimeError(msg)
        else:
            # Invalid log file; neither line present
            msg = "Glide output file missing and log file is incomplete for subjob: %s" % glide_jobname
            raise RuntimeError(msg)
    def _find_subjob_output(self, glide_jobname):
        if self.out_pv:
            outfile = glide_jobname + '_pv.mae'
        else:
            outfile = glide_jobname + '_lib.mae'

        if os.path.isfile(outfile):
            return outfile
        elif os.path.isfile(outfile + 'gz'):
            return outfile + 'gz'
        elif os.path.isfile(outfile + '.gz'):
            return outfile + '.gz'
        else:
            return None
    def buildDockedLigandsMap(self):
        """
        Builds up a dict of self.docked_ligands from the job names in
        self.glide_jobnames. If self._force_jobs is False, the job fails in
        the event of subjob failure. Otherwise, the errors are logged and the
        job continues.
        """
        self.docked_ligands = {}
        failed_jobnames = []
        for glide_jobname in self.glide_jobnames:
            try:
                outfile = self.check_subjob_output(glide_jobname)
            except RuntimeError as err:
                # Log file could not be read
                self.error(str(err))
                failed_jobnames.append(glide_jobname)
                continue
            else:
                # Log file read OK
                if outfile is None:
                    msg = 'WARNING: Glide produced no output structures for job: %s' % glide_jobname
                    self.warning(msg)
                    self.docked_ligands[glide_jobname] = None
                else:
                    self.docked_ligands[glide_jobname] = outfile

        if failed_jobnames:
            if self._force_jobs:
                num_failures = len(failed_jobnames)
                num_total = len(self.glide_jobnames)
                for failed_jobname in failed_jobnames:
                    failure_msg = "WARNING: %s failed" % failed_jobname
                    self.warning(failure_msg)
                msg = ("WARNING: %i subjobs failed out of a total of %i. "
                       "Continuing with job because '-force' option is "
                       "selected. " % (num_failures, num_total))
                self.warning(msg)
            else:
                self.dump()
                self.exit("Some jobs failed. Try restarting. Exiting...")

        self.status = "PROCESSING FILES"
        self.dump()
    def operate(self):
        """
        Perform an operation on the input files.
        """
        if self.status == "NOT STARTED" or \
                self.status == "PREPARING INPUT FILES":
            # Use LIGAND_CONFS (old name for DOCKING_METHOD) if specified:
            # Ev:68426
            # Get DOCKING_METHOD keyword (or LIGAND_CONFS, old name):
            if self['LIGAND_CONFS']:
                # Old keyword specified:
                self['DOCKING_METHOD'] = self['LIGAND_CONFS']
            if not self['DOCKING_METHOD']:
                self['DOCKING_METHOD'] = 'confgen'  # Use default

            self.status = "PREPARING INPUT FILES"
            self.recombineInputLigands()
            self.status = "SETTING UP JOBS"
            self.dump()

        if self.status == "SETTING UP JOBS":
            self.info("Setting up Glide jobs...")
            sys.stdout.flush()
            self.setupJobs()
            self.status = "RUNNING JOBS"
            # key: jobname; value: output file (Will be None if the job failed
            # to produce valid poses).
            self.docked_ligands = {}
            self.dump()

        if self.status == "RUNNING JOBS":
            # Update JobDJ to correct options (may change if restarting):
            self.setJobDJOptions(self.jobdj)

            # Ev:64286 if restarting, do not restart jobs that have output files:
            # FIXME Consider subjobs that completed without poses
            if self.jobdj.hasStarted():
                for i, job in enumerate(self.jobdj.all_jobs):
                    glide_jobname = self.glide_jobnames[i]
                    self.debug('Looking for output of job: %s' %
                               (glide_jobname))
                    try:
                        outfile = self.check_subjob_output(glide_jobname)
                    except RuntimeError as err:
                        # Log file could not be read
                        self.error(str(err))
                        self.info(' Marking for restart')
                        self.jobdj.markForRestart(job, "no-output")
                    else:
                        # Output file found (or job produced no poses)
                        self.info('Output is okay; marking as completed: %s' %
                                  (glide_jobname))
                        if outfile is None:
                            # Subjob produced no results
                            self.info(' Job produced no poses: %s' % outfile)
                        else:
                            self.info(' Output file: %s' % outfile)
                        job._markFinished('found-output')

            self.info("Running Glide jobs...")
            sys.stdout.flush()
            self.runJobDJ(self.jobdj)

            # ALL JOBS ARE COMPLETE maybe some have failed but not more than
            # max_failures.
            self.buildDockedLigandsMap()

        if self.status == "PROCESSING FILES":
            docked_ligand_files = []
            for filename in self.docked_ligands.values():
                if filename is not None:
                    # If at least one ligand from this subjob docked:
                    docked_ligand_files.append(filename)
            self.checkFiles(docked_ligand_files)

            self.processJobOutputs()
            self.status = "CLEANING UP"
            self.dump()

        if self.status == "CLEANING UP":
            if not self.getCleanupRequested():
                self.status = "COMPLETE"
                return
            self.info("Cleaning up...")

            archive_files = []
            for jobname in self.glide_jobnames:
                # Remove these files:
                for ext in ['_raw.mae', '_raw.mae.gz', '_raw.maegz']:
                    filename = jobname + ext
                    if os.path.isfile(filename):
                        fileutils.force_remove(filename)

                # Remove these files only if the user did not ask for them to
                # be returned (or not generating codes):
                if not self.outputRequested(2) or not self.gencodes:
                    for ext in ['_in.mae', '_in.maegz']:
                        filename = jobname + ext
                        if os.path.isfile(filename):
                            fileutils.force_remove(filename)

                # Archive these files:
                for ext in ['.inp', '.log', '.rept', '.out', '.xpdes']:
                    filename = jobname + ext
                    if os.path.isfile(filename):
                        archive_files.append(filename)

                # Compress these files individually:
                # NOTE: Do NOT compress *maegz files as they are already
                # compressed.
                for ext in ['_pv.mae', '_lib.mae']:
                    filename = jobname + ext
                    if os.path.isfile(filename):
                        self.info(' Compressing %s...' % filename)
                        compress(filename)

            if archive_files:
                # There are files to archive:
                # Create an archive for log files:
                tar_file = self.genFileName(extension="_logs.tar.gz")
                log_tar = tarfile.open(tar_file, 'w:gz')
                for filename in archive_files:
                    self.info(' Archiving %s...' % filename)
                    log_tar.add(filename)
                # Close the archive file:
                log_tar.close()

                # Remove the files that were added to the archive:
                for filename in archive_files:
                    fileutils.force_remove(filename)

                # Ev:94264 Add the file to the stage's job record:
                self.addOutputFile(tar_file)

            self.status = "COMPLETE"
            self.dump()

        if self.status == "COMPLETE":
            return

        raise RuntimeError("End of JRStage.operate and self.status = " +
                           self.status)
class GridgenStage(stage.Stage):
    def __init__(self, *args, **kwargs):
        specs = """
        RECEP_VSCALE = float(default=None)
        RECEP_CCUT = float(default=None)
        XCENT = float(default=None)
        YCENT = float(default=None)
        ZCENT = float(default=None)
        GRID_CENTER = float_list(default=None)
        INNERBOX = integer_list(default=None)
        OUTERBOX = float_list(default=None)
        WRITEZIP = boolean(default=False) # Write out compressed grids
        PEPTIDE = boolean(default=None) # Create grid optimized for polypeptides
        HBOND_CONSTRAINTS = string_list(default=None) # H-bond constraints
        METAL_CONSTRAINTS = string_list(default=None) # Metal constraints
        POSIT_CONSTRAINTS = string_list(default=None) # Positional constraints
        NOE_CONSTRAINTS = string_list(default=None) # NOE Positional constraints
        """
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        self.setMainProduct('glide')

        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "grid", True)
        self.addExpectedOutput(2, "text", False)

        self.jobdj = None
        self.model_file = ""
        self.jobname = None
        self.grid_files = []
        self.status = "NOT STARTED"
    def operate(self):
        """
        Perform an operation on the input files.
        """
        enter_status = self.status

        if self.status == "NOT STARTED" or \
                self.status == "SETTING UP JOBS":
            self.status = "SETTING UP JOBS"
            self.info("Setting up the Gridgen job")
            model_file = self.getInput(1).getFiles()[0]
            if not model_file:
                raise RuntimeError(
                    "Please specify a model file to generate grids from.")
            self.model_file = model_file
            self.setupJobs()
            self.status = "RUNNING JOBS"
            self.grid_file = None
            self.dump()

        if self.status == "RUNNING JOBS":
            # Update JobDJ to correct options (may change if restarting):
            self.setJobDJOptions(self.jobdj)
            if enter_status == "RUNNING JOBS":
                self.info("Trying to restart jobs...")
            sys.stdout.flush()
            self.runJobDJ(self.jobdj)

            if self['WRITEZIP']:
                self.grid_file = self.jobname + '.zip'
                self.checkFile(self.grid_file)
            else:
                self.grid_file = self.jobname + '.grd'
                grid_files = [
                    self.jobname + "_coul2.fld", self.jobname + ".csc",
                    self.jobname + ".grd", self.jobname + "_greedy.save",
                    self.jobname + ".gsc", self.jobname + ".save",
                    self.jobname + ".site", self.jobname + "_vdw.fld"
                ]
                self.checkFiles(grid_files)

            self.info("processing job outputs...")
            sys.stdout.flush()
            time.sleep(1)

            grid = pipeio.Grid(self.grid_file)
            try:
                grid.check()
            except:
                raise RuntimeError(
                    "Grid generation job did not complete successfully!")

            self.info(" Output grid file: %s" % self.grid_file)
            sys.stdout.flush()
            self.setOutput(1, grid)

            self.info(" processing complete.")
            sys.stdout.flush()
            self.status = "COMPLETED"
            self.dump()
        return
    def setupJobs(self):
        outname = self.getOutputName(1)
        self.jobname = outname

        # FIXME move to using the new GRID_CENTER keyword
        keynames = [
            'WRITEZIP', 'RECEP_VSCALE', 'RECEP_CCUT', 'XCENT', 'YCENT',
            'ZCENT', 'GRID_CENTER', 'INNERBOX', 'OUTERBOX', 'PEPTIDE'
        ]

        keywords = {}
        keywords['JOBNAME'] = self.jobname
        keywords['RECEP_FILE'] = self.model_file

        # Add constraints keywords:
        keynames += [
            'HBOND_CONSTRAINTS', 'METAL_CONSTRAINTS', 'POSIT_CONSTRAINTS',
            'NOE_CONSTRAINTS'
        ]

        # Setup keywords:
        for key in keynames:
            value = self[key]
            if value is not None:
                keywords[key] = value

        # Write the input file:
        input_file = glide.Gridgen(keywords).writeSimplified()
        self.checkFile(input_file)
        sys.stdout.flush()

        cmd = [glide_path, input_file]

        self.info(
            "Starting glide grid generation...(estimated cpu time 10 minutes)")
        sys.stdout.flush()

        self.jobdj = self.getJobDJ()  # FIXME
        self.jobdj.setOptions(verbosity="normal")
        self.jobdj.addJob(cmd)
class MergeStage(stage.Stage):
    """
    Stage for merging up to 999 Glide output files with
    'glide_ensemble_merge'. The input files can be either PV or LIB, and have
    to be sorted by GlideScore. The output file will also be sorted, and the
    GlideScores can be offset by a specified value for each input file.

    The keywords specific to this stage are...

    NREPORT
        Save only the top NREPORT poses. The default is 0, which retains all
        poses.

    MAXPERLIG
        Retain at most MAXPERLIG poses for each ligand (determined by the
        structure title). The default is 1. Use 0 to retain all poses.

    UNIQUEFIELD
        Property to uniquely identify compounds by. Default is title.

    OFFSETS
        List of GlideScore offsets (float) to be applied to the input
        structures. The first item in the list is the offset for the first
        input, the second is for the second input, etc. If not specified, all
        are defaulted to 0.0.

    MARKERS
        List of string labels for each receptor. Each ligand will be marked by
        this value depending on which input set it originated from.

    The stage takes up to 999 input structure file sets (PV or LIB) and
    generates a single output pose file.
    """
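    # Illustrative keyword block for this stage (values are hypothetical),
    # showing how OFFSETS and MARKERS pair up with the input sets by position:
    # the first offset/marker applies to the first input set, the second to
    # the second, and so on:
    #
    #     NREPORT     5000
    #     MAXPERLIG   1
    #     UNIQUEFIELD s_m_title
    #     OFFSETS     0.0, 0.5
    #     MARKERS     receptorA, receptorB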
    def __init__(self, *args, **kwargs):
        """
        See class docstring.
        """
        specs = """
        NREPORT = integer(default=0) # Default is unlimited (0)
        MAXPERLIG = integer(default=1)
        UNIQUEFIELD = string(default="s_m_title")
        OFFSETS = float_list(default=None)
        MARKERS = string_list(default=None)
        """
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        for i in range(1, 1000):
            self.addExpectedInput(i, 'structures', i < 2)
        self.addExpectedOutput(1, "structures", True)

        self._job_outputs = []
    def operate(self):
        """
        Run the 'glide_ensemble_merge' script on all the input files from all
        sets, optionally applying GlideScore offsets to some sets. Generates a
        single output pose file.
        """
        # Fill offset_dict with all input files and offsets:
        offset_dict = {}
        marker_dict = {}
        offsets = self['OFFSETS']
        markers = self['MARKERS']
        uniquefield = self['UNIQUEFIELD']

        inputs = {}  # key: INPUT number, value: list of ligand files
        i = 0
        while True:
            i += 1
            inputobj = self.getInput(i)
            if not inputobj:
                break  # No more inputs
            ligfiles = inputobj.getFiles()
            inputs[i] = ligfiles

        if offsets:
            if len(offsets) != len(inputs):
                msg = "Number of OFFSETS (%i) != number of INPUTs (%i)" % \
                    (len(offsets), len(inputs))
                self.error(msg)
                raise RuntimeError(msg)
        else:
            offsets = [0.0 for inp in inputs]

        if markers:
            if len(markers) != len(inputs):
                msg = "Number of MARKERS (%i) != number of INPUTs (%i)" % \
                    (len(markers), len(inputs))
                self.error(msg)
                raise RuntimeError(msg)
        else:
            markers = [str(inp) for inp in inputs]

        for i, ligfiles in inputs.items():
            offset = offsets[i - 1]  # 0-indexed
            marker = markers[i - 1]  # 0-indexed
            self.debug("LIGANDS #%i: %s" % (i, ligfiles))
            self.debug("OFFSET #%i: %f" % (i, offset))
            self.debug("MARKER #%i: %s" % (i, marker))
            foundligs = False
            for ligfile in ligfiles:
                foundligs = True
                offset_dict[ligfile] = offset
                marker_dict[ligfile] = marker
            if not foundligs:
                self.warning("WARNING: No structure files for input pin %i" %
                             i)

        if not offset_dict:
            msg = "ERROR: No structure files were found to merge"
            raise RuntimeError(msg)

        # Run glide_ensemble_merge on all the input files:
        gem_jobname = self.stagename
        args = ['-j', gem_jobname]

        nr = self['NREPORT']  # If not specified, use default of 'unlimited'
        if nr:
            args.extend(['-n', str(nr)])

        mpl = self['MAXPERLIG']  # If not specified, use default of 1.
        if mpl:
            args.extend(['-m', str(mpl)])

        for ligfile, offset in offset_dict.items():
            marker = marker_dict.get(ligfile)
            a = '%s:%f:%s' % (ligfile, offset, marker)
            args.append(a)

        args += ["-unique_field", uniquefield]

        schro = os.environ['SCHRODINGER']
        script = os.path.join(schro, 'utilities', 'glide_ensemble_merge')
        cmd = [script] + args

        # Run with the -epv option, which adds special markers pairing
        # receptors and poses and merges them into a single _epv.maegz file
        # See VSW-860
        cmd.append("-epv")

        self.info("Running glide_ensemble_merge command:")
        self.info("%s" % cmd)
        self.info("...")

        gemlog = 'glide_ensemble_merge.log'
        verbosity = self.getVerbosity()
        if verbosity == 'quiet':
            fh = open(gemlog, 'w')
            subprocess.call(cmd, stdout=fh)
            fh.close()
        else:
            subprocess.call(cmd)
        self.info(" complete.")

        # Rename glide_ensemble_merge outputs:
        merged_file = gem_jobname + '_epv.maegz'
        msg = "glide_ensemble_merge did not produce an output:"
        self.checkFile(merged_file, msg)

        renamed_outfile = self.genOutputFileName(1, extension="_epv.maegz")
        shutil.move(merged_file, renamed_outfile)
        self.checkFile(renamed_outfile, "Failed to move file to:")
        self.debug("Merged poses file: " + renamed_outfile)

        if os.path.isfile(gemlog):
            fileutils.force_remove(gemlog)

        self.setOutput(1, pipeio.Structures([renamed_outfile]))
class GlideSortStage(stage.Stage):
    """
    Stage for sorting Glide _pv/lib.mae files. Currently, a sorted file is
    produced for each input file; that is, the input files are not merged and
    sorted together.

    The keywords specific to this stage are...

    BLOCK_SORT_BY_TITLE
        Whether to block-sort the poses by title. If True (the default), all
        poses with the same title are grouped and sorted by the
        INTRATITLE_SORT_KEYS keys. The blocks themselves are sorted by
        applying the SORT_KEYS to just the top pose in each block.

    INTRATITLE_SORT_KEYS
        The sort key(s) within a title block. The default is
        'r_i_glide_emodel' (appropriate for Glide HTVS/SP results). Multiple
        sort keys must be whitespace-separated.

    SORT_KEYS
        The key(s) for sorting the pose file. If BLOCK_SORT_BY_TITLE is True,
        the blocks are sorted by the values of their top pose's SORT_KEYS
        properties. Multiple sort keys must be whitespace-separated.

    OUTPUT_POSES_PER_TITLE
        The number of top poses per title to keep. Applies only if
        BLOCK_SORT_BY_TITLE is true. The default is 0, which means retain all
        poses.

    INPUT_TYPE
        Specify whether the receptor is ('pv') or is not ('lib') in the file.
        By default, the type will be determined from the file name (i.e.,
        _pv/lib.mae).
    """
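    # Illustrative keyword block for this stage (values are hypothetical):
    # block-sort by title, rank the poses within each title block and the
    # blocks themselves by docking score rather than the default Emodel, and
    # keep only the two best poses per title:
    #
    #     BLOCK_SORT_BY_TITLE     True
    #     INTRATITLE_SORT_KEYS    r_i_docking_score
    #     SORT_KEYS               r_i_docking_score
    #     OUTPUT_POSES_PER_TITLE  2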
    def __init__(self, *args, **kwargs):
        """
        See class docstring.
        """
        specs = """
        BLOCK_SORT_BY_TITLE = boolean(default=True)
        INTRATITLE_SORT_KEYS = string(default="r_i_glide_emodel")
        SORT_KEYS = string(default="r_i_docking_score")
        OUTPUT_POSES_PER_TITLE = integer(default=0)
        INPUT_TYPE = string(default=None)
        """
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "structures", True)
    def operate(self):
        """
        Sort each input pose file using the application.glide.glideanalysis
        module. Generates a sorted output pose file for each input pose file.
        """
        ligfiles = self.getInput(1).getFiles()
        outfiles = []
        for index, ligfile in enumerate(ligfiles):
            outfile = self.genOutputFileName(1,
                                             filenum=index + 1,
                                             extension=".mae")
            outfiles.append(outfile)
            # NOTE: the PoseLibrary class is not yet maegz-aware
            pl = glideanalysis.PoseLibrary(
                file_name=ligfile,
                file_type=self['INPUT_TYPE'],
                sort_property_1=self['SORT_KEYS'],
                sort_property_2=self['INTRATITLE_SORT_KEYS'],
                block_sort=self['BLOCK_SORT_BY_TITLE'])
            if (self['OUTPUT_POSES_PER_TITLE'] < 1 or
                    not self['BLOCK_SORT_BY_TITLE']):
                # Write all poses
                indices = []
                for item in pl.title_tuples:
                    indices.extend(item[:])
                pl.write(outfile, indexes=indices)
            else:
                pl.writeTopPoses(
                    output_lib_name=outfile,
                    max_pose_per_title=self['OUTPUT_POSES_PER_TITLE'])

        self.setOutput(1, pipeio.Structures(outfiles))
# EOF