# python module
"""
Glide Docking & Gridgen stages.
DockingStage - stage for running Glide docking jobs
GridgenStage - stage for generating Glide grids (not functional)
HBOND_CONSTRAINTS: List of constraints in format: "label <atomnum>"
METAL_CONSTRAINTS: List of constraints in format: "label <atomnum>"
POSIT_CONSTRAINTS: List of constraints in format: "label <x> <y> <z> <radius>"
NOE_CONSTRAINTS: List of constraints in format: "label <x> <y> <z> <min> <max>"
Copyright Schrodinger, LLC. All rights reserved.
"""
import csv
import gzip # For compress()
import os
import re
import shutil
import sys
import tarfile # For cleanup
import time
import schrodinger.utils.subprocess as subprocess
from schrodinger import structure
from schrodinger.application.glide import glide
from schrodinger.application.glide import glideanalysis
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import pipeutils
from schrodinger.pipeline import stage
from schrodinger.utils import csv_unicode
from schrodinger.utils import fileutils
# Name of the Glide driver executable used to build subjob command lines.
glide_path = 'glide'
# Full path to the glide_merge utility used to combine subjob pose files.
GLIDE_MERGE = os.path.join(os.environ['SCHRODINGER'], 'utilities',
                           'glide_merge')
def compress(origFile):
    """
    Gzip-compress the specified file and remove the original.

    :param origFile: Path of the file to compress.
    :type origFile: str

    :return: Path of the compressed file. For "*.mae" inputs the suffix
        becomes "*.maegz"; otherwise ".gz" is appended to the name.
    :rtype: str
    """
    if origFile.endswith('.mae'):
        compFile = origFile + 'gz'  # To produce .maegz
    else:
        compFile = origFile + '.gz'
    # Copy in binary mode: the previous implementation iterated a text-mode
    # handle and wrote str objects to a binary gzip stream, which raises
    # TypeError on Python 3. copyfileobj also avoids line-by-line overhead.
    with open(origFile, 'rb') as orig_handle, \
            gzip.open(compFile, 'wb') as gzip_handle:
        shutil.copyfileobj(orig_handle, gzip_handle)
    fileutils.force_remove(origFile)
    return compFile
[docs]class DockingStage(stage.Stage):
""" VSW stage for performing Glide docking """
def __init__(self, *args, **kwargs):
    """
    Build the stage keyword specification (Glide docking keywords plus
    VSW-specific extras) and declare the expected input/output pins.
    """
    # For these specs, None represents MMIM default
    # Use the specification that $SCHRODINGER/glide uses, except append
    # a few VSW-specific keywords:
    docking_specs = "\n".join(glide.validation_specs('docking'))
    specs = docking_specs + """
RECOMBINE = boolean(default=True) # Whether to recombine ligand files
UNIQUEFIELD = string(default="s_m_title")
OUTVARIANTFIELD = string(default=None) # Field for generated variant codes
NUM_TO_KEEP = integer(default=0) # Keep at most this many output ligands
PERCENT_TO_KEEP = float(default=0.0) # Keep only this percentage of the (input) ligands
COMPRESS_IN_FILES = boolean(default=True) # Whether to compress subjob *_in files
BEST_BY_TITLE = boolean(default=False) # Whether to keep only the best scoring pose per compound at the end.
LVDW = float(default=0.8) # Old name for LIG_VSCALE. glide.py will recognize it.
LIGCCUT = float(default=0.15) # Old name for LIG_CCUT. glide.py will recognize it.
LIGAND_CONFS = string(default=None) # Old name for DOCKING_METHOD. glide.py will recognize it.
DIELECTRIC = float(default=None) # Old name for GLIDE_DIELCO (ignored)
# Metal and H-bond constraint options:
[CONSTRAINT_GROUP:1]
USE_CONS = string()
NREQUIRED_CONS = string()
[CONSTRAINT_GROUP:2]
USE_CONS = string()
NREQUIRED_CONS = string()
[CONSTRAINT_GROUP:3]
USE_CONS = string()
NREQUIRED_CONS = string()
[CONSTRAINT_GROUP:4]
USE_CONS = string()
NREQUIRED_CONS = string()
[FEATURE:1]
__many__ = string()
[FEATURE:2]
__many__ = string()
[FEATURE:3]
__many__ = string()
[FEATURE:4]
__many__ = string()
[FEATURE:5]
__many__ = string()
"""
    stage.Stage.__init__(self, specs=specs, *args, **kwargs)
    self.setMainProduct('glide')
    # Pin 1: ligands to dock; pin 2: the grid to dock against.
    self.addExpectedInput(1, "structures", True)
    self.addExpectedInput(2, "grid", True)
    self.addExpectedOutput(1, "structures", True)
    # Input ligands after GENCODES
    self.addExpectedOutput(2, "structures", False)
    # BEST_BY_TITLE - GUI match:
    # 'Only best scoring pose':'YES'
    # 'All good scoring poses':'NO'
    # 'All states':'NO'
    self.recombined_ligands = []
    self.docked_ligands = {}  # Key: jobname, value: output ligand file
    self.jobdj = None
    # Whether to continue with jobs in the event of subjob failure. The
    # default is True and can be overridden from the command line using the
    # noforce option. See VSW-886
    self._force_jobs = True
    # Restart state-machine marker; advanced as operate() progresses.
    self.status = "NOT STARTED"
    self.input_compound_count = None
    # Key: subjob file number, value: number of input poses in that file.
    self.subjob_inposes = {}
def setupJobs(self):
    """
    Docks the structures in ligfiles to the grid <gridprefix>.

    Creates one Glide subjob per recombined ligand file, adding each
    command to self.jobdj and recording its jobname in
    self.glide_jobnames.
    """
    self.jobdj = self.getJobDJ()
    self.glide_jobnames = []
    # Input pin 2 holds the grid file to dock against:
    gridfile = self.getInput(2).getPath()
    output_type = self.get('POSE_OUTTYPE', 'pv')
    poses_per_lig = self.get('POSES_PER_LIG')
    if poses_per_lig is None:
        poses_per_lig = 1
    # out_pv: whether the output files include the receptor (PV format):
    if output_type.lower() in ['pv', 'poseviewer']:
        self.out_pv = True
    elif output_type.lower() in ['lib', 'ligandlib']:
        self.out_pv = False
    else:
        raise RuntimeError("Bad output type: %s" % output_type)
    # Default for this keyword in glide.py is None (Use MMIM default)
    # MMIM default is True.
    self.compress_poses = self['COMPRESS_POSES']
    if self.compress_poses is None:
        # Keyword not specified Ev:87161:
        self.compress_poses = True
    for i, ligfile in enumerate(self.recombined_ligands):
        filenum = i + 1  # starts at 1
        # Just so that we keep all of the output.
        nreport = self.subjob_inposes[filenum] * poses_per_lig
        glide_jobname = self.genFileName(filenum=filenum)
        input_file = self.write_glide_input_file(ligfile, gridfile, nreport,
                                                 glide_jobname)
        # -RESTART lets the subjob resume from checkpoints if re-launched:
        cmd = [glide_path, '-RESTART', input_file]
        self.debug("CMD: %s" % cmd)
        self.jobdj.addJob(cmd)
        self.glide_jobnames.append(glide_jobname)
def processJobOutputs(self):
    """
    Post-process the Glide subjob results: combine the subjob log files,
    report total CPU time, merge the subjob pose files (keeping only the
    requested number or percentage of compounds), and set the stage
    outputs.
    """
    docked_ligand_files = []
    for filename in self.docked_ligands.values():
        if filename is not None:
            # If at least one ligand from this subjob docked:
            docked_ligand_files.append(filename)
    if not docked_ligand_files:
        msg = "Glide subjob(s) did not produce any poses"
        self.error(msg)
        #raise RuntimeError(msg)
        # Report an empty output instead of failing outright:
        self.setOutput(1, pipeio.Structures([]))
        return
    sys.stdout.flush()
    num_comps_to_keep = self['NUM_TO_KEEP']
    percent_to_keep = self['PERCENT_TO_KEEP']
    if num_comps_to_keep and percent_to_keep:
        raise RuntimeError(
            '"NUM_TO_KEEP" and "PERCENT_TO_KEEP" are mutually exclusive')
    if not num_comps_to_keep and not percent_to_keep:
        # Neither limit was specified; keep everything:
        percent_to_keep = 100.0
    if num_comps_to_keep:
        if num_comps_to_keep > self.input_compound_count:
            num_comps_to_keep = self.input_compound_count
        msg = " Keeping %i of %i compounds." % (num_comps_to_keep,
                                                self.input_compound_count)
        self.info(msg)
        sys.stdout.flush()
    else:
        # Convert the percentage into a compound count (at least 1):
        int_percent = int(percent_to_keep)
        num_comps_to_keep = int(percent_to_keep *
                                self.input_compound_count / 100)
        if num_comps_to_keep < 1:
            num_comps_to_keep = 1
        if num_comps_to_keep > self.input_compound_count:
            num_comps_to_keep = self.input_compound_count
        if int_percent == percent_to_keep:
            self.info(" Keeping %i" % int_percent + "% of compounds.")
        else:
            self.info(" Keeping %f" % percent_to_keep + "% of compounds.")
        msg = " Actually keeping %i of %i compounds." % (
            num_comps_to_keep, self.input_compound_count)
        self.info(msg)
        sys.stdout.flush()
    combined_logfile = self.genFileName(extension="_combined.log")
    combined_logfile_fh = open(combined_logfile, 'w')
    best_by_compound = self['BEST_BY_TITLE']
    self.info(" Best by compound: " + str(best_by_compound))
    sys.stdout.flush()
    total_cpu_time = 0  # seconds
    myre = re.compile(r'Total\sCPU\stime\s=\s+(\d+?\.\d+?)')
    self.info(" Calculating CPU time and combining log files...")
    for glide_jobname, outfile in self.docked_ligands.items():
        logfile = glide_jobname + '.log'
        if outfile is None:
            # This subjob did not produce any poses
            continue
        # Merge the *.log file:
        if not os.path.isfile(logfile):
            self.error(" Glide log file is missing: %s" % logfile)
            # A None handle marks the combined log as unusable:
            combined_logfile_fh = None
        elif combined_logfile_fh:  # log file exists and all OK till now:
            self.debug(" Combining %s..." % logfile)
            try:
                with open(logfile) as fh:
                    for line in fh:
                        match = myre.search(line)
                        if match:
                            total_cpu_time += float(match.group(1))
                        combined_logfile_fh.write(line)
            except IOError:
                self.error(" Error merging Glide log file")
                combined_logfile_fh = None
    # Close the file handle if all files merged successfully; if there
    # was a problem, remove the file altogether:
    if combined_logfile_fh:
        combined_logfile_fh.close()
    elif os.path.isfile(combined_logfile):
        fileutils.force_remove(combined_logfile)
        total_cpu_time = None
    if total_cpu_time:
        # Report with a single decimal place of precision:
        sec = int(total_cpu_time * 10) / 10.0
        hrs = int(sec // 3600)
        msg = "\nAll Glide subjobs: Total CPU time = %i seconds ( %i hours)" % (
            sec, hrs)
        self.info(msg)
        total_cpu_time = int(total_cpu_time)
        sec = total_cpu_time % 60
        mins = ((total_cpu_time - sec) // 60) % 60
        hrs = (total_cpu_time - sec - mins * 60) // 3600
        msg = " Human readable = %i hours, %i minutes, %i seconds\n" % (
            hrs, mins, sec)
        self.info(msg)
        sys.stdout.flush()
    else:
        self.info(
            " have not calculated CPU time or generated combined logfile"
        )
        sys.stdout.flush()
    merged_file, num_merged_sts = self.mergeGlideOutputs(
        docked_ligand_files, num_comps_to_keep, best_by_compound)
    self.info('')  # Line break
    if merged_file is None:
        # May happen in SIP mode, if no poses docked. VSW-926
        self.setOutput(1, pipeio.Structures([]))
    else:
        self.setOutput(1, pipeio.Structures([merged_file], num_merged_sts))
    if self.gencodes and self.outputRequested(2):
        self.setOutput(2, pipeio.Structures(self.recombined_ligands))
    # NOTE(review): the combined logfile is registered even when the merge
    # above removed it -- confirm whether addOutputFile tolerates that.
    self.addOutputFile(combined_logfile)
    self.info("Combined logfile: " + combined_logfile)
    sys.stdout.flush()
def mergeGlideOutputs(self, input_files, num_comps_to_keep,
                      best_by_compound):
    """
    Takes in a list of _lib.mae or _pv.mae files, and merges them together
    based on glidescore. Assumes that the files have already been sorted
    with glide_sort (default in Glide).

    :param input_files: List of docked pose files (PV or LIB).
    :type input_files: list of str

    :param num_comps_to_keep: Stop merging when this number of compounds
        have been reached.
    :type num_comps_to_keep: int

    :param best_by_compound: whether to keep only the best scoring pose per
        compound (based on unique ID property).

    :return: Merged file path and number of structures in it (including rec).
        If none of the structures docked (SIP mode), will return (None, 0).
    :rtype: (str, int)
    """
    assert len(input_files) > 0
    # Write the list of subjob pose files for glide_merge to read.
    # (Use a context manager so the handle is closed even on error.)
    inputs_file = self.genFileName(extension="_files_to_merge.txt")
    with open(inputs_file, 'w') as fh:
        for filename in input_files:
            fh.write(filename + '\n')
    assert os.path.isfile(inputs_file)
    if self.out_pv:
        out_ext = '_pv.mae'
    else:
        out_ext = '_lib.mae'
    if self.compress_poses:
        out_ext += 'gz'
    merged_file = self.genOutputFileName(1, extension=out_ext)
    # Property used to identify unique compounds during the merge:
    if self.uniquefield.upper() == 'NONE':
        id_prop = "i_vsw_st_num_backup"
    else:
        id_prop = self.uniquefield
    cmd = [
        GLIDE_MERGE, '-f', inputs_file, '-m',
        str(num_comps_to_keep), '-id_prop', id_prop, '-o', merged_file
    ]
    if best_by_compound:
        cmd += ['-p', '1']
    # FIXME
    # In case NREPORT keyword was specified for this docking stage:
    # nreport = self.get('NREPORT', 0)
    # if nreport:
    #     cmd += ['-n', str(nreport)]
    self.info("Merging the Glide output files...")
    self.info(subprocess.list2cmdline(cmd))
    gm_log = os.path.abspath('glide_merge.log')
    self.info("For progress, see: %s" % gm_log)
    with open(gm_log, 'w') as fh:
        p = subprocess.Popen(cmd,
                             stderr=fh,
                             stdout=fh,
                             universal_newlines=True)
        ret = p.wait()
    self.info(pipeutils.get_last_20_lines(gm_log))
    if ret != 0:
        self.exit("glide_merge failed; see %s" % gm_log)
    with open(gm_log) as fh:
        stdout = fh.read()
    if "** NO ACCEPTABLE LIGAND POSES WERE FOUND **" in stdout:
        # SIP mode: none of the structures docked.
        return None, 0
    else:
        # (typo fix: message previously read "suffessfully")
        self.info("glide_merge completed successfully")
    self.checkFile(merged_file)
    # Create the CSV file and count the number of compounds and sts:
    self.info("Reading merged structures...")
    # File that will need to be post-processed to generate the CSV:
    # This file is in format: propname1, value1, propname2, value2, ...
    pre_csv_file = self.genFileName(extension="_tmp.csv")
    csv_props = set()  # Set of props in all structures
    num_kept_structures = 0
    kept_compounds = set()
    with csv_unicode.writer_open(pre_csv_file) as precsvfh:
        precsvwriter = csv.writer(precsvfh)
        for st in pipeutils.get_reader(merged_file, astext=True):
            if st.property.get("b_glide_receptor"):
                # Skip the receptor structure (present in PV files):
                continue
            # Write the properties to the pre-csv file:
            row = []
            for propname, value in st.property.items():
                row += [propname, value]
                csv_props.add(propname)
            precsvwriter.writerow(row)
            num_kept_structures += 1
            kept_compounds.add(st.property[id_prop])
    assert num_kept_structures > 0
    num_kept_compounds = len(kept_compounds)
    assert os.path.isfile(pre_csv_file)
    if num_kept_compounds < num_comps_to_keep:
        self.warning("WARNING: Number of Glide output compounds is less "
                     "than the requested number of compounds to keep")
        self.info(" Number of compounds expected to keep: %i" %
                  num_comps_to_keep)
    self.info(" Number of compounds kept: %i" % num_kept_compounds)
    self.info(" Number of structures kept: %i" % num_kept_structures)
    self.generateCsvFile(pre_csv_file, csv_props)
    fileutils.force_remove(pre_csv_file)
    fileutils.force_remove(inputs_file)
    if self.out_pv:
        # Account for the receptor structure in a PV file:
        num_kept_structures += 1
    return merged_file, num_kept_structures
def generateCsvFile(self, pre_csv_file, csv_props):
    """
    Convert the intermediate "pre-CSV" property dump into the final CSV
    report: one header row of property names, then one row per structure.

    :param pre_csv_file: Path to the temporary file whose rows alternate
        property names and values (name1, value1, name2, value2, ...).
    :param csv_props: Collection of all property names seen across the
        merged structures; defines the output columns.
    """
    self.info("Generating csv report file...")
    # Generate the CSV file from the pre-csv file:
    csv_file = self.genOutputFileName(1, extension='.csv')
    header = list(csv_props)
    with csv_unicode.reader_open(pre_csv_file) as precsvfh, \
            csv_unicode.writer_open(csv_file) as csvfh:
        report_writer = csv.writer(csvfh)
        report_writer.writerow(header)
        for raw_row in csv.reader(precsvfh):
            # Pair up the alternating (name, value) items into a dict:
            st_props = dict(zip(raw_row[::2], raw_row[1::2]))
            # Emit the values in header order; blank for missing props:
            report_writer.writerow(
                [st_props.get(prop, '') for prop in header])
    self.info('Properties CSV file: %s' % csv_file)
def check_subjob_output(self, glide_jobname):
    """
    Will return the output file for the specified subjob. If no valid poses
    were produced, None is returned.

    Raises a descriptive RuntimeError if logfile and/or output file is
    incomplete.
    """
    logfile = glide_jobname + '.log'
    if not os.path.isfile(logfile):
        msg = "Both output and log files are missing for job: %s" % glide_jobname
        raise RuntimeError(msg)
    outfile = self._find_subjob_output(glide_jobname)
    if outfile:
        return outfile
    # If we got here, then the sorted output file was missing for this
    # subjob
    # Parse the log file:
    no_out_line = False
    glide_sort_started = False
    glide_sort_command = None
    glide_sort_succeeded = False
    # Output file is missing but log file is in place:
    # Parse the log file to see if Glide produced any structures:
    # (read backwards; the relevant lines are near the end of the log)
    for line in pipeutils.BackwardsReader(logfile):
        # Ev:92076 for the "neither output file" test:
        if 'no output lig structures' in line or 'No Ligand Poses were written' in line or 'No report data collected' in line or 'glide_sort command succeeded, but neither output file' in line:
            no_out_line = True
        if 'Glide is executing the glide_sort command' in line:
            glide_sort_started = True
        if 'utilities/glide_sort' in line or r'utilities\glide_sort' in line:
            # Remember the full command line so we can re-run it below:
            glide_sort_command = line
        if 'glide_sort command succeeded' in line:
            glide_sort_succeeded = True
    if no_out_line:  # No output poses
        return None
    elif glide_sort_succeeded:
        msg = "Glide_sort succeeded, but could not locate output file"
        self.error(msg)
        raise RuntimeError(msg)
    elif glide_sort_started:
        # Poses were produced and glide_sort was launched
        msg = 'Glide produced structures but output file could not be located'
        self.error(msg)
        if not glide_sort_command:
            msg = "No glide_sort command in %s file" % logfile
            raise RuntimeError(msg)
        # glide_sort needs the raw pose file (any of these variants):
        if not os.path.isfile(glide_jobname + '_raw.mae') \
                and not os.path.isfile(glide_jobname + '_raw.maegz') \
                and not os.path.isfile(glide_jobname + '_raw.mae.gz'):
            msg = "Raw file is missing! Cannot run glide_sort."
            raise RuntimeError(msg)
        # Try to re-run glide_sort:
        self.info("Running glide_sort to sort the output poses...")
        self.info('\nRUNNING CMD: %s' % glide_sort_command)
        os.system(glide_sort_command)
        # Check whether glide_sort produced a valid output file:
        outfile = self._find_subjob_output(glide_jobname)
        if outfile:
            self.info("\nSorted poses file: %s\n" % outfile)
            return outfile
        msg = "glide_sort failed for subjob: %s" % glide_jobname
        raise RuntimeError(msg)
    else:  # Invalid log file; neither line present
        msg = "Glide output file missing and log file is incomplete for subjob: %s" % glide_jobname
        raise RuntimeError(msg)
def _find_subjob_output(self, glide_jobname):
if self.out_pv:
outfile = glide_jobname + '_pv.mae'
else:
outfile = glide_jobname + '_lib.mae'
if os.path.isfile(outfile):
return outfile
elif os.path.isfile(outfile + 'gz'):
return outfile + 'gz'
elif os.path.isfile(outfile + '.gz'):
return outfile + '.gz'
else:
return None
def buildDockedLigandsMap(self):
    """
    Builds up a dict of self.docked_ligands from the job names in
    self.glide_jobnames.

    If self._force_jobs is False, the job fails in the event of subjob
    failure. Otherwise, the errors are logged and the job continues.
    """
    self.docked_ligands = {}
    failed_jobnames = []
    for glide_jobname in self.glide_jobnames:
        try:
            outfile = self.check_subjob_output(glide_jobname)
        except RuntimeError as err:
            # Log file could not be read
            self.error(str(err))
            failed_jobnames.append(glide_jobname)
            continue
        else:
            # Log file read OK
            if outfile is None:
                # Subjob succeeded but docked nothing; record None so
                # downstream code can tell it apart from a failure:
                msg = 'WARNING: Glide produced no output structures for job: %s' % glide_jobname
                self.warning(msg)
                self.docked_ligands[glide_jobname] = None
            else:
                self.docked_ligands[glide_jobname] = outfile
    if failed_jobnames:
        if self._force_jobs:
            num_failures = len(failed_jobnames)
            num_total = len(self.glide_jobnames)
            for failed_jobname in failed_jobnames:
                failure_msg = "WARNING: %s failed" % failed_jobname
                self.warning(failure_msg)
            msg = ("WARNING: %i subjobs failed out of a total of %i. "
                   "Continuing with job because '-force' option is "
                   "selected. " % (num_failures, num_total))
            self.warning(msg)
        else:
            # Persist state so a restart can pick up where we left off:
            self.dump()
            self.exit("Some jobs failed. Try restarting. Exiting...")
    self.status = "PROCESSING FILES"
    self.dump()
def operate(self):
    """
    Perform an operation on the input files.

    This is a restartable state machine: self.status is dumped after each
    phase (input prep, job setup, running, processing, cleanup), so a
    restarted stage resumes from the last completed phase.
    """
    if self.status == "NOT STARTED" or \
            self.status == "PREPARING INPUT FILES":
        # Use LIGAND_CONFS (old name for DOCKING_METHOD) if specified:
        # Ev:68426
        # Get DOCKING_METHOD keyword (or LIGAND_CONFS, old name):
        if self['LIGAND_CONFS']:  # Old keyword specified:
            self['DOCKING_METHOD'] = self['LIGAND_CONFS']
        if not self['DOCKING_METHOD']:
            self['DOCKING_METHOD'] = 'confgen'  # Use default
        self.status = "PREPARING INPUT FILES"
        self.recombineInputLigands()
        self.status = "SETTING UP JOBS"
        self.dump()
    if self.status == "SETTING UP JOBS":
        self.info("Setting up Glide jobs...")
        sys.stdout.flush()
        self.setupJobs()
        self.status = "RUNNING JOBS"
        # key: jobname; value: output file (Will be None if the job failed
        # to produce valid poses).
        self.docked_ligands = {}
        self.dump()
    if self.status == "RUNNING JOBS":
        # Update JobDJ to correct options (may change if restarting):
        self.setJobDJOptions(self.jobdj)
        # Ev:64286 if restarting, do not restart jobs that have output files:
        # FIXME Consider subjobs that completed without poses
        if self.jobdj.hasStarted():
            for i, job in enumerate(self.jobdj.all_jobs):
                glide_jobname = self.glide_jobnames[i]
                self.debug('Looking for output of job: %s' %
                           (glide_jobname))
                try:
                    outfile = self.check_subjob_output(glide_jobname)
                except RuntimeError as err:
                    # Log file could not be read
                    self.error(str(err))
                    self.info(' Marking for restart')
                    self.jobdj.markForRestart(job, "no-output")
                else:
                    # Output file found (or job produced no poses)
                    self.info('Output is okay; marking as completed: %s' %
                              (glide_jobname))
                    if outfile is None:
                        # Subjob produced no results
                        self.info(' Job produced no poses: %s' % outfile)
                    else:
                        self.info(' Output file: %s' % outfile)
                    job._markFinished('found-output')
        self.info("Running Glide jobs...")
        sys.stdout.flush()
        self.runJobDJ(self.jobdj)
        # ALL JOBS ARE COMPLETE maybe some have failed but not more than
        # max_failures.
        self.buildDockedLigandsMap()
    if self.status == "PROCESSING FILES":
        docked_ligand_files = []
        for filename in self.docked_ligands.values():
            if filename is not None:
                # If at least one ligand from this subjob docked:
                docked_ligand_files.append(filename)
        self.checkFiles(docked_ligand_files)
        self.processJobOutputs()
        self.status = "CLEANING UP"
        self.dump()
    if self.status == "CLEANING UP":
        if not self.getCleanupRequested():
            self.status = "COMPLETE"
            return
        self.info("Cleaning up...")
        archive_files = []
        for jobname in self.glide_jobnames:
            # Remove these files:
            for ext in ['_raw.mae', '_raw.mae.gz', '_raw.maegz']:
                filename = jobname + ext
                if os.path.isfile(filename):
                    fileutils.force_remove(filename)
            # Remove these files only if the user did not ask for them to
            # be returned (or not generating codes):
            if not self.outputRequested(2) or not self.gencodes:
                # NOTE(review): 'in.maegz' looks like it may be missing a
                # leading underscore ('_in.maegz') -- confirm.
                for ext in ['_in.mae', 'in.maegz']:
                    filename = jobname + ext
                    if os.path.isfile(filename):
                        fileutils.force_remove(filename)
            # Archive these files:
            for ext in ['.inp', '.log', '.rept', '.out', '.xpdes']:
                filename = jobname + ext
                if os.path.isfile(filename):
                    archive_files.append(filename)
            # Compress these files individually:
            # NOTE: Do NOT compress *maegz files as they are already
            # compressed.
            for ext in ['_pv.mae', '_lib.mae']:
                filename = jobname + ext
                if os.path.isfile(filename):
                    self.info(' Compressing %s...' % filename)
                    compress(filename)
        if archive_files:  # There are files to archive:
            # Create an archive for log files:
            tar_file = self.genFileName(extension="_logs.tar.gz")
            log_tar = tarfile.open(tar_file, 'w:gz')
            for filename in archive_files:
                self.info(' Archiving %s...' % filename)
                log_tar.add(filename)
            # Close the archive file:
            log_tar.close()
            # Remove the files that were added to the archive:
            for filename in archive_files:
                fileutils.force_remove(filename)
            # Ev:94264 Add the file to the stage's job record:
            self.addOutputFile(tar_file)
        self.status = "COMPLETE"
        self.dump()
    if self.status == "COMPLETE":
        return
    raise RuntimeError("End of JRStage.operate and self.status = " +
                       self.status)
[docs]class GridgenStage(stage.Stage):
def __init__(self, *args, **kwargs):
    """
    Declare the Gridgen stage keyword specification and its expected
    input (receptor structures) and outputs (grid, optional text).
    """
    specs = """
RECEP_VSCALE = float(default=None)
RECEP_CCUT = float(default=None)
XCENT = float(default=None)
YCENT = float(default=None)
ZCENT = float(default=None)
GRID_CENTER = float_list(default=None)
INNERBOX = integer_list(default=None)
OUTERBOX = float_list(default=None)
WRITEZIP = boolean(default=False) # Write out compressed grids
PEPTIDE = boolean(default=None) # Create grid optimized for polypeptides
HBOND_CONSTRAINTS = string_list(default=None) # H-bond constraints
METAL_CONSTRAINTS = string_list(default=None) # Metal constraints
POSIT_CONSTRAINTS = string_list(default=None) # Positional constraints
NOE_CONSTRAINTS = string_list(default=None) # NOE Positional constraints
"""
    stage.Stage.__init__(self, specs=specs, *args, **kwargs)
    self.setMainProduct('glide')
    self.addExpectedInput(1, "structures", True)
    self.addExpectedOutput(1, "grid", True)
    self.addExpectedOutput(2, "text", False)
    self.jobdj = None
    # Receptor file used as the grid source (set in operate()):
    self.model_file = ""
    self.jobname = None
    self.grid_files = []
    # Restart state-machine marker; advanced as operate() progresses.
    self.status = "NOT STARTED"
def operate(self):
    """
    Perform an operation on the input files.

    Sets up and runs the Gridgen job (resuming from the dumped status if
    restarting), validates the generated grid files, and sets the grid
    output object.

    :raises RuntimeError: if no model file was given or the generated grid
        fails validation.
    """
    enter_status = self.status
    if self.status == "NOT STARTED" or \
            self.status == "SETTING UP JOBS":
        self.status = "SETTING UP JOBS"
        self.info("Setting up the Gridgen job")
        model_file = self.getInput(1).getFiles()[0]
        if not model_file:
            raise RuntimeError(
                "Please specify a model file to generate grids from.")
        self.model_file = model_file
        self.setupJobs()
        self.status = "RUNNING JOBS"
        self.grid_file = None
        self.dump()
    if self.status == "RUNNING JOBS":
        # Update JobDJ to correct options (may change if restarting):
        self.setJobDJOptions(self.jobdj)
        if enter_status == "RUNNING JOBS":
            self.info("Trying to restart jobs...")
        sys.stdout.flush()
        self.runJobDJ(self.jobdj)
        if self['WRITEZIP']:
            # Compressed grid archive is the single deliverable:
            self.grid_file = self.jobname + '.zip'
            self.checkFile(self.grid_file)
        else:
            # Uncompressed grids come as a set of companion files:
            self.grid_file = self.jobname + '.grd'
            grid_files = [
                self.jobname + "_coul2.fld", self.jobname + ".csc",
                self.jobname + ".grd", self.jobname + "_greedy.save",
                self.jobname + ".gsc", self.jobname + ".save",
                self.jobname + ".site", self.jobname + "_vdw.fld"
            ]
            self.checkFiles(grid_files)
        self.info("processing job outputs...")
        sys.stdout.flush()
        time.sleep(1)
        grid = pipeio.Grid(self.grid_file)
        try:
            grid.check()
        except Exception as err:
            # Was a bare "except:", which hid the underlying failure and
            # would also have swallowed KeyboardInterrupt/SystemExit.
            # Chain the original error for diagnosability.
            raise RuntimeError(
                "Grid generation job did not complete successfully!"
            ) from err
        self.info(" Output grid file: %s" % self.grid_file)
        sys.stdout.flush()
        self.setOutput(1, grid)
        self.info(" processing complete.")
        sys.stdout.flush()
        self.status = "COMPLETED"
        self.dump()
    return
def setupJobs(self):
    """
    Build the Gridgen keyword dict from the stage settings, write the
    simplified input file, and queue the grid-generation command on a
    fresh JobDJ.
    """
    outname = self.getOutputName(1)
    self.jobname = outname
    # FIXME move to using the new GRID_CENTER keyword
    keynames = [
        'WRITEZIP', 'RECEP_VSCALE', 'RECEP_CCUT', 'XCENT', 'YCENT', 'ZCENT',
        'GRID_CENTER', 'INNERBOX', 'OUTERBOX', 'PEPTIDE'
    ]
    keywords = {}
    keywords['JOBNAME'] = self.jobname
    keywords['RECEP_FILE'] = self.model_file
    # Add constraints keywords:
    keynames += [
        'HBOND_CONSTRAINTS', 'METAL_CONSTRAINTS', 'POSIT_CONSTRAINTS',
        'NOE_CONSTRAINTS'
    ]
    # Setup keywords:
    # Only forward keywords the user actually set (None means default):
    for key in keynames:
        value = self[key]
        if value is not None:
            keywords[key] = value
    # Write the input file:
    input_file = glide.Gridgen(keywords).writeSimplified()
    self.checkFile(input_file)
    sys.stdout.flush()
    cmd = [glide_path, input_file]
    self.info(
        "Starting glide grid generation...(estimated cpu time 10 minutes)")
    sys.stdout.flush()
    self.jobdj = self.getJobDJ()
    # FIXME self.jobdj.setOptions(verbosity="normal")
    self.jobdj.addJob(cmd)
[docs]class MergeStage(stage.Stage):
"""
Stage for merging up to 999 Glide output files with 'glide_ensemble_merge'.
The input files can be either PV or LIB, and have to be sorted by
GlideScore. The output file will also be sorted, and the GlideScores
can be offset by a specified value for each input file.
The keywords specific to this stage are...
NREPORT Save only the top NREPORT poses. The default is 0, which
retains all poses.
MAXPERLIG Retain at most MAXPERLIG poses for each ligand
(determined by the structure title). The default is 1.
Use 0 to retain all poses.
UNIQUEFIELD Property to uniquely identify compounds by. Default is title.
OFFSETS List of GlideScore offsets (float) to be applied to the
input structures. First item in the list is the offset
for the first input, second is for second input, etc.
If not specified, all are defaulted to 0.0.
MARKERS List of string labels for each receptor. Each ligand will
be marked by this value depending on which input set it
originated from.
The stage takes up to 999 input structure file sets (PV or LIB) and
generates a single output pose file.
"""
def __init__(self, *args, **kwargs):
    """
    See class docstring.
    """
    specs = """
NREPORT = integer(default=0) # Default is unlimited (0)
MAXPERLIG = integer(default=1)
UNIQUEFIELD = string(default="s_m_title")
OFFSETS = float_list(default=None)
MARKERS = string_list(default=None)
"""
    stage.Stage.__init__(self, specs=specs, *args, **kwargs)
    # Pin 1 is mandatory; pins 2..999 are optional extra input sets.
    for pin in range(1, 1000):
        self.addExpectedInput(pin, 'structures', pin == 1)
    self.addExpectedOutput(1, "structures", True)
    self._job_outputs = []
def operate(self):
    """
    Run the 'glide_ensemble_merge' script on all the input files from all
    sets, optionally applying GlideScore offsets to some sets. Generates
    a single output pose file.
    """
    # Fill offset_dict with all input files and offsets:
    offset_dict = {}
    marker_dict = {}
    offsets = self['OFFSETS']
    markers = self['MARKERS']
    uniquefield = self['UNIQUEFIELD']
    inputs = {}  # key: INPUT number, value: list of ligand files
    # Collect the input pins in order until the first missing one:
    i = 0
    while True:
        i += 1
        inputobj = self.getInput(i)
        if not inputobj:
            break  # No more inputs
        ligfiles = inputobj.getFiles()
        inputs[i] = ligfiles
    if offsets:
        if len(offsets) != len(inputs):
            msg = "Number of OFFSETS (%i) != number of INPUTs (%i)" % \
                (len(offsets), len(inputs))
            self.error(msg)
            raise RuntimeError(msg)
    else:
        # Default: no GlideScore offset for any input set:
        offsets = [0.0 for inp in inputs]
    if markers:
        if len(markers) != len(inputs):
            msg = "Number of MARKERS (%i) != number of INPUTs (%i)" % \
                (len(markers), len(inputs))
            self.error(msg)
            raise RuntimeError(msg)
    else:
        # Default: mark each ligand with its input pin number:
        markers = [str(inp) for inp in inputs]
    for i, ligfiles in inputs.items():
        offset = offsets[i - 1]  # 0-indexed
        marker = markers[i - 1]  # 0-indexed
        self.debug("LIGANDS #%i: %s" % (i, ligfiles))
        self.debug("OFFSET #%i: %f" % (i, offset))
        self.debug("MARKER #%i: %s" % (i, marker))
        foundligs = False
        for ligfile in ligfiles:
            foundligs = True
            offset_dict[ligfile] = offset
            marker_dict[ligfile] = marker
        if not foundligs:
            self.warning("WARNING: No structure files for input pin %i" % i)
    if not offset_dict:
        msg = "ERROR: No structure files were found to merge"
        raise RuntimeError(msg)
    # Run glide_ensemble_merge on all the input files:
    gem_jobname = self.stagename
    args = ['-j', gem_jobname]
    nr = self['NREPORT']
    # If not specified, use default of 'unlimited'
    if nr:
        args.extend(['-n', str(nr)])
    mpl = self['MAXPERLIG']
    # If not specified, use default of 1.
    if mpl:
        args.extend(['-m', str(mpl)])
    # Each input file is passed as "<file>:<offset>:<marker>":
    for ligfile, offset in offset_dict.items():
        marker = marker_dict.get(ligfile)
        a = '%s:%f:%s' % (ligfile, offset, marker)
        args.append(a)
    args += ["-unique_field", uniquefield]
    schro = os.environ['SCHRODINGER']
    script = os.path.join(schro, 'utilities', 'glide_ensemble_merge')
    cmd = [script] + args
    # Run with the -epv option, which adds special markers pairing
    # receptors and poses and merges them into a single _epv.maegz file
    # See VSW-860
    cmd.append("-epv")
    self.info("Running glide_ensemble_merge command:")
    self.info("%s" % cmd)
    self.info("...")
    gemlog = 'glide_ensemble_merge.log'
    verbosity = self.getVerbosity()
    if verbosity == 'quiet':
        # Redirect the script's stdout to a log file in quiet mode:
        fh = open(gemlog, 'w')
        subprocess.call(cmd, stdout=fh)
        fh.close()
    else:
        subprocess.call(cmd)
    self.info(" complete.")
    # Rename glide_ensemble_merge outputs:
    merged_file = gem_jobname + '_epv.maegz'
    msg = "glide_ensemble_merge did not produce an output:"
    self.checkFile(merged_file, msg)
    renamed_outfile = self.genOutputFileName(1, extension="_epv.maegz")
    shutil.move(merged_file, renamed_outfile)
    self.checkFile(renamed_outfile, "Failed to move file to:")
    self.debug("Merged poses file: " + renamed_outfile)
    if os.path.isfile(gemlog):
        fileutils.force_remove(gemlog)
    self.setOutput(1, pipeio.Structures([renamed_outfile]))
[docs]class GlideSortStage(stage.Stage):
"""
Stage for sorting Glide _pv/lib.mae files. Currently, a sorted file is
produced for each input file; that is, the input files are merged and
sorted together.
The keywords specific to this stage are...
BLOCK_SORT_BY_TITLE Whether to block-sort the poses by title. If
True (the default), all poses with the same
title are grouped and sorted by the
INTRATITLE_SORT_KEYS keys. The blocks
themselves are sorted by applying the
SORT_KEYS to just the top pose in each block.
INTRATITLE_SORT_KEYS The sort key(s) within a title block. The
default is 'r_i_glide_emodel' (appropriate
for Glide HTVS/SP results). Multiple sort
keys must be whitespace-separated.
SORT_KEYS The key(s) for sorting the pose file. If
BLOCK_SORT_BY_TITLE is True, the blocks are
sorted by the values of their top pose's
SORT_KEYS properties. Multiple sort keys
must be whitespace-separated.
OUTPUT_POSES_PER_TITLE The number of top poses per title to keep.
Applies only if BLOCK_SORT_BY_TITLE is true.
The default is 0, which means retain all
poses.
INPUT_TYPE Specify whether the receptor is ('pv') or is
not ('lib') in the file. By default,
the type will be determined from
the file name (i.e., _pv/lib.mae).
"""
def __init__(self, *args, **kwargs):
    """
    See class docstring.
    """
    # Register the stage keywords, then declare the single structure
    # input pin and the single structure output pin.
    stage.Stage.__init__(
        self,
        specs="""
BLOCK_SORT_BY_TITLE = boolean(default=True)
INTRATITLE_SORT_KEYS = string(default="r_i_glide_emodel")
SORT_KEYS = string(default="r_i_docking_score")
OUTPUT_POSES_PER_TITLE = integer(default=0)
INPUT_TYPE = string(default=None)
""",
        *args,
        **kwargs)
    self.addExpectedInput(1, "structures", True)
    self.addExpectedOutput(1, "structures", True)
def operate(self):
    """
    Sort each input pose file using the application.glide.glideanalysis
    module. Generates a sorted output pose file for each input pose file.
    """
    pose_files = self.getInput(1).getFiles()
    sorted_files = []
    for filenum, pose_file in enumerate(pose_files, start=1):
        sorted_file = self.genOutputFileName(1,
                                             filenum=filenum,
                                             extension=".mae")
        sorted_files.append(sorted_file)
        # NOTE: the PoseLibrary class is not yet maegz-aware
        library = glideanalysis.PoseLibrary(
            file_name=pose_file,
            file_type=self['INPUT_TYPE'],
            sort_property_1=self['SORT_KEYS'],
            sort_property_2=self['INTRATITLE_SORT_KEYS'],
            block_sort=self['BLOCK_SORT_BY_TITLE'])
        if (self['OUTPUT_POSES_PER_TITLE'] < 1 or
                not self['BLOCK_SORT_BY_TITLE']):
            # Keep every pose, flattening the per-title index blocks:
            pose_indices = [
                idx for title_block in library.title_tuples
                for idx in title_block
            ]
            library.write(sorted_file, indexes=pose_indices)
        else:
            # Keep only the top N poses per title:
            library.writeTopPoses(
                output_lib_name=sorted_file,
                max_pose_per_title=self['OUTPUT_POSES_PER_TITLE'])
    self.setOutput(1, pipeio.Structures(sorted_files))
# EOF