# python module
"""
Stages for running MacroModel jobs.
ConfSearchStage - stage for running MacroModel conf-search calculations.
SampleRingsStage - stage for sampling large rings
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Matvey Adzhigirey, Shawn Watts
##########################################################################
# Packages
##########################################################################
import os
import sys
import schrodinger.application.macromodel.input as mmod_input
import schrodinger.application.macromodel.tools as mt
import schrodinger.structure as structure
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import pipeutils
from schrodinger.pipeline import stage
# Path used as the executable (first element) of every subjob command built
# by the stages below: "macromodel" under the directory named by the
# SCHRODINGER environment variable.
# NOTE(review): os.getenv() returns None when SCHRODINGER is unset, in which
# case os.path.join() raises TypeError at import time -- confirm that failing
# on import is the intended behavior outside a Schrodinger environment.
MACROMODEL = os.path.join(os.getenv('SCHRODINGER'), 'macromodel')
##########################################################################
# Classes
##########################################################################
class ConfSearchStage(stage.Stage):
    """
    Stage for running MacroModel conformational search calculations.

    The work is driven by ``self.status``, which operate() advances through
    "NOT STARTED" -> "PREPARING INPUT FILES" -> "SETTING UP JOBS" ->
    "RUNNING JOBS" -> "PROCESSING FILES" -> "COMPLETE".
    """

    def __init__(self, *args, **kwargs):
        """
        Build the keyword specs and declare the stage's products and I/O.

        All positional and keyword arguments are forwarded unchanged to
        stage.Stage.__init__().
        """
        specs = mmod_input.GENERAL_SPECS + \
            mmod_input.MINIMIZER_SPECS + mmod_input.CONFSEARCH_SPECS
        # Stage-only keyword: operate() compares it against the string
        # 'True', and writeSIFFile() excludes it from the MacroModel input.
        specs += ["SERIAL_SPLIT_OUTPUT = string(default=False)"]
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        # Used by Pipeline to associate -host_mmod with this stage:
        self.setMainProduct('macromodel')
        self.requiredProduct('macromodel')

        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "structures", True)

        self.jobdj = None
        self.status = "NOT STARTED"

    def setupJobs(self):
        """
        Build the command used to launch each subjob and add it to JobDJ.

        One subjob is created per file in ``self.bmin_input_files``. The
        name of the structure file each subjob is told to write is recorded
        in ``self.expected_jobdj_outputs``.
        """
        self.jobdj = self.getJobDJ()
        self.expected_jobdj_outputs = []

        # Subjob file numbering starts at 1.
        for filenum, ligfile in enumerate(self.bmin_input_files, 1):
            jobname = self.genFileName(filenum=filenum, extension="")
            inpfile = jobname + '.inp'
            outfile = jobname + '-out.maegz'
            self.writeSIFFile(ligfile, inpfile, outfile)
            cmd = [MACROMODEL, inpfile]
            self.jobdj.addJob(cmd)
            self.expected_jobdj_outputs.append(outfile)

    def processJobOutputs(self):
        """
        Combine the subjob output files into the stage's output.

        Every structure from every file in ``self.expected_jobdj_outputs``
        is appended to a MultiFileStructureWriter; the files it produced,
        together with the total structure count, are set as output 1.
        """
        outligands = self.expected_jobdj_outputs

        self.info(" BMIN output files: " + str(outligands))
        self.info(" Combining the BMIN output files")

        st_num = 0
        writer = structure.MultiFileStructureWriter(self.genFileName(),
                                                    extension=".maegz")
        for ligfile in outligands:
            for st in pipeutils.get_reader(ligfile):
                st_num += 1
                # Print a progress period every 1000 structures:
                if st_num % 1000 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                writer.append(st)

        # Print return after periods:
        self.info("")

        output_files = writer.getFiles()
        self.setOutput(1, pipeio.Structures(output_files, st_num))

    def splitJobOutputs(self):
        """
        Split the subjob output files by serial number.

        Each file in ``self.expected_jobdj_outputs`` is passed to
        mt.serial_split(); the resulting files are set as output 1 (no
        structure count is supplied).
        """
        csearch_out = self.expected_jobdj_outputs

        self.info(" BMIN output files: " + str(csearch_out))
        self.info(" Splitting the BMIN output files")

        split_out = []  # split by serial number.
        for outfile in csearch_out:
            split_out.extend(mt.serial_split(outfile))

        self.setOutput(1, pipeio.Structures(split_out))

    def operate(self):
        """
        Run the MacroModel conformer search operation on the input files.

        Each phase runs only when ``self.status`` says it is due, so a stage
        whose status was advanced earlier resumes from that phase.
        self.dump() is called after each of the first three transitions
        (presumably to checkpoint the stage for restarts -- confirm against
        stage.Stage).
        """
        if self.status in ["NOT STARTED", "PREPARING INPUT FILES"]:
            self.status = "PREPARING INPUT FILES"
            self.recombineInputLigands()
            self.status = "SETTING UP JOBS"
            self.dump()

        if self.status == "SETTING UP JOBS":
            self.setupJobs()
            self.status = "RUNNING JOBS"
            self.dump()

        if self.status == "RUNNING JOBS":
            # Update JobDJ to correct options (may change if restarting):
            self.setJobDJOptions(self.jobdj)
            self.runJobDJ(self.jobdj)
            self.status = "PROCESSING FILES"
            self.dump()

        if self.status == "PROCESSING FILES":
            # The keyword is a string, hence the comparison with 'True'.
            if self['SERIAL_SPLIT_OUTPUT'] == 'True':
                self.splitJobOutputs()
            else:
                self.processJobOutputs()
            self.status = "COMPLETE"
            return

        # COMPLETE
        return

    def writeSIFFile(self, ligfile, inpfile, outfile):
        """
        Write the MacroModel input file for one subjob.

        :param ligfile: structure file to use as INPUT_STRUCTURE_FILE.
        :param inpfile: path of the input file to write.
        :param outfile: file name to use as OUTPUT_STRUCTURE_FILE.
        :return: ``inpfile``, for parity with SampleRingsStage.writeSIFFile.
        """
        # Pass every stage keyword through except the stage-only one.
        kw_dict = {}
        for key in self.keys():
            if key != 'SERIAL_SPLIT_OUTPUT':
                kw_dict[key] = self[key]

        kw_dict['INPUT_STRUCTURE_FILE'] = ligfile
        kw_dict['OUTPUT_STRUCTURE_FILE'] = outfile

        # Open the file exactly once and let the context manager close it,
        # even if writing the configuration raises.
        with open(inpfile, 'w') as fh:
            fh.write("# Input file for $SCHRODINGER/macromodel\n")
            config = mmod_input.ConfSearch(kw_dict)
            config.writeInputFile(fh)

        return inpfile
class SampleRingsStage(stage.Stage):
    """
    Stage for sampling rings with 7 or more members using MacroModel.

    The work is driven by ``self.status``, which operate() advances through
    "NOT STARTED" -> "PREPARING INPUT FILES" -> "SETTING UP JOBS" ->
    "RUNNING JOBS" -> "PROCESSING FILES" -> "COMPLETE".
    """

    def __init__(self, *args, **kwargs):
        """
        Declare the supported keywords, products and I/O of the stage.

        All positional and keyword arguments are forwarded unchanged to
        stage.Stage.__init__().
        """
        # FIXME allow more MacroModel keywords
        # Like this:
        # specs = mmod_input.GENERAL_SPECS + mmod_input.MINIMIZER_SPECS + mmod_input.CONFSEARCH_SPECS
        # specs += [
        #    "SERIAL_SPLIT_OUTPUT = string(default=False)"
        # ]
        # stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        # The spec lines are kept flush-left so that the content of the
        # string handed to stage.Stage is unchanged.
        specs = """
FORCE_FIELD = string(default="OPLS_2005") # force field to use
SOLVENT = string(default="Water") # Solvent model
ELECTROSTATIC_TREATMENT = string(default="Constant dielectric")
CHARGES_FROM = string(default="Force field")
CUTOFF = string(default="Extended")
OUTCONFS_PER_SEARCH = integer(default=1) # 1 output per input structure
MAXIMUM_ITERATION = integer(default=500)
CONVERGE_ON = string(default="Gradient")
CONFSEARCH_STEPS_PER_ROTATABLE = integer(default=10)
CONVERGENCE_THRESHOLD = float(default=0.05)
CONFSEARCH_STEPS = integer(default=100)
"""
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        self.setMainProduct('macromodel')
        self.requiredProduct('macromodel')

        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "structures", True)

        self._job_outputs = []
        self.samplerings_input_files = None
        self.jobdj = None
        self.status = "NOT STARTED"

    def setupJobs(self):
        """
        Build the MacroModel command for each subjob and add it to JobDJ.

        One subjob is created per file in ``self.samplerings_input_files``.
        The name of the structure file each subjob is told to write is
        recorded in ``self.expected_jobdj_outputs``.
        """
        self.jobdj = self.getJobDJ()
        self.expected_jobdj_outputs = []

        # Subjob file numbering starts at 1.
        for filenum, ligfile in enumerate(self.samplerings_input_files, 1):
            jobname = self.genFileName(filenum=filenum, extension="")
            inpfile = jobname + '.inp'
            outfile = jobname + '-out.maegz'
            self.writeSIFFile(ligfile, inpfile, outfile)
            cmd = [MACROMODEL, inpfile]
            self.jobdj.addJob(cmd)
            self.expected_jobdj_outputs.append(outfile)

    def processJobOutputs(self):
        """
        Recombine the output ligands after all subjobs are complete.

        The subjob outputs (preceded by ``self.through_ligands_file`` when
        ``self.through_num`` is non-zero) are copied into a
        MultiFileStructureWriter; the files it produced, together with the
        total number of structures written, are set as output 1.
        """
        # FIXME: Get rid of expected_jobdj_outputs and use jobnames instead.
        self.checkFiles(self.expected_jobdj_outputs)

        # FIXME: Use outname = self.getOutputName(1)
        self.info(" BMIN output files: %s" % self.expected_jobdj_outputs)
        self.info(" Combining the BMIN output files")

        #
        # Combine the through ligands and the bmin output ligands:
        #
        if self.through_num:
            # Some ligands did not need ring sampling:
            outligand_files = [self.through_ligands_file] + \
                self.expected_jobdj_outputs
        else:
            # All ligands needed ring sampling (Ev:56857):
            outligand_files = self.expected_jobdj_outputs

        writer = structure.MultiFileStructureWriter(self.genFileName(),
                                                    extension=".maegz")

        # The counter must span ALL files: it is reported below as the total
        # structure count of the output. Resetting it for each file would
        # report only the size of the last file, and leave it undefined when
        # there are no files at all.
        st_num = 0
        for ligfile in outligand_files:
            for st in pipeutils.get_reader(ligfile):
                st_num += 1
                # Print progress period:
                if st_num % 1000 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                writer.append(st)

        # Print return after periods:
        self.info("")

        output_files = writer.getFiles()
        self.setOutput(1, pipeio.Structures(output_files, st_num))

    def operate(self):
        """
        Perform the ring-sampling operation on the input files.

        Each phase runs only when ``self.status`` says it is due. When
        prepareInputLigands() returns a false value, the stage goes straight
        to "COMPLETE" and no subjobs are set up or run. self.dump() is
        called after every transition (presumably to checkpoint the stage
        for restarts -- confirm against stage.Stage).
        """
        if self.status in ["NOT STARTED", "PREPARING INPUT FILES"]:
            self.status = "PREPARING INPUT FILES"
            need_sampling = self.prepareInputLigands()
            if need_sampling:
                self.status = "SETTING UP JOBS"
            else:
                self.status = "COMPLETE"
            self.dump()

        if self.status == "SETTING UP JOBS":
            self.setupJobs()
            self.status = "RUNNING JOBS"
            self.dump()

        if self.status == "RUNNING JOBS":
            # Update JobDJ to correct options (may change if restarting):
            self.setJobDJOptions(self.jobdj)
            self.runJobDJ(self.jobdj)
            self.status = "PROCESSING FILES"
            self.dump()

        if self.status == "PROCESSING FILES":
            self.processJobOutputs()
            self.status = "COMPLETE"
            self.dump()

        # COMPLETE
        return

    def writeSIFFile(self, ligfile, inpfile, outfile):
        """
        Write the MacroModel input file for one ring-sampling subjob.

        :param ligfile: structure file to use as INPUT_STRUCTURE_FILE.
        :param inpfile: path of the input file to write.
        :param outfile: file name to use as OUTPUT_STRUCTURE_FILE.
        :return: ``inpfile``
        """
        # Every stage keyword is passed through to MacroModel.
        kw_dict = {}
        for key in self.keys():
            kw_dict[key] = self[key]

        kw_dict['INPUT_STRUCTURE_FILE'] = ligfile
        kw_dict['OUTPUT_STRUCTURE_FILE'] = outfile

        # MCMM may be ineffective for constrained systems and low mode seems
        # to perform better; a mix of the 2 seems to be the best balance
        # for sampling rings in ligand-like molecules:
        kw_dict['CONFSEARCH_METHOD'] = "Mixed"
        kw_dict['MULTI_LIGAND'] = True
        kw_dict['JOB_TYPE'] = 'CONFSEARCH'

        # The context manager closes the file even if writing the
        # configuration raises.
        with open(inpfile, 'w') as fh:
            fh.write("# Input file for $SCHRODINGER/macromodel\n")
            config = mmod_input.ConfSearch(kw_dict)
            config.writeInputFile(fh)

        return inpfile
# EOF