
# python module
"""
Stages for running MacroModel jobs.

    ConfSearchStage - stage for running MacroModel conf-search calculations.
    SampleRingsStage - stage for sampling large rings

Copyright Schrodinger, LLC. All rights reserved.

"""
# Contributors: Matvey Adzhigirey, Shawn Watts

##########################################################################
# Packages
##########################################################################

import os
import sys

import schrodinger.application.macromodel.input as mmod_input
import schrodinger.application.macromodel.tools as mt
import schrodinger.structure as structure
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import pipeutils
from schrodinger.pipeline import stage

MACROMODEL = os.path.join(os.getenv('SCHRODINGER'), 'macromodel')

##########################################################################
# Classes
##########################################################################


class ConfSearchStage(stage.Stage):
    """
    Stage for running MacroModel calculations.
    """

    def __init__(self, *args, **kwargs):
        specs = mmod_input.GENERAL_SPECS + \
            mmod_input.MINIMIZER_SPECS + mmod_input.CONFSEARCH_SPECS
        specs += ["SERIAL_SPLIT_OUTPUT = string(default=False)"]
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        # Used by Pipeline to associate -host_mmod with this stage:
        self.setMainProduct('macromodel')
        self.requiredProduct('macromodel')

        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "structures", True)

        self.jobdj = None
        self.status = "NOT STARTED"
        return

    def recombineInputLigands(self):
        """
        Read all input structure files, and write them to separate subjob
        input files. The number of subjobs is determined by the Pipeline
        and adjusted so that the subjob size is within a good limit, if
        requested by the user.
        """

        input_files = self.getInput(1).getFiles()

        self.info(" Counting number of input structures")
        (self.input_variants_count, self.input_compound_count) = \
            pipeutils.countRoots(input_files)

        msg = "\n Number of input structures: " + \
            str(self.input_variants_count)
        self.info(msg)
        msg = " Number of input compounds: " + str(self.input_compound_count)
        self.info(msg)

        # ConfSearchStage
        # Determine the number of subjobs to create:
        # NOTE: This min/max range is used only when the Pipeline driver
        # requests the number of jobs to be adjusted to yield optimal sizes
        min_job_size = 50  # About 1 hour
        max_job_size = 500  # About 10 hours

        (n_st_per_file, njobs, adjusted) = \
            self.getAdjustedNJobs(
                self.input_variants_count, min_job_size, max_job_size)

        if adjusted:
            self.info(
                " Adjusting number of jobs to yield %i-%i ligands per job" %
                (min_job_size, max_job_size))
        self.info(
            " Separating ligands into %i subjobs (%i max ligands per job)." %
            (njobs, n_st_per_file))

        writer = structure.MultiFileStructureWriter(
            self.genFileName(),
            extension="_in.maegz",
            sts_per_file=n_st_per_file)
        st_num = 0
        for ligfile in input_files:
            for st in pipeutils.get_reader(ligfile):
                st_num += 1
                if st_num % 1000 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                writer.append(st)
        writer.close()  # close last writer

        output_files = writer.getFiles()

        self.checkFiles(output_files)

        self.bmin_input_files = output_files
        return
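
    # The splitting pattern above, shown in isolation as a minimal sketch.
    # The base name and input file name here are hypothetical, and
    # sts_per_file would normally come from getAdjustedNJobs():
    #
    #   writer = structure.MultiFileStructureWriter("subjob",
    #                                               extension="_in.maegz",
    #                                               sts_per_file=100)
    #   for st in pipeutils.get_reader("ligands.maegz"):
    #       writer.append(st)
    #   writer.close()
    #   subjob_files = writer.getFiles()  # one file per <= 100 structures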

    def setupJobs(self):
        """
        Build up the command used to launch each subjob, and add that
        command to JobDJ.
        """
        self.jobdj = self.getJobDJ()

        self.expected_jobdj_outputs = []

        filenum = 0
        for ligfile in self.bmin_input_files:
            filenum += 1
            jobname = self.genFileName(filenum=filenum, extension="")
            inpfile = jobname + '.inp'
            outfile = jobname + '-out.maegz'

            self.writeSIFFile(ligfile, inpfile, outfile)

            cmd = [MACROMODEL, inpfile]
            self.jobdj.addJob(cmd)

            self.expected_jobdj_outputs.append(outfile)
        return
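
    # Each subjob runs "$SCHRODINGER/macromodel <jobname>.inp", where the
    # .inp file is the SIF written by writeSIFFile(). A minimal sketch of
    # one iteration of the loop above (the file name is hypothetical; the
    # jobs are actually executed later by runJobDJ() in operate()):
    #
    #   jobdj = self.getJobDJ()
    #   jobdj.addJob([MACROMODEL, "subjob-001.inp"])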

    def processJobOutputs(self):
        """
        After the subjobs are complete, their output files are combined
        into a few larger output files.
        """

        outligands = self.expected_jobdj_outputs

        self.info(" BMIN output files: " + str(outligands))
        self.info(" Combining the BMIN output files")

        st_num = 0
        writer = structure.MultiFileStructureWriter(self.genFileName(),
                                                    extension=".maegz")
        for ligfile in outligands:
            for st in pipeutils.get_reader(ligfile):
                st_num += 1
                if st_num % 1000 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                writer.append(st)
        writer.close()  # close the last open file

        # Print return after periods:
        self.info("")

        output_files = writer.getFiles()

        self.setOutput(1, pipeio.Structures(output_files, st_num))
        return

    def splitJobOutputs(self):
        """
        Split each BMIN output file by serial number (mt.serial_split) and
        set the resulting files as the output of this stage. Used when
        SERIAL_SPLIT_OUTPUT is True.
        """

        csearch_out = self.expected_jobdj_outputs

        self.info(" BMIN output files: " + str(csearch_out))
        self.info(" Splitting the BMIN output files")

        split_out = []
        # Split by serial number:
        for outfile in csearch_out:
            split_out.extend(mt.serial_split(outfile))

        self.setOutput(1, pipeio.Structures(split_out))
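
    # A minimal sketch of the serial split used above (hypothetical file
    # name; the naming of the split files is determined by mt.serial_split,
    # which returns the paths of the files it writes):
    #
    #   split_files = mt.serial_split("subjob-001-out.maegz")
    #   for fname in split_files:
    #       print(fname)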

    def operate(self):
        """
        Run the MacroModel conformational search on the input files.
        """

        if self.status in ["NOT STARTED", "PREPARING INPUT FILES"]:
            self.status = "PREPARING INPUT FILES"
            self.recombineInputLigands()
            self.status = "SETTING UP JOBS"
            self.dump()

        if self.status == "SETTING UP JOBS":
            self.setupJobs()
            self.status = "RUNNING JOBS"
            self.dump()

        if self.status == "RUNNING JOBS":
            # Update JobDJ to correct options (may change if restarting):
            self.setJobDJOptions(self.jobdj)
            self.runJobDJ(self.jobdj)
            self.status = "PROCESSING FILES"
            self.dump()

        if self.status == "PROCESSING FILES":
            if self['SERIAL_SPLIT_OUTPUT'] == 'True':
                self.splitJobOutputs()
            else:
                self.processJobOutputs()
            self.status = "COMPLETE"
            return

        # COMPLETE
        return

    def writeSIFFile(self, ligfile, inpfile, outfile):
        """
        Write the MacroModel SIF input file <inpfile> for the subjob that
        reads <ligfile> and writes <outfile>.
        """

        kw_dict = {}
        for key in self.keys():
            if key not in ['SERIAL_SPLIT_OUTPUT']:
                kw_dict[key] = self[key]
        kw_dict['INPUT_STRUCTURE_FILE'] = ligfile
        kw_dict['OUTPUT_STRUCTURE_FILE'] = outfile

        fh = open(inpfile, 'w')
        fh.write("# Input file for $SCHRODINGER/macromodel\n")
        config = mmod_input.ConfSearch(kw_dict)
        config.writeInputFile(fh)
        fh.close()
        # print 'Input file written:', inpfile
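
# A standalone sketch of the SIF generation done by writeSIFFile() above.
# File names are hypothetical; only keywords already used in this module
# are shown:
#
#   kw_dict = {
#       'INPUT_STRUCTURE_FILE': 'ligands_in.maegz',
#       'OUTPUT_STRUCTURE_FILE': 'ligands-out.maegz',
#       'FORCE_FIELD': 'OPLS_2005',
#   }
#   with open('confsearch.inp', 'w') as fh:
#       fh.write("# Input file for $SCHRODINGER/macromodel\n")
#       mmod_input.ConfSearch(kw_dict).writeInputFile(fh)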


class SampleRingsStage(stage.Stage):
    """
    Stage for sampling rings with 7 or more members using MacroModel.
    """

    def __init__(self, *args, **kwargs):
        # FIXME allow more MacroModel keywords
        # Like this:
        # specs = mmod_input.GENERAL_SPECS + mmod_input.MINIMIZER_SPECS + mmod_input.CONFSEARCH_SPECS
        # specs += [
        #     "SERIAL_SPLIT_OUTPUT = string(default=False)"
        # ]
        # stage.Stage.__init__(self, specs=specs, *args, **kwargs)
        specs = """
            FORCE_FIELD = string(default="OPLS_2005") # force field to use
            SOLVENT = string(default="Water") # Solvent model
            ELECTROSTATIC_TREATMENT = string(default="Constant dielectric")
            CHARGES_FROM = string(default="Force field")
            CUTOFF = string(default="Extended")
            OUTCONFS_PER_SEARCH = integer(default=1) # 1 output per input structure
            MAXIMUM_ITERATION = integer(default=500)
            CONVERGE_ON = string(default="Gradient")
            CONFSEARCH_STEPS_PER_ROTATABLE = integer(default=10)
            CONVERGENCE_THRESHOLD = float(default=0.05)
            CONFSEARCH_STEPS = integer(default=100)
        """
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        self.setMainProduct('macromodel')
        self.requiredProduct('macromodel')

        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "structures", True)

        self._job_outputs = []
        self.samplerings_input_files = None
        self.jobdj = None
        self.status = "NOT STARTED"
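
    # Keywords declared in the specs above are exposed through the stage's
    # dict-like interface (see self[key] usage elsewhere in this module).
    # A sketch, assuming the usual Stage constructor signature with a stage
    # name as the first positional argument; the name is hypothetical:
    #
    #   stg = SampleRingsStage("SAMPLE_RINGS")
    #   stg['FORCE_FIELD'] = "OPLS_2005"
    #   stg['CONFSEARCH_STEPS'] = 200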

    def prepareInputLigands(self):
        """
        Split the input ligands into structures whose rings are already
        acceptable and structures that need ring sampling, then write the
        latter to the subjob input files.

        Return True if any ligands need ring sampling, False otherwise.
        """

        input_object = self.getInput(1)
        input_files = input_object.getFiles()

        self.through_ligands_file = \
            self.genFileName(extension='_through_ligands.maegz')
        self.presample_ligands_file = \
            self.genFileName(extension='_presample_ligands.maegz')

        # Split up the files into 2 files: one for ligands that ringconf did
        # well on, and another for problematic ligands.
        self.presample_num = 0
        self.through_num = 0
        through_writer = structure.StructureWriter(self.through_ligands_file)
        presample_writer = structure.StructureWriter(
            self.presample_ligands_file)

        st_num = 0
        for ligfile in input_files:
            for st in pipeutils.get_reader(ligfile):
                st_num += 1
                if st_num % 1000 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                result = st.property.get('i_lp_ring_sampling_problem')
                if result is None:
                    # Property does not exist:
                    through_writer.append(st)
                    self.through_num += 1
                else:
                    # Property exists. Will be 2 for a partial match and 1
                    # for a full mismatch.
                    presample_writer.append(st)
                    self.presample_num += 1
        through_writer.close()
        presample_writer.close()

        if self.presample_num == 0:
            self.info(
                "No ligands need ring sampling. Returning a copy of the "
                "input structures object.")
            self.setOutput(
                1,
                pipeio.Structures([self.through_ligands_file],
                                  self.through_num))
            return False  # No need to sample
        else:
            msg = "\nringconf Problem ligands: %s" % self.presample_ligands_file
            self.info(msg)

            if not self.through_num:
                self.through_ligands_file = None
            msg = "\nringconf Non-problem ligands: %s" % self.through_ligands_file
            self.info(msg)

        # SampleRingsStage
        # Determine the number of subjobs to create:
        # NOTE: This min/max range is used only when the Pipeline driver
        # requests the number of jobs to be adjusted to yield optimal sizes
        #
        # NOTE: These job sizes have NOT been tested. How long the jobs take
        # largely depends on how many of the input structures need to be
        # processed.
        min_job_size = 100
        max_job_size = 5000

        (n_st_per_file, njobs, adjusted) = self.getAdjustedNJobs(
            self.presample_num, min_job_size, max_job_size)

        if adjusted:
            self.info(
                " Adjusting number of jobs to yield %i-%i ligands per job" %
                (min_job_size, max_job_size))
        self.info(
            " Separating ligands into %i subjobs (%i max ligands per job)." %
            (njobs, n_st_per_file))

        #
        # Split up the ligands into the appropriate number of subjobs:
        #
        writer = structure.MultiFileStructureWriter(
            self.genFileName(),
            extension="_in.maegz",
            sts_per_file=n_st_per_file)
        st_num = 0
        for st in pipeutils.get_reader(self.presample_ligands_file):
            st_num += 1
            # Print progress period:
            if st_num % 1000 == 0:
                sys.stdout.write(".")
                sys.stdout.flush()
            writer.append(st)
        writer.close()  # close the last open file

        self.samplerings_input_files = writer.getFiles()

        return True  # Need to sample rings
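
    # The filter above keys off the "i_lp_ring_sampling_problem" property
    # set by the upstream ringconf step. A minimal sketch of the same test
    # (the input file name is hypothetical):
    #
    #   for st in pipeutils.get_reader("ligprep_out.maegz"):
    #       flag = st.property.get('i_lp_ring_sampling_problem')
    #       if flag is None:
    #           pass  # rings already handled; pass the structure through
    #       else:
    #           pass  # 2 = partial match, 1 = full mismatch; resample rings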

    def setupJobs(self):
        """
        Build up the MacroModel command for each subjob, and add it to
        JobDJ.
        """
        self.jobdj = self.getJobDJ()

        self.expected_jobdj_outputs = []

        filenum = 0
        for ligfile in self.samplerings_input_files:
            filenum += 1
            jobname = self.genFileName(filenum=filenum, extension="")
            inpfile = jobname + '.inp'
            outfile = jobname + '-out.maegz'

            self.writeSIFFile(ligfile, inpfile, outfile)

            cmd = [MACROMODEL, inpfile]
            self.jobdj.addJob(cmd)

            self.expected_jobdj_outputs.append(outfile)

    def processJobOutputs(self):
        """
        Run after all subjobs are complete to recombine the output ligands.
        """

        # FIXME: Get rid of expected_jobdj_outputs and use jobnames instead.
        self.checkFiles(self.expected_jobdj_outputs)

        # FIXME: Use outname = self.getOutputName(1)

        self.info(" BMIN output files: %s" % self.expected_jobdj_outputs)
        self.info(" Combining the BMIN output files")

        #
        # Combine the through ligands and the bmin output ligands:
        #
        if self.through_num:
            # Some ligands did not need ring sampling:
            outligand_files = [self.through_ligands_file] + \
                self.expected_jobdj_outputs
        else:
            # All ligands needed ring sampling (Ev:56857):
            outligand_files = self.expected_jobdj_outputs

        writer = structure.MultiFileStructureWriter(self.genFileName(),
                                                    extension=".maegz")
        st_num = 0  # count structures across all files (used as output count)
        for ligfile in outligand_files:
            for st in pipeutils.get_reader(ligfile):
                # Print progress period:
                st_num += 1
                if st_num % 1000 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                writer.append(st)
        writer.close()  # close the last open file

        # Print return after periods:
        self.info("")

        output_files = writer.getFiles()

        self.setOutput(1, pipeio.Structures(output_files, st_num))

    def operate(self):
        """
        Perform an operation on the input files.
        """

        #enter_status = self.status

        if self.status in ["NOT STARTED", "PREPARING INPUT FILES"]:
            self.status = "PREPARING INPUT FILES"
            need_sampling = self.prepareInputLigands()
            if need_sampling:
                self.status = "SETTING UP JOBS"
            else:
                self.status = "COMPLETE"
            self.dump()

        if self.status == "SETTING UP JOBS":
            self.setupJobs()
            self.status = "RUNNING JOBS"
            self.dump()

        if self.status == "RUNNING JOBS":
            # Update JobDJ to correct options (may change if restarting):
            self.setJobDJOptions(self.jobdj)
            self.runJobDJ(self.jobdj)
            self.status = "PROCESSING FILES"
            self.dump()

        if self.status == "PROCESSING FILES":
            self.processJobOutputs()
            self.status = "COMPLETE"
            self.dump()

        # COMPLETE
        return

    def writeSIFFile(self, ligfile, inpfile, outfile):
        """
        Write the MacroModel SIF input file <inpfile> for the subjob that
        reads <ligfile> and writes <outfile>.
        """

        kw_dict = {}
        # Copy all stage keywords (no exclusions, unlike ConfSearchStage):
        for key in self.keys():
            kw_dict[key] = self[key]
        kw_dict['INPUT_STRUCTURE_FILE'] = ligfile
        kw_dict['OUTPUT_STRUCTURE_FILE'] = outfile

        # MCMM may be ineffective for constrained systems and low-mode
        # sampling seems to perform better; a mix of the two seems to be
        # the best balance for sampling rings in ligand-like molecules:
        kw_dict['CONFSEARCH_METHOD'] = "Mixed"
        kw_dict['MULTI_LIGAND'] = True
        kw_dict['JOB_TYPE'] = 'CONFSEARCH'

        fh = open(inpfile, 'w')
        fh.write("# Input file for $SCHRODINGER/macromodel\n")
        config = mmod_input.ConfSearch(kw_dict)
        config.writeInputFile(fh)
        fh.close()

        return inpfile
# EOF