Source code for schrodinger.pipeline.stages.qsite

"""
A stage for running QSite jobs.

Input structure file - maestro file of one or more complexes OR a PV file

Copyright Schrodinger, LLC. All rights reserved.

"""

# Contributors: Matvey Adzhigirey

# Written for QPLD (Ev:56414)

import os
import sys

import schrodinger.application.qsite.input as qinput
from schrodinger import structure
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import pipeutils
from schrodinger.pipeline import stage


[docs]class QSiteStage(stage.Stage): """ Stage for running QSite jobs. Input can be one of the following: 1. Complex maestro file QM region may be defined via molecule number or atom numbers 2. PV maestro file QM region will be set to the ligand """
[docs] def addExpectedKeyword(self, fullname, qsname, type, default=None, doc=None): """ Used by constructor. Saves "QSite name" for specified keyword and returns ConfigObj validation string for it. """ if not hasattr(self, "_qskeywords"): self._qskeywords = {} if qsname: # Link full name to QSite keyword name self._qskeywords[fullname] = qsname configobj_str = "%s = %s(default=%s)\n" % (fullname, type, default) return configobj_str
[docs] def __init__(self, *args, **kwargs): """ See class docstring. """ # If the keyword is ommitted from the input file, the Jaguar default # will be used (determinted by mmjag) # General keywords: specs = self.addExpectedKeyword('OUTPUT_LIGS_ONLY', None, 'boolean', False) specs += self.addExpectedKeyword('IGNORE_RECEP', None, 'boolean', False) specs += self.addExpectedKeyword('SOLVATION', None, 'boolean', False) # Use QM (Jaguar) or NDDO (semi-empirical) specs += self.addExpectedKeyword('THEORY', None, 'string', 'QM') # QM KEYWORDS: specs += self.addExpectedKeyword('QM_VSHIFT', 'vshift', 'float') specs += self.addExpectedKeyword('QM_MAXITG', 'maxitg', 'integer') specs += self.addExpectedKeyword('QM_BASIS', 'basis', 'string') specs += self.addExpectedKeyword('QM_HCAPESCHG', 'hcapeschg', 'integer') specs += self.addExpectedKeyword('QM_MAXIT', ' maxit', 'integer') specs += self.addExpectedKeyword('QM_MULTIP', 'multip', 'integer') specs += self.addExpectedKeyword('QM_MOLCHG', 'molchg', 'integer') specs += self.addExpectedKeyword('QM_IGEOPT', 'igeopt', 'integer') specs += self.addExpectedKeyword('QM_DFTNAME', 'dftname', 'string') specs += self.addExpectedKeyword('QM_MMQM', 'mmqm', 'integer') specs += self.addExpectedKeyword('QM_IACC', 'iacc', 'integer') specs += self.addExpectedKeyword('QM_IACSCF', 'iacscf', 'integer') specs += self.addExpectedKeyword('QM_ICFIT', 'icfit', 'integer') # MM KEYWORDS: specs += self.addExpectedKeyword('MM_USE_NB_CUTOFF', 'use_nb_cutoff', 'integer') specs += self.addExpectedKeyword('MM_UPDATE_FREQ', 'nb_update_freq', 'integer') specs += self.addExpectedKeyword('MM_TN_FORCE_UPDATE', 'tn_force_update', 'integer') specs += self.addExpectedKeyword('MM_MAXCYCLES', 'maxcycles', 'integer') specs += self.addExpectedKeyword('MM_DIELECTRIC', 'dielectric', 'float') specs += self.addExpectedKeyword('MM_NB_CUTOFF_DIST', 'nb_cutoff_dist', 'float') specs += self.addExpectedKeyword('MM_INIT_STEP_SIZE', 'init_step_size', 'float') specs += self.addExpectedKeyword('MM_MAX_STEP_SIZE', 'max_step_size', 'float') specs += self.addExpectedKeyword('MM_TN_FORCE_CUTOFF', 'tn_force_cutoff', 'float') specs += self.addExpectedKeyword('MM_BUF_FORCE_CONST', 'buf_force_const', 'float') specs += self.addExpectedKeyword('MM_DELTAE', 'deltae', 'float') specs += self.addExpectedKeyword('MM_GRADIENT', 'rms_gradient', 'float') specs += self.addExpectedKeyword('MM_PARAMSTD', 'paramstd', 'string') specs += self.addExpectedKeyword('MM_ESTATICS', 'estatics', 'string') specs += self.addExpectedKeyword('MM_OPT_METHOD', 'opt_method', 'string') specs += self.addExpectedKeyword('MM_CONVERGE_ON', 'converge_on', 'string') # Use 'qsite_ff' rather than 'forcefield' -- Ev:95813. specs += self.addExpectedKeyword('MM_FORCEFIELD', 'qsite_ff', 'string') specs += self.addExpectedKeyword('MM_QSITE_FF', 'qsite_ff', 'string') # NDDO KEYWORDS: specs += self.addExpectedKeyword('NDDO_METHOD', 'method', 'string') specs += self.addExpectedKeyword('NDDO_OPTIONS', 'options', 'string') # QM REGION (used for complex inputs): specs += self.addExpectedKeyword('QM_MOLECULES', None, 'integer_list') specs += self.addExpectedKeyword('QM_HCAP', None, 'integer_list') stage.Stage.__init__(self, specs=specs, *args, **kwargs) # Used by Pipeline to associate -host_mmod with this stage: self.setMainProduct('qsite') # Will quit the program if this product is not installed self.requiredProduct('jaguar') # The input #1 needs to be of type 'structures' self.addExpectedInput(1, "structures", True) # The output #1 will be of type 'structures' self.addExpectedOutput(1, "structures", True) self.status = "NOT STARTED" self.jobdj = None self.qsite_jobnames = []
[docs] def setupJobs(self): """ Sets up the QSite jobs, which are distributed via JobDJ. """ # Create a JobDJ instance based on VSW parameters: self.jobdj = self.getJobDJ() self.qsite_jobnames = [] # Get the filenames of ligand files specified in input #1: structure_files = self.getInput(1).getFiles() st = None if len(structure_files) != 1: self.error("QSite stage requires exactly 1 input file!") sys.exit(1) structure_file = structure_files[0] count = structure.count_structures(structure_file) if count == 0: self.error("Structure file is empty!") sys.exit(1) if count == 1: print('Input is a complex (1 structure)') self.input_pv = False qsite_jobname = self.genFileName(filenum=1) st = structure.Structure.read(structure_file) self.setupQSiteJob(st, qsite_jobname) else: num = count - 1 self.input_pv = True if num == 1: print('Input is a PV file (1 ligand)') else: print('Input is a PV file (%i ligands)' % num) ignorerec = self['IGNORE_RECEP'] if ignorerec: print('Ignore receptor in QSite calculations') receptor_st = None lignum = 0 for st in structure.StructureReader(structure_file): if not receptor_st: receptor_st = st continue lignum += 1 qsite_jobname = self.genFileName(filenum=lignum) if ignorerec: # Run QSite on just the ligands self.setupQSiteJob(st, qsite_jobname) else: # Generate the complex: complex_st = receptor_st.copy() complex_st.extend(st) # Copy ligand entry-level properties to receptor # (overwriting): complex_st.property.update(st.property) self.setupQSiteJob(complex_st, qsite_jobname)
[docs] def setupQSiteJob(self, complex_st, qsite_jobname): """ Setup the QSite job(s) """ complex_file = qsite_jobname + '.mae' input_file = qsite_jobname + '.in' complex_st.write(complex_file) # Write the QSite input file: qs = qinput.QSiteInput(qsite_jobname) qm_molnums = [] for key, value in self.items(): if value is None: # Use default continue if key == 'QM_MOLECULES': for molnum in value: qm_molnums.append(molnum) elif key == 'QM_HCAP': self.error("QM_HCAP keyword is not supported at this time") sys.exit(1) # In the future QM_HCAP keyword will only be supported for # single-ligand complex jobs: if self.input_pv: self.error( "QM_HCAP keyword is not supported if input is a PV file" ) sys.exit(1) elif key == 'SOLVATION': if value: qs.gen['isolv'] = 2 qs.mmkey['solvation_method'] = 'pbf' elif key.startswith('MM_'): if key == 'MM_FORCEFIELD': # QSite now has its own MMIM keyword for setting its force # force field, which may have a different default than the # general Impact keyword. If used, MM_FORCEFIELD will set # the 'qsite_ff' QSite key, rather than 'forcefield'. import warnings msg = "MM_FORCEFIELD is deprecated. Use MM_QSITE_FF instead." warnings.warn(msg, DeprecationWarning, stacklevel=2) qsname = self._qskeywords[key] # Get short name qs.mmkey[qsname] = value elif key.startswith('QM_'): qsname = self._qskeywords[key] # Get short name qs.gen[qsname] = value elif key.startswith('NDDO_OPTIONS'): # Have to split this space-separated list of <key>=<value> or # <key> options into separate dictionary items. tokens = value.split() for tk in tokens: s = tk.split('=') if len(s) == 2: qs.mopac[s[0]] = s[1] elif len(s) == 1: qs.mopac[s[0]] = 1 # Indicating option is turned on? else: self.error( "NDDO_OPTIONS value '%s' has invalid format" % value) self.exit(1) elif key.startswith('NDDO_'): qsname = self._qskeywords[key] # Get short name qs.mopac[qsname] = value if self.input_pv: # If input is PV file, automaticall add the ligand (last molecule) # to the QM region: ligand_molid = complex_st.mol_total qm_molnums.append(ligand_molid) # Setup the QM/NDDO region and calculate the total formal charge of QM # atoms: total_fc = 0 for molnum in qm_molnums: qs.addQMMolecule(molnum, theory=self['THEORY']) for atom in complex_st.molecule[molnum].atom: total_fc += atom.formal_charge qs.gen['molchg'] = total_fc qs.setStructureFile(complex_file) self.debug('Calling QSiteInput.write() method...') qs.write() # writes <jobname>.in file self.debug('After QSiteInput.write() method') # Build-up & add the QSite command: qsite = os.path.join(os.environ['SCHRODINGER'], 'qsite') cmd = [qsite, input_file] self.jobdj.addJob(cmd) self.qsite_jobnames.append(qsite_jobname)
[docs] def processJobOutputs(self): """ Checks for failure of any subjobs (by reading the .log file). Renames the QSite output files. Raises a RuntimeError if any subjob failed. """ ligands_only = self['OUTPUT_LIGS_ONLY'] final_output_files = [] output_ligands_file = self.genOutputFileName(1, extension=".mae") for lignum, qsite_jobname in enumerate(self.qsite_jobnames): #logfile = qsite_jobname + '.log' outfile = qsite_jobname + '.01.mae' self.checkFile(outfile) if self.input_pv and ligands_only: complex_st = structure.Structure.read(outfile) ligand_molnum = complex_st.mol_total ligand_st = complex_st.molecule[ligand_molnum].extractStructure( ) ligand_st.property.update(complex_st.property) if lignum == 0: ligand_st.write(output_ligands_file) final_output_files.append(output_ligands_file) else: ligand_st.append(output_ligands_file) else: # Find out what the output file should be called: renamed_outfile = self.genOutputFileName(1, filenum=lignum + 1, extension=".mae") # Rename the QSite output: if os.path.exists(renamed_outfile): os.remove(renamed_outfile) # For Win os.rename(outfile, renamed_outfile) final_output_files.append(renamed_outfile) self.checkFiles(final_output_files) # Set the output #1 of this stage to the list of renamed output files: self.setOutput(1, pipeio.Structures(final_output_files))
[docs] def operate(self): """ Perform an operation on the input files. There are setup, running, and post-processing steps, and the stage records its current status so that it can be restarted in that step if there is a failure. Raises a RuntimeError if the JobDJ run() method fails, or if the stage finishes with an improper status. """ if self.status == "NOT STARTED" or self.status == "SETTING UP JOBS": self.status = "SETTING UP JOBS" # Setup JobDJ: self.setupJobs() # If restarting, go directly to next step. self.status = "RUNNING JOBS" self.dump() if self.status == "RUNNING JOBS": # Update JobDJ to correct options (may change if restarting): self.setJobDJOptions(self.jobdj) self.runJobDJ(self.jobdj) # Make sure that all QSite outputs are present: for qsite_jobname in self.qsite_jobnames: outfile = qsite_jobname + '.01.mae' logfile = qsite_jobname + '.log' if not os.path.isfile(logfile): msg = "ERROR: QSite subjob log file is missing\n" self.exit(msg) elif not os.path.isfile(outfile): msg = "ERROR: QSite subjob output file is missing\n" msg += pipeutils.get_last_20_lines(logfile) msg += "ERROR: QSite subjob output file is missing\n" self.exit(msg) # Ev:108034 Since failed QSite jobs still write the output # file, existence of that file is not enough to determine # that the job succeeded. Therefore we need to check the log # file for any errors as well: job_failed = False num_lines_read = 0 for line in pipeutils.BackwardsReader(logfile): num_lines_read += 1 if num_lines_read > 20: break if "halted prematurely" in line: job_failed = True break if job_failed: msg = "ERROR: QSite subjob halted prematurely" msg += pipeutils.get_last_20_lines(logfile) msg += "ERROR: QSite subjob halted prematurely" self.exit(msg) self.status = "PROCESSING FILES" self.dump() if self.status == "PROCESSING FILES": # Rename the QSite output files: self.processJobOutputs() self.status = "COMPLETE" self.dump() # COMPLETE return
# EOF