"""
A stage for running QSite jobs.
Input structure file - maestro file of one or more complexes OR a PV file
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Matvey Adzhigirey
# Written for QPLD (Ev:56414)
import os
import sys
import schrodinger.application.qsite.input as qinput
from schrodinger import structure
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import pipeutils
from schrodinger.pipeline import stage
[docs]class QSiteStage(stage.Stage):
"""
Stage for running QSite jobs.
Input can be one of the following:
1. Complex maestro file
QM region may be defined via molecule number or atom numbers
2. PV maestro file
QM region will be set to the ligand
"""
[docs] def addExpectedKeyword(self,
fullname,
qsname,
type,
default=None,
doc=None):
"""
Used by constructor. Saves "QSite name" for specified keyword and
returns ConfigObj validation string for it.
"""
if not hasattr(self, "_qskeywords"):
self._qskeywords = {}
if qsname:
# Link full name to QSite keyword name
self._qskeywords[fullname] = qsname
configobj_str = "%s = %s(default=%s)\n" % (fullname, type, default)
return configobj_str
[docs] def __init__(self, *args, **kwargs):
"""
See class docstring.
"""
# If the keyword is ommitted from the input file, the Jaguar default
# will be used (determinted by mmjag)
# General keywords:
specs = self.addExpectedKeyword('OUTPUT_LIGS_ONLY', None, 'boolean',
False)
specs += self.addExpectedKeyword('IGNORE_RECEP', None, 'boolean', False)
specs += self.addExpectedKeyword('SOLVATION', None, 'boolean', False)
# Use QM (Jaguar) or NDDO (semi-empirical)
specs += self.addExpectedKeyword('THEORY', None, 'string', 'QM')
# QM KEYWORDS:
specs += self.addExpectedKeyword('QM_VSHIFT', 'vshift', 'float')
specs += self.addExpectedKeyword('QM_MAXITG', 'maxitg', 'integer')
specs += self.addExpectedKeyword('QM_BASIS', 'basis', 'string')
specs += self.addExpectedKeyword('QM_HCAPESCHG', 'hcapeschg', 'integer')
specs += self.addExpectedKeyword('QM_MAXIT', ' maxit', 'integer')
specs += self.addExpectedKeyword('QM_MULTIP', 'multip', 'integer')
specs += self.addExpectedKeyword('QM_MOLCHG', 'molchg', 'integer')
specs += self.addExpectedKeyword('QM_IGEOPT', 'igeopt', 'integer')
specs += self.addExpectedKeyword('QM_DFTNAME', 'dftname', 'string')
specs += self.addExpectedKeyword('QM_MMQM', 'mmqm', 'integer')
specs += self.addExpectedKeyword('QM_IACC', 'iacc', 'integer')
specs += self.addExpectedKeyword('QM_IACSCF', 'iacscf', 'integer')
specs += self.addExpectedKeyword('QM_ICFIT', 'icfit', 'integer')
# MM KEYWORDS:
specs += self.addExpectedKeyword('MM_USE_NB_CUTOFF', 'use_nb_cutoff',
'integer')
specs += self.addExpectedKeyword('MM_UPDATE_FREQ', 'nb_update_freq',
'integer')
specs += self.addExpectedKeyword('MM_TN_FORCE_UPDATE',
'tn_force_update', 'integer')
specs += self.addExpectedKeyword('MM_MAXCYCLES', 'maxcycles', 'integer')
specs += self.addExpectedKeyword('MM_DIELECTRIC', 'dielectric', 'float')
specs += self.addExpectedKeyword('MM_NB_CUTOFF_DIST', 'nb_cutoff_dist',
'float')
specs += self.addExpectedKeyword('MM_INIT_STEP_SIZE', 'init_step_size',
'float')
specs += self.addExpectedKeyword('MM_MAX_STEP_SIZE', 'max_step_size',
'float')
specs += self.addExpectedKeyword('MM_TN_FORCE_CUTOFF',
'tn_force_cutoff', 'float')
specs += self.addExpectedKeyword('MM_BUF_FORCE_CONST',
'buf_force_const', 'float')
specs += self.addExpectedKeyword('MM_DELTAE', 'deltae', 'float')
specs += self.addExpectedKeyword('MM_GRADIENT', 'rms_gradient', 'float')
specs += self.addExpectedKeyword('MM_PARAMSTD', 'paramstd', 'string')
specs += self.addExpectedKeyword('MM_ESTATICS', 'estatics', 'string')
specs += self.addExpectedKeyword('MM_OPT_METHOD', 'opt_method',
'string')
specs += self.addExpectedKeyword('MM_CONVERGE_ON', 'converge_on',
'string')
# Use 'qsite_ff' rather than 'forcefield' -- Ev:95813.
specs += self.addExpectedKeyword('MM_FORCEFIELD', 'qsite_ff', 'string')
specs += self.addExpectedKeyword('MM_QSITE_FF', 'qsite_ff', 'string')
# NDDO KEYWORDS:
specs += self.addExpectedKeyword('NDDO_METHOD', 'method', 'string')
specs += self.addExpectedKeyword('NDDO_OPTIONS', 'options', 'string')
# QM REGION (used for complex inputs):
specs += self.addExpectedKeyword('QM_MOLECULES', None, 'integer_list')
specs += self.addExpectedKeyword('QM_HCAP', None, 'integer_list')
stage.Stage.__init__(self, specs=specs, *args, **kwargs)
# Used by Pipeline to associate -host_mmod with this stage:
self.setMainProduct('qsite')
# Will quit the program if this product is not installed
self.requiredProduct('jaguar')
# The input #1 needs to be of type 'structures'
self.addExpectedInput(1, "structures", True)
# The output #1 will be of type 'structures'
self.addExpectedOutput(1, "structures", True)
self.status = "NOT STARTED"
self.jobdj = None
self.qsite_jobnames = []
[docs] def setupJobs(self):
"""
Sets up the QSite jobs, which are distributed via JobDJ.
"""
# Create a JobDJ instance based on VSW parameters:
self.jobdj = self.getJobDJ()
self.qsite_jobnames = []
# Get the filenames of ligand files specified in input #1:
structure_files = self.getInput(1).getFiles()
st = None
if len(structure_files) != 1:
self.error("QSite stage requires exactly 1 input file!")
sys.exit(1)
structure_file = structure_files[0]
count = structure.count_structures(structure_file)
if count == 0:
self.error("Structure file is empty!")
sys.exit(1)
if count == 1:
print('Input is a complex (1 structure)')
self.input_pv = False
qsite_jobname = self.genFileName(filenum=1)
st = structure.Structure.read(structure_file)
self.setupQSiteJob(st, qsite_jobname)
else:
num = count - 1
self.input_pv = True
if num == 1:
print('Input is a PV file (1 ligand)')
else:
print('Input is a PV file (%i ligands)' % num)
ignorerec = self['IGNORE_RECEP']
if ignorerec:
print('Ignore receptor in QSite calculations')
receptor_st = None
lignum = 0
for st in structure.StructureReader(structure_file):
if not receptor_st:
receptor_st = st
continue
lignum += 1
qsite_jobname = self.genFileName(filenum=lignum)
if ignorerec:
# Run QSite on just the ligands
self.setupQSiteJob(st, qsite_jobname)
else:
# Generate the complex:
complex_st = receptor_st.copy()
complex_st.extend(st)
# Copy ligand entry-level properties to receptor
# (overwriting):
complex_st.property.update(st.property)
self.setupQSiteJob(complex_st, qsite_jobname)
[docs] def setupQSiteJob(self, complex_st, qsite_jobname):
"""
Setup the QSite job(s)
"""
complex_file = qsite_jobname + '.mae'
input_file = qsite_jobname + '.in'
complex_st.write(complex_file)
# Write the QSite input file:
qs = qinput.QSiteInput(qsite_jobname)
qm_molnums = []
for key, value in self.items():
if value is None: # Use default
continue
if key == 'QM_MOLECULES':
for molnum in value:
qm_molnums.append(molnum)
elif key == 'QM_HCAP':
self.error("QM_HCAP keyword is not supported at this time")
sys.exit(1)
# In the future QM_HCAP keyword will only be supported for
# single-ligand complex jobs:
if self.input_pv:
self.error(
"QM_HCAP keyword is not supported if input is a PV file"
)
sys.exit(1)
elif key == 'SOLVATION':
if value:
qs.gen['isolv'] = 2
qs.mmkey['solvation_method'] = 'pbf'
elif key.startswith('MM_'):
if key == 'MM_FORCEFIELD':
# QSite now has its own MMIM keyword for setting its force
# force field, which may have a different default than the
# general Impact keyword. If used, MM_FORCEFIELD will set
# the 'qsite_ff' QSite key, rather than 'forcefield'.
import warnings
msg = "MM_FORCEFIELD is deprecated. Use MM_QSITE_FF instead."
warnings.warn(msg, DeprecationWarning, stacklevel=2)
qsname = self._qskeywords[key] # Get short name
qs.mmkey[qsname] = value
elif key.startswith('QM_'):
qsname = self._qskeywords[key] # Get short name
qs.gen[qsname] = value
elif key.startswith('NDDO_OPTIONS'):
# Have to split this space-separated list of <key>=<value> or
# <key> options into separate dictionary items.
tokens = value.split()
for tk in tokens:
s = tk.split('=')
if len(s) == 2:
qs.mopac[s[0]] = s[1]
elif len(s) == 1:
qs.mopac[s[0]] = 1 # Indicating option is turned on?
else:
self.error(
"NDDO_OPTIONS value '%s' has invalid format" %
value)
self.exit(1)
elif key.startswith('NDDO_'):
qsname = self._qskeywords[key] # Get short name
qs.mopac[qsname] = value
if self.input_pv:
# If input is PV file, automaticall add the ligand (last molecule)
# to the QM region:
ligand_molid = complex_st.mol_total
qm_molnums.append(ligand_molid)
# Setup the QM/NDDO region and calculate the total formal charge of QM
# atoms:
total_fc = 0
for molnum in qm_molnums:
qs.addQMMolecule(molnum, theory=self['THEORY'])
for atom in complex_st.molecule[molnum].atom:
total_fc += atom.formal_charge
qs.gen['molchg'] = total_fc
qs.setStructureFile(complex_file)
self.debug('Calling QSiteInput.write() method...')
qs.write() # writes <jobname>.in file
self.debug('After QSiteInput.write() method')
# Build-up & add the QSite command:
qsite = os.path.join(os.environ['SCHRODINGER'], 'qsite')
cmd = [qsite, input_file]
self.jobdj.addJob(cmd)
self.qsite_jobnames.append(qsite_jobname)
[docs] def processJobOutputs(self):
"""
Checks for failure of any subjobs (by reading the .log file). Renames
the QSite output files. Raises a RuntimeError if any subjob failed.
"""
ligands_only = self['OUTPUT_LIGS_ONLY']
final_output_files = []
output_ligands_file = self.genOutputFileName(1, extension=".mae")
for lignum, qsite_jobname in enumerate(self.qsite_jobnames):
#logfile = qsite_jobname + '.log'
outfile = qsite_jobname + '.01.mae'
self.checkFile(outfile)
if self.input_pv and ligands_only:
complex_st = structure.Structure.read(outfile)
ligand_molnum = complex_st.mol_total
ligand_st = complex_st.molecule[ligand_molnum].extractStructure(
)
ligand_st.property.update(complex_st.property)
if lignum == 0:
ligand_st.write(output_ligands_file)
final_output_files.append(output_ligands_file)
else:
ligand_st.append(output_ligands_file)
else:
# Find out what the output file should be called:
renamed_outfile = self.genOutputFileName(1,
filenum=lignum + 1,
extension=".mae")
# Rename the QSite output:
if os.path.exists(renamed_outfile):
os.remove(renamed_outfile) # For Win
os.rename(outfile, renamed_outfile)
final_output_files.append(renamed_outfile)
self.checkFiles(final_output_files)
# Set the output #1 of this stage to the list of renamed output files:
self.setOutput(1, pipeio.Structures(final_output_files))
[docs] def operate(self):
"""
Perform an operation on the input files. There are setup, running,
and post-processing steps, and the stage records its current status so
that it can be restarted in that step if there is a failure. Raises
a RuntimeError if the JobDJ run() method fails, or if the stage
finishes with an improper status.
"""
if self.status == "NOT STARTED" or self.status == "SETTING UP JOBS":
self.status = "SETTING UP JOBS"
# Setup JobDJ:
self.setupJobs()
# If restarting, go directly to next step.
self.status = "RUNNING JOBS"
self.dump()
if self.status == "RUNNING JOBS":
# Update JobDJ to correct options (may change if restarting):
self.setJobDJOptions(self.jobdj)
self.runJobDJ(self.jobdj)
# Make sure that all QSite outputs are present:
for qsite_jobname in self.qsite_jobnames:
outfile = qsite_jobname + '.01.mae'
logfile = qsite_jobname + '.log'
if not os.path.isfile(logfile):
msg = "ERROR: QSite subjob log file is missing\n"
self.exit(msg)
elif not os.path.isfile(outfile):
msg = "ERROR: QSite subjob output file is missing\n"
msg += pipeutils.get_last_20_lines(logfile)
msg += "ERROR: QSite subjob output file is missing\n"
self.exit(msg)
# Ev:108034 Since failed QSite jobs still write the output
# file, existence of that file is not enough to determine
# that the job succeeded. Therefore we need to check the log
# file for any errors as well:
job_failed = False
num_lines_read = 0
for line in pipeutils.BackwardsReader(logfile):
num_lines_read += 1
if num_lines_read > 20:
break
if "halted prematurely" in line:
job_failed = True
break
if job_failed:
msg = "ERROR: QSite subjob halted prematurely"
msg += pipeutils.get_last_20_lines(logfile)
msg += "ERROR: QSite subjob halted prematurely"
self.exit(msg)
self.status = "PROCESSING FILES"
self.dump()
if self.status == "PROCESSING FILES":
# Rename the QSite output files:
self.processJobOutputs()
self.status = "COMPLETE"
self.dump()
# COMPLETE
return
# EOF