Source code for schrodinger.application.macromodel.apps

"""
Functions that combine macromodel.utils and macromodel.tools to create
mini-applications and workflows.

While these may be useful in themselves, they also serve as examples of
how to use the other modules in this package.

Copyright Schrodinger, LLC. All rights reserved.

"""

# Contributors: K Shawn Watts

################################################################################
# Packages
################################################################################

import os
import re

import schrodinger.application.macromodel.tools as mt
import schrodinger.application.macromodel.utils as mu
import schrodinger.job.jobcontrol as jc
import schrodinger.structure as structure

################################################################################
# Functions
################################################################################





[docs]def get_rep_confs(mae_file="", max_confs=5, steps=5000, **kwargs): """ This function combines a series of tasks to automate the processing of non-conformers in to a smaller representative collection of conformers. Returns a list of the file names that contain representative confs, grouped by input serial number. This method accepts the name of a mae file containing a set of non-conformers, optionally the max number of representative confs, optionally the number of search steps, optional kwargs for the ComUtil object, and then: a) performs a serial conformation search of non-conformers b) groups output of conformational search by serial number into multiple files c) performs multiple minimization of each serial number d) reduces output of each serial number with cluster, to N representative confs. The default maximum number of conformers per serial filtered by Cluster is 5. The default number steps used in the Monte Carlo Multiple Minima search is 5000. Returns the array of representative file names. Depends on macromodel.utils.CluUtil() class for clustering. Depends on macromodel.utils.ComUtil() for writing and running com files. Depends on macromodel.tools.serial_split() for grouping output. """ # get instances mcu = mu.ComUtil(**kwargs) mxu = mu.CluUtil() # set object values mcu.serial = True # mandatory serial mcu.MCMM[1] = steps mcu.MCOP[1] = 100 # define lists for intermediate files serial_files = [] # name of files from serial split mult_files = [] # name of files mult mini qp_files = [] # name of files clustered (used to be sent to QikProp) cleanup_files = [] # all the intermediate file names for removal # Perform serial AUTO search print("get_rep_confs(): starting serial conformation search...") search_job = jc.launch_job(mcu.getLaunchCommand(mcu.mcmm(mae_file))) search_job.wait(mcu.interval) search_out_mae_file = search_job.StructureOutputFile cleanup_files.append(search_out_mae_file) # Filter CTs in file, write groups of like serial numbers. print( "get_rep_confs(): separating conformation search output by serial number..." ) serial_files = mt.serial_split(search_out_mae_file) # Perform MultMini print("get_rep_confs(): starting sequence of multiple minimizations...") for file in serial_files: mult_job = jc.launch_job(mcu.getLaunchCommand(mcu.mult(file))) mult_job.wait(mcu.interval) mult_files.append(mult_job.StructureOutputFile) # Cluster to reduce output to representative sample print( "get_rep_confs(): starting sequence of cluster conformation reductions..." ) for file in mult_files: # figure out cluster thresh level that generates the desired max confs # thresh level = N - ( M + 1) # N = total number of confs # M = desired max number of confs # generate the name out_file = file mae_re = re.compile(r'-out\.mae$') if mae_re.search(out_file): out_file = mae_re.sub('-clust_rep.mae', out_file, 1) else: raise Exception( "get_rep_confs() can't determine file name pattern. ") # determine if user's max is possible if not just copy the file count = mt.count(file) # user input desired max thresh = count - max_confs + 1 if count > max_confs: # generate command file args mxu.arms[0] = 'heavy' # atoms for RMS after superim mxu.writerep = " ".join([repr(thresh), out_file, "all"]) # run cluster mxu.doCluster(file) else: os.rename(file, out_file) # add file to list for cleanup qp_files.append(out_file) # Clean up print("get_rep_confs(): cleaning up intermediate files...") for file in mult_files: cleanup_files.append(file) for file in serial_files: cleanup_files.append(file) for file in cleanup_files: if os.path.exists(file): os.unlink(file) print("get_rep_confs(): finished. ") return qp_files
[docs]def get_lead_confs(mae_file="", max_confs=5, steps=5000, **kwargs): """ This function combines a series of tasks to automate the processing of non-conformers in to a smaller representative collection of conformers. Returns a list of the file names that contain leading (lowest energy) cluster confs, grouped by input serial number. This method accepts the name of a mae file containing a set of non-conformers, optionally the max number of leading energy confs, optionally the number of search steps, optional kwargs for the ComUtil object, and then: a) performs a serial conformation search of non-conformers b) groups output of conformational search by serial number into multiple files c) performs multiple minimization of each serial number d) reduces output of each serial number with cluster, to N leading energy confs. The default maximum number of conformers per serial filtered by Cluster is 5. The default number steps used in the Monte Carlo Multiple Minima search is 5000. Returns the array of leading energy conf file names. Depends on macromodel.utils.CluUtil() class for clustering. Depends on macromodel.utils.ComUtil() for writing and running com files. Depends on macromodel.tools.serial_split() for grouping output. """ # get instances mcu = mu.ComUtil(**kwargs) mxu = mu.CluUtil() # set object values mcu.wait = True # mandatory wait, jc_wait optionally defined in object mcu.serial = True # mandatory serial mcu.MCMM[1] = steps mcu.MCOP[1] = 100 # define lists for intermediate files serial_files = [] # name of files from serial split mult_files = [] # name of files mult mini qp_files = [] # name of files clustered (used to be sent to QikProp) cleanup_files = [] # all the intermediate file names for removal # Perform serial AUTO search print("get_lead_confs(): starting serial conformation search...") search_job = jc.launch_job(mcu.getLaunchCommand(mcu.mcmm(mae_file))) search_job.wait(mcu.interval) search_out_mae_file = search_job.StructureOutputFile cleanup_files.append(search_out_mae_file) # Filter CTs in file, write groups of like serial numbers. print( "get_lead_confs(): separating conformation search output by serial number..." ) serial_files = mt.serial_split(search_out_mae_file) # Perform MultMini print("get_lead_confs(): starting sequence of multiple minimizations...") for file in serial_files: mult_job = jc.launch_job(mcu.getLaunchCommand(mcu.mult(file))) mult_job.wait(mcu.interval) mult_files.append(mult_job.StructureOutputFile) # Cluster to reduce output to leading energy sample print( "get_lead_confs(): starting sequence of cluster conformation reductions..." ) for file in mult_files: # figure out cluster thresh level that generates the desired max confs # thresh level = N - M + 1 # N = total number of confs # M = desired max number of confs # generate the name out_file = file mae_re = re.compile(r'-out\.mae$') if mae_re.search(out_file): out_file = mae_re.sub('-clust_lead.mae', out_file, 1) else: raise Exception( "get_lead_confs() can't determine file name pattern. ") # determine if user's max is possible if not just copy the file count = mt.count(file) # user input desired max thresh = count - max_confs + 1 if count > max_confs: # generate command file args mxu.arms[0] = 'heavy' # atoms for RMS after superim mxu.writelead = " ".join([repr(thresh), out_file, "all"]) # run cluster mxu.doCluster(file) else: os.rename(file, out_file) # add file to list for cleanup qp_files.append(out_file) # Clean up print("get_lead_confs(): cleaning up intermediate files...") for file in mult_files: cleanup_files.append(file) for file in serial_files: cleanup_files.append(file) for file in cleanup_files: if os.path.exists(file): os.unlink(file) print("get_lead_confs(): finished. ") return qp_files
[docs]def get_energy(structure=structure.Structure, ffld='OPLS_2005', solvent=False, cleanup=True, units=""): """ Returns a dictionary of MacroModel's molecular mechanics terms generated from a single point energy calculation. The dictionary may be restricted to a collection of values with a particular unit, kJ/mol or kcal/mol. By default, the function cleans up all intermediate files created by the job. The 'structure' argument must be a schrodinger.structure.Structure object. """ # Returned dict mmod_energy = {} mmod_energy['title'] = structure.title # Mmod energy terms terms = "|".join([ 'Total energy', 'Stretch', 'Bend', 'Proper torsion', 'Out-of-plane', 'Van der Waals', 'Electrostatic', 'Explcit Hydrogen Bonds', 'All solvation', 'Solvation Term 1', 'Solvation Term 2' ]) # Compile the re for snarfing energy terms energy_re = re.compile(r'(%s):\s+(\S+)\s+kJ\/mol\s+\(\s+(\S+) kcal' % terms) # JobIO file prep and clean up base_name = 'mmod_energy_calc' mae_file_name = ''.join([base_name, '.mae']) mmo_file_name = ''.join([base_name, '-out.mmo']) out_file_name = ''.join([base_name, '-out.mae']) com_file_name = ''.join([base_name, '.com']) cleanup_files = [mae_file_name, mmo_file_name, out_file_name, com_file_name] # Remove any previous debris for file in cleanup_files: if cleanup and os.path.exists(file): os.remove(file) # Get ComUtil customized with ffld but no jobcontrol, we will capture stdout mcu = mu.ComUtil(ffld=ffld, solv=solvent) mcu.no_redirect = True mcu.ELST[1] = 0 # terse reporting of terms mcu.ELST[2] = 1 # report in kcal/mol units # Write out our structure file, clobber if needed structure.write(mae_file_name) # Write the comfile, get the command to invoke the job cmd = mcu.getLaunchCommand(mcu.elst(mae_file_name)) cmd[0] = os.path.join(os.getenv('SCHRODINGER'), cmd[0]) # Run job, capture stdout child_stdin, child_stdout, child_stderr = os.popen3(cmd) # Parse stdout for the terms for line in child_stdout.readlines(): print(line) match = energy_re.search(line) if match: # make keys for both unit types key_kJ_per_mol = " ".join([match.group(1), "kJ/mol"]) key_kcal_per_mol = " ".join([match.group(1), "kcal/mol"]) # but only populate with requested values if units.startswith('kcal'): mmod_energy[key_kcal_per_mol] = match.group(3) elif units.startswith('kJ'): mmod_energy[key_kJ_per_mol] = match.group(2) else: mmod_energy[key_kJ_per_mol] = match.group(2) mmod_energy[key_kcal_per_mol] = match.group(3) # Don't leave a mess for file in cleanup_files: if cleanup and os.path.exists(file): os.remove(file) child_stdin.close() child_stdout.close() child_stderr.close() return mmod_energy
# EOF