Source code for schrodinger.application.desmond.stage.app.mxmd

"""
Multisim stages that relate to Mixed solvent MD.
"""
import glob
import os
from pathlib import Path

import numpy as np
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure

from schrodinger.application.desmond import cmj
from schrodinger.application.desmond import cms
from schrodinger.application.desmond import constants
from schrodinger.application.desmond import envir
from schrodinger.application.desmond import struc
from schrodinger.application.desmond import util
from schrodinger.application.desmond.cns_io import write_cns
from schrodinger.application.desmond.mxmd import mxmd_system_builder as msb
from schrodinger.structutils.analyze import evaluate_asl
from schrodinger.utils import subprocess

_WATER_MODELS_STR = " ".join(constants.WATER_MODELS)
SYS_BUILDER = os.path.join(envir.CONST.SCHRODINGER_UTILITIES, 'system_builder')
# radial density function g(r) for SPC water model
GOFR_PURE_WATER = np.asarray([
    0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
    0., 0., 0., 0., 0., 0., 0.010634, 0.338050, 1.682675, 2.736977, 2.503406,
    1.829748, 1.333802, 1.071222, 0.965162, 0.914427, 0.893070, 0.898258,
    0.904990, 0.924600, 0.940725, 0.966754, 0.972843, 1.002643, 1.025567,
    1.048943, 1.049505, 1.060412, 1.050993, 1.042898, 1.041842, 1.024149,
    1.003722, 0.991205, 0.980145, 0.969153, 0.952752, 0.938180, 0.942619,
    0.939926, 0.949592, 0.956575, 0.963424, 0.980776, 0.994291, 1.009102,
    1.015644, 1.019306, 1.024479, 1.033992, 1.032949, 1.032074, 1.030571,
    1.026839, 1.021950, 1.015802, 1.013546, 1.004896, 0.998060, 0.993408,
    0.986467, 0.985717, 0.986613, 0.990264, 0.986388, 0.991880, 0.989678,
    0.994545, 0.995628, 0.998898, 1.002230, 1.003586, 1.000542, 1.004498,
    1.005333, 1.001349, 1.001855, 1.003135, 1.000729, 1.002678, 1.000686,
    0.997965, 1.001651, 1.001225, 1.002781, 1.002162, 1.000959, 0.998455,
    1.001817, 1.001155, 0.997895, 1.004563, 1.000949, 1.002749, 1.001887,
    1.000505, 0.999098, 1.000673, 1.001367, 1.001307, 0.997006, 0.997471
])
__all__ = ['MixedSolventSetup', 'MixedSolventAnalysis', 'MixedSolventCleanup']


[docs]def normalize_probe_occupancy_grid(grid): ''' Convert the grid counts to z-scores by normalizing the grid. Since the grid contains probe occupancies, this matrix is mostly sparse (with ~1% of non- zero values) we need to omit all the zero-containing values from calculating of mean and standard deviation. ''' normgrid = np.zeros(grid.shape, dtype='float16') mask = grid != 0.0 normgrid[mask] = grid[mask] - grid[mask].mean() normgrid /= grid[mask].std() return normgrid
def _can_import_project(): # Needed for the cleanup stage try: # This is used to check the analysis can run from schrodinger import project # noqa: F401 return True except ImportError: return False
[docs]class MixedSolventSetup(cmj.StageBase): """ Multisim stage that builds a cosolvent system. Setting acetonitrile, isopropanol and pyrimidine as default probes as they are water-miscible. Provide a custom_probe_dir to use custom <probe>.box.mae files. This should be a relative path pointing to a subdirectory of the launch directory """ NAME = "mixed_solvent_setup" PARAM = cmj._create_param_when_needed([ """ DATA = { cosolvent_probes = [acetonitrile isopropanol pyrimidine] cosolvent_layer_size = 7.0 number_cosolvent_systems = 10 cosolvent_volume_ratio = 5.0 cosolvent_vdw_scaling = ? water = 'SPC' init_water_buffer = %s custom_probe_dir = ? } VALIDATE = { cosolvent_probes = {type = list} cosolvent_layer_size = {type = float+} number_cosolvent_systems = {type = int range = [1 10]} cosolvent_volume_ratio = {type = float range = [0.0 100.0]} cosolvent_vdw_scaling = [{type = float+} {type = none}] water = {type = enum range = [%s] } init_water_buffer = {type = float+} custom_probe_dir = [{type = str} {type = none}] } """ % (msb.BIG_BOX_BUFFER, _WATER_MODELS_STR) ])
[docs] def check_param(self): """ Check that the necessary box .mae files for the specified probes exist """ ev = super().check_param() custom_probe_dir = self.param.custom_probe_dir.val if custom_probe_dir: custom_probe_dir = Path(custom_probe_dir) if custom_probe_dir.is_absolute(): ev.record_error( f"custom_probe_dir ({custom_probe_dir}) must be a relative " f"path pointing to a subdirectory of the launch dir") return ev
[docs] def collect_inputfile(self): """ Get any probe boxes specified by user that are in the provided custom directory """ custom_probes, _, _ = msb.get_probe_paths( self.param.cosolvent_probes.val, self.param.custom_probe_dir.val) return custom_probes
[docs] def crunch(self): """ Generate a solvent box for each specified cosolvent """ self._print("debug", "In MixedSolventSetup.crunch") # Required for the analysis stage. if not _can_import_project(): self._print( "quiet", "ERROR: Could not import project library needed for the workflow. " "Make sure the X11 libraries are installed and try agian.") return custom_probe_dir = self.param.custom_probe_dir.val if custom_probe_dir: custom_probe_dir = os.path.abspath(custom_probe_dir) pj = self.get_prejobs()[0] jobname, job_dir = self._get_jobname_and_dir(pj) if not os.path.isdir(job_dir): os.makedirs(job_dir) util.chdir(job_dir) mxmd_sb_options = { 'cosolvent_layer_size': self.param.cosolvent_layer_size.val, 'cosolvent_vdw_scaling': self.param.cosolvent_vdw_scaling.val, 'cosolvent_volume_ratio': self.param.cosolvent_volume_ratio.val, 'water': self.param.water.val, 'init_water_buffer': self.param.init_water_buffer.val, } mae_fname = pj.output.struct_file() custom_probes, builtin_probes, _ = msb.get_probe_paths( self.param.cosolvent_probes.val, custom_probe_dir) for probe_path in custom_probes + builtin_probes: for box_index in range(self.param.number_cosolvent_systems.val): probe_name = Path(probe_path).name[:-8] # strip off .box.mae basename = f'mxmd_{probe_name}-{box_index}' out_mae_fname = f'{basename}.mae' gen = msb.CosolventBoxGenerator(mae_fname, probe_path, box_index, **mxmd_sb_options) gen.generate(out_mae_fname) self._print("debug", out_mae_fname) new_job = cmj.Job(None, pj, self, None, job_dir) if not os.path.exists(out_mae_fname): new_job.status.set(cmj.JobStatus.BACKEND_ERROR) self._print( "quiet", f"ERROR: {self.NAME} failed: Could not generate cosolvent structures {out_mae_fname} for {probe_name}. " "Check input structures and parameters and try again.") else: new_job.output.add(os.path.abspath(out_mae_fname)) new_job.status.set(cmj.JobStatus.SUCCESS) new_job.tag = basename self.add_job(new_job) self._print("debug", "Out MixedSolventSetup.crunch")
[docs]class MixedSolventAnalysis(cmj.StageBase): """ Analyze cosolovent probes around a the protein. """ NAME = "mixed_solvent_analysis" PARAM = cmj._create_param_when_needed([ """ DATA = { grid_spacing = 0.5 align_asl = '(protein and atom.ptype CA)' box_length = ? } VALIDATE = { grid_spacing = {type = float+} align_asl = {type = str1} box_length = [{type = none} {type = float+}] } """, ])
[docs] def crunch(self): """ Calculate occupancy grids, write .cns and .raw files """ self._print("debug", "In MixedSolventAnalysis.crunch") from schrodinger.application.desmond.packages import analysis from schrodinger.application.desmond.packages import topo from schrodinger.application.desmond.packages import traj_util from schrodinger.structure import create_new_structure for pj in self.get_prejobs(): jobname, jobdir = self._get_jobname_and_dir(pj) if not os.path.isdir(jobdir): os.makedirs(jobdir) util.chdir(jobdir) cms_fname = pj.output.struct_file() grid_spacing = self.param.grid_spacing.val align_asl = self.param.align_asl.val box_length = self.param.box_length.val msys_model, cms_model, tr = traj_util.read_cms_and_traj(cms_fname) cosolvent_ct = struc.component_structures(cms_model).cosolvent cosolvent_aids = cms_model.get_fullsys_ct_atom_index_range( cms_model.comp_ct.index(cosolvent_ct)) cosolvent_aids_noh = [ aid for aid in cosolvent_aids if cms_model.fsys_ct.atom[aid].atomic_number != 1 ] cosolvent_mols = cosolvent_ct.mol_total cosolvent_probe = cosolvent_ct.property.get( constants.MXMD_COSOLVENT_PROBE, '') # extract reference structure and use it as a reference to align # trajectory and cms_model ref_mae = struc.get_reference_ct(cms_model.fsys_ct) ref_pos = ref_mae.extract(evaluate_asl(ref_mae, align_asl)).getXYZ() center_pos = np.mean(ref_pos, axis=0) ref_gids = topo.asl2gids(cms_model, align_asl, include_pseudoatoms=False) tr = topo.superimpose(msys_model, ref_gids, tr, ref_pos) cms_model = topo.superimpose_cms(msys_model, ref_gids, cms_model, ref_pos) # Get the box around the protein for grid occupacy calculation if not box_length: solute_size = np.max( np.max(ref_pos, axis=0) - np.min(ref_pos, axis=0)) # Box to calculate the occupancies should be large enough to # count all the probes in the simulation box. The size of the # occupancy grid can be roughly approximated by doubling the # solute size, and calculating its cube's diagonal: # diag = sqrt(3*l^2) box_length = np.sqrt(3 * (solute_size * 2)**2) self._print( "quiet", f"Cubic box with length of {box_length:.2f}, " f"with grid spacing of {grid_spacing}, will be used to " f"calculate {cosolvent_probe} occupancy.") grid_spacing = [grid_spacing] * 3 box_length = [box_length] * 3 vm = analysis.VolumeMapper(cms_model, aids=cosolvent_aids_noh, spacing=grid_spacing, length=box_length, center=center_pos, normalize=False) grid = analysis.analyze(tr, vm) # reduce precision grid = grid.astype('uint16') normgrid = normalize_probe_occupancy_grid(grid) # Write .cns and .raw files. The later file will be used by the # cleanup stage to generate aggregate data for each probe type. _, cms_fname = os.path.split(cms_fname) out_cns_fname = f'{jobname}-out.cns' out_raw_fname = f'{jobname}-out.raw' out_mae_fname = f'{jobname}-out.mae' out_probes_fname = f'{jobname}-probes.mae' cms_fname = os.path.join(os.getcwd(), cms_fname) write_cns(normgrid, box_length, grid_spacing, out_cns_fname) # Save probe molecules from 20 frames into a CT for later use. probes_ct = create_new_structure() _pct = struc.delete_ffio_ff(cosolvent_ct) cosolvent_gids = topo.aids2gids(cms_model, cosolvent_aids, include_pseudoatoms=False) nframes = len(tr) fr_interval = 1 if nframes < 20 else nframes // 20 for fr in tr[::fr_interval]: _pct.setXYZ(fr.pos(cosolvent_gids)) probes_ct = probes_ct.merge(_pct) probes_ct.title = _pct.title probes_ct.property[constants.MXMD_COSOLVENT_PROBE] = cosolvent_probe probes_ct.write(out_probes_fname) self._print( "quiet", "Extracted and merged probes from 20 frames:\n" f" {', '.join(map(str, range(0, len(tr), fr_interval)))}") self._print("quiet", f"Saved {out_probes_fname}") # Use the first component ct for output ct = struc.delete_ffio_ff( struc.component_structures(cms_model).solute) # Set ct properties ct.property[constants.MXMD_COSOLVENT_PROBE] = cosolvent_probe ct.property[constants.MXMD_GRID_SPACING] = grid_spacing[0] ct.property[constants.MXMD_BOX_LENGTH] = box_length[0] ct.property[constants.MXMD_NUM_PROBES] = cosolvent_mols ct.property[constants.MXMD_CENTER_X] = center_pos[0] ct.property[constants.MXMD_CENTER_Y] = center_pos[1] ct.property[constants.MXMD_CENTER_Z] = center_pos[2] ct.write(out_mae_fname) with open(out_raw_fname, 'wb') as fh: np.save(fh, grid) new_job = cmj.Job(jobname, pj.parent, self, None, jobdir) new_job.need_host = False new_job.status.set(cmj.JobStatus.SUCCESS) for file in [out_cns_fname, out_raw_fname, out_mae_fname]: new_job.output.add(os.path.abspath(file)) self.add_job(new_job) self._print("debug", "Out MixedSolventAnalysis.crunch")
[docs]class MixedSolventCleanup(cmj.StageBase): NAME = "mixed_solvent_cleanup" TAG = "MIXED_SOLVENT_CLEANUP" PARAM = cmj._create_param_when_needed([ """ DATA = { sigma = 20.0 } VALIDATE = { sigma = {type = float range = [0.0 100.0]} } """ ])
[docs] def crunch(self): self._print("debug", "In MixedSolventCleanup.crunch") # Nothing to do if len(self.get_prejobs()) == 0: self._print("debug", "Out MixedSolventCleanup.crunch") return # Set up output job pj = self.get_prejobs()[0] jobname, jobdir = self._get_jobname_and_dir(pj) if not os.path.isdir(jobdir): os.makedirs(jobdir) base_dir = cmj.ENGINE.base_dir jobname = cmj.ENGINE.jobname checkpoint = os.path.abspath( os.path.join(base_dir, f'{jobname}-multisim_checkpoint')) cmd = [ "run", 'mxmd_cleanup.py', checkpoint, '-sigma', str(self.param.sigma.val) ] job = cmj.Job(jobname, pj.parent, self, None, jobdir) job.need_host = False # Script must be run from the base dir util.chdir(base_dir) # Run the job log_fn = os.path.join(jobdir, 'job.log') with open(log_fn, 'a') as log_fh: ret_code = subprocess.call(cmd, stdout=log_fh, stderr=log_fh) if ret_code == 0: job.status.set(cmj.JobStatus.SUCCESS) else: job.status.set(cmj.JobStatus.BACKEND_ERROR) self._print("quiet", "Error running cleanup stage:") # Display the log, filtering out QT warning log_lines = [] with open(log_fn, 'r') as log_fh: for line in log_fh.readlines(): # This warning is triggered by mxmd_cleanup::write_maestro_project if 'Timers can only be used' in line: continue log_lines.append(line) log = "\n".join(log_lines) self._print("quiet", f'{log}') self.add_job(job) self._print("debug", "Out MixedSolventCleanup.crunch")
[docs] def poststage(self): base_dir = cmj.ENGINE.base_dir # Copy the output files for pattern in ['*.prjzip', '*_results']: for fn in glob.glob(os.path.join(base_dir, pattern)): self._files4copy.append(os.path.abspath(fn))
[docs]class GenerateMxmdBox(cmj.StructureStageBase): """ Solvate cosolvent CT with water to produce an output system with 50% water:cosolvent weight/weight """ NAME = "generate_mxmd_box"
[docs] def run(self, jobname: str, cms_fname: str) -> str: cosolvent_ct = cms.Cms(cms_fname).comp_ct[0] cosolvent_ct.property[ constants.CT_TYPE] = constants.CT_TYPE.VAL.COSOLVENT cosolvent_ct.write('input.mae') # Solvate cosolvent CT with water # `solvate_cosolvent` returns # 1) full system (cosolvent + water) # 2) cosolvent # 3) water cts = msb.solvate_cosolvent(Path('input.mae'), water_model='spc', init_water_buffer=10) full_system, cosolvent_box, water_box = cts cosolvent_ct.property[ constants.CT_TYPE] = constants.CT_TYPE.VAL.COSOLVENT self._print( "quiet", "Deleting waters to match 50% cosolvent/water " "weight/weight") probes_weight = cosolvent_box.total_weight required_nwat = int(probes_weight // 18.02 + 1) msb.shrink_cosolvent_box(cts, required_nwat) self._print( "quiet", "Done generating mixed-solvent box with target " "weight/weight ratio") out_fname = 'shrunk.mae' struc.write_structures(cts, out_fname) return out_fname
[docs]class AnalyzeMxmdProbeMixture(cmj.StageBase): """ Analyze the solvent/cosolvent trajectories to determine miscibility. """ NAME = "analyze_mxmd_probe_mixture" PARAM = cmj._create_param_when_needed([ """ DATA = { title = "Analyze results" jobname = "$MAINJOBNAME" dir = "." compress = "" } """ ])
[docs] def crunch(self): self._print("debug", "In AnalyzeMxmdProbeMixture.crunch") pj = self.get_prejobs()[0] jobname, job_dir = self._get_jobname_and_dir(pj) if not os.path.isdir(job_dir): os.makedirs(job_dir) util.chdir(job_dir) probe_name = jobname.split('_')[0] probe_cmsfn = f'{jobname}_probe-out.cms' probe_water_cmsfn = f'{jobname}_probe-water-out.cms' plot_fname = self._generatePlot(probe_name, probe_water_cmsfn, probe_cmsfn) new_job = cmj.Job(jobname, pj, self, None, job_dir) new_job.output = cmj.JobOutput() new_job.status.set(cmj.JobStatus.SUCCESS) new_job.output.add(os.path.abspath(plot_fname)) self.add_job(new_job) self._print("debug", "Out AnalyzeMxmdProbeMixture.crunch")
def _generatePlot(self, probe_name: str, probe_water_cms_fn: str, probe_cms_fn: str): from schrodinger.application.desmond.packages import analysis from schrodinger.application.desmond.packages import traj_util msys_model_pw, cms_model_pw, tr_pw = traj_util.read_cms_and_traj( probe_water_cms_fn) msys_model_p, cms_model_p, tr_p = traj_util.read_cms_and_traj( probe_cms_fn) nframes_pw, nframes_p = len(tr_pw), len(tr_p) weight = cms_model_p.total_weight volume = np.mean([fr.box[0][0]**3 for fr in tr_p[nframes_p // 2:]]) density = weight * constants.Conversion.AU_TO_KG / volume weight_over_weight = cms_model_pw.comp_ct[0].total_weight / \ cms_model_pw.comp_ct[1].total_weight self._print('quiet', f'{probe_name} w/w ratio: {weight_over_weight:.3f}') self._print('quiet', f'{probe_name} density: {density:.3f}') rmax, dr = 12.0, 0.1 r = np.arange(0, rmax + dr, dr) rdf_pw = analysis.Rdf(msys_model_pw, cms_model_pw, asl0='not water', asl1=None, pos_type0='com', pos_type1=None, rmax=rmax, dr=dr) rdf_p = analysis.Rdf(msys_model_p, cms_model_p, asl0='not water', asl1=None, pos_type0='com', pos_type1=None, rmax=rmax, dr=dr) rdf_wat = analysis.Rdf(msys_model_pw, cms_model_pw, asl0='water and not a.e H', asl1=None, rmax=rmax, dr=dr) rgyr = analysis.Gyradius(msys_model_p, cms_model_p, asl='mol.num 1') gofr_pw, _ = analysis.analyze(tr_pw[nframes_pw // 2:], rdf_pw) gofr_p, _ = analysis.analyze(tr_p[nframes_p // 2:], rdf_p) gofr_wat, _ = analysis.analyze(tr_pw[nframes_pw // 2:], rdf_wat) rad_probe = analysis.analyze(tr_p[nframes_p // 2:], rgyr) rgyr_ave, rgyr_std = np.mean(rad_probe), np.std(rad_probe) self._print( 'quiet', f'{probe_name} Rgyr: {rgyr_ave:.3f} +- {rgyr_std:.3f}') fig = Figure() canvas = FigureCanvasAgg(fig) axes = fig.add_subplot(111) axes.axvline(x=(rgyr_ave**2) * 2, label='Probe diameter', color='red') axes.plot(r, gofr_p, label='Probe - Neat', color='black') axes.plot(r, gofr_pw, label='Probe - with Water', color='black', linestyle='--') axes.plot(r, GOFR_PURE_WATER, label='Water - Neat', color='blue') axes.plot(r, gofr_wat, label='Water - with Probe', color='blue', linestyle='--') axes.set_ylim(0, 5) axes.autoscale(enable=True, axis='x') axes.autoscale_view() fig.suptitle(f'{probe_name}', fontsize=18) axes.set_title( f'W/W ratio: {weight_over_weight:.3f}; density: {density:.3f} ' + r'$gm/cm^3$', fontsize=11) axes.legend() canvas.draw() output_fname = f'{probe_name}.png' fig.savefig(output_fname) return output_fname