Source code for schrodinger.application.desmond.mxmd.mxmd_system_builder

"""Script to setup cosolvent system"""

import os
from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple
from functools import cached_property

import numpy as np

import schrodinger.application.desmond.system_builder_inp as sb_inp
from schrodinger import structure
from schrodinger.application.desmond import constants
from schrodinger.application.desmond import envir
from schrodinger.application.desmond import struc
from schrodinger.application.desmond.cms import get_box
from schrodinger.job import jobcontrol
from schrodinger.structure import Structure
from schrodinger.structutils.analyze import evaluate_asl
from schrodinger.utils import fileutils
from schrodinger.utils import log

logger = log.get_output_logger(name="mixed_solvent")

WATER_MOLECULAR_MASS = 18.01528
BIG_BOX_BUFFER = 15.0
SHRINK_BOX_BY = 0.01
BOX_DATA = Path(envir.CONST.MMSHARE_SYSTEM_BUILDER_DATA_DIR) / 'mxmd'
DEFAULT_PROBES_STR = "acetonitrile,isopropanol,pyrimidine"
SYS_BUILDER = os.path.join(envir.CONST.SCHRODINGER_UTILITIES, "system_builder")
BUILTIN_PROBE_NAMES = [
    box_fname.replace('.box.mae', '') for box_fname in os.listdir(BOX_DATA)
]


[docs]class Cosolvent:
[docs] def __init__(self, filename: str): """ `filename` should follow the pattern `<probe_name>.box.mae` """ self._filename = filename self.name = Path(filename).name[:-8] # strip off .box.mae
@cached_property def box_strucs(self) -> List[Structure]: return list(structure.StructureReader(self._filename)) @property def _ct0(self): return self.box_strucs[0] @cached_property def probe_pdbres(self): res = next(iter(self._ct0.residue)) return res.pdbres @cached_property def density(self) -> float: """ Return average density of the box in g/cm^3 """ # FIXME: AU_TO_KG is a misleading name # but the return value has the correct units of g/cm^3. mass_kg = self._ct0.total_weight * constants.Conversion.AU_TO_KG all_boxes = [ np.array(get_box(ct)).reshape((3, 3)) for ct in self.box_strucs ] vol = np.array([np.linalg.det(box) for box in all_boxes]) return np.mean(mass_kg / vol) @cached_property def probe_nheavy(self) -> int: """ Return number of heavy atoms per probe molecule """ return len( [a for a in self._ct0.molecule[1].atom if a.atomic_number > 1]) @cached_property def probe_mass(self) -> float: """ Return molecular weight of the probe """ mol = self._ct0.molecule[1] return sum([a.atomic_weight for a in mol.atom]) @property def filename(self): return self._filename
[docs]def get_probe_paths(probe_names: List[str], custom_probe_dir: Optional[str]=None) \ -> Tuple[List[str], List[str], List[str]]: """ Process the given probe names and return paths to their .box.mae file. Additionally return any missing probes. :return: A 3-tuple of two lists containing probe paths for custom probe and builtin probes and a list containing probe names with missing files. """ custom = [] builtin = [] missing_probes = [] for probe_name in probe_names: if custom_probe_dir: probe_path = _get_probe_path(probe_name, custom_probe_dir) if Path(probe_path).exists(): custom.append(probe_path) continue builtin_probe_name = _get_probe_path(probe_name) if Path(builtin_probe_name).exists(): builtin.append(builtin_probe_name) continue missing_probes.append(probe_name) return custom, builtin, missing_probes
def _get_probe_path(probe_name: str, probe_dir: str = BOX_DATA): """ Get a path to the given probe's solvent box structure file """ return os.path.join(probe_dir, f"{probe_name}.box.mae")
[docs]def convert_vv2mol(probe: Cosolvent, target_vv_ratio: float = 5.0) -> int: ''' This function reports the number of water molecules required to maintain the required volume/volume ratio for each molecules of the probe Assumes that density is provided in gm/cm3 molecular mass also should be specified in gm/mol :param probe_name: Name of the cosolvent used to build mixed-solvent box :param target_vv_ratio: Volume over volume ratio in percent ''' return int((100.0 - target_vv_ratio) / WATER_MOLECULAR_MASS) / ( (target_vv_ratio * probe.density) / probe.probe_mass)
[docs]class CosolventBoxGenerator: """ Example:: gen = CosolventBoxGenerator( inp_fname, 'probe_directory/acetonitrile.box.mae', 1, cosolvent_layer_size=5.0, cosolvent_volume_ratio=2.0) gen.generate(out_fname) """
[docs] def __init__(self, solute_fname: str, probe_path: str, box_number: int, init_water_buffer: float = BIG_BOX_BUFFER, cosolvent_layer_size: float = 7.0, cosolvent_volume_ratio: float = 5.0, cosolvent_vdw_scaling: Optional[float] = None, water='SPC'): self._solute_fname = Path(solute_fname).absolute() self._box_number = box_number self._init_water_buffer = init_water_buffer self._water_model = water self._vdw_scaling = cosolvent_vdw_scaling self._layer_size = cosolvent_layer_size self._volume_ratio = cosolvent_volume_ratio self._probe = Cosolvent(probe_path) self._probe_name = self._probe.name
[docs] def generate(self, fname: str): mx_strucs = self._generate_cosolvent_box() if not mx_strucs: return self._shrink_water_box(mx_strucs, self._volume_ratio) with structure.StructureWriter(fname) as ct_writer: for ct in mx_strucs: ct_writer.append(ct)
def _generate_cosolvent_box(self) -> List[Structure]: ''' Creates solvated mixed solvent system in three steps: 1) Solvate the protein with cosolvent, and extract a layer of cosolvents around the protein 2) Solvate the protein-cosolvent system with specified water. Intentionally use a larger box so that it can laber be shrunk to match the targeted volume over volume (v/v) ratio. 3) Extract cosolvent from solute CT into a separate 'cosolvent' CT. ''' with fileutils.in_temporary_directory(): logger.info(f'Temp dir set to: {os.getcwd()}') layered_box_fname = self._make_layered_box() layered_shell_fname = self._make_layered_shell(layered_box_fname) mx_strucs = solvate_cosolvent(layered_shell_fname, self._water_model.lower(), self._init_water_buffer) self._split_cosolvent_from_solute(mx_strucs) return mx_strucs def _make_layered_box(self) -> Path: layered_box_fname = Path('layered_box.mae') csb_fn = 'create_cosolvent_box.csb' cosolvent_struc = self._probe.box_strucs[self._box_number] cosolvent_box_fname = 'cosolvent_box.mae' cosolvent_struc.write(cosolvent_box_fname) # backup reference coordinates for solute solute_ct = Structure.read(self._solute_fname) struc.set_ct_reference_coordinates(solute_ct) out_solute_fname = 'out_solute.mae' solute_ct.write(out_solute_fname) system = sb_inp.SystemBuilderInput() system.setSolute(out_solute_fname) # move non zero-ordered water molecules from solute to solvent CT system.treatSolventFromSolute() cosolvent_vdw_scaling = (self._vdw_scaling or 1.0 - (self._probe.probe_nheavy * 0.07) ) # yapf: disabled system.setVdwScalingFactor(cosolvent_vdw_scaling) system.setSolvent(cosolvent_box_fname) system.setNeutralize(0) system.setSoluteAlignment('rezero') system.setWriteMaeFile(str(layered_box_fname)) system.setBoundaryCondition(use_buffer=1, boundary_condition='cubic', a=self._layer_size) system.write(csb_fn) cmd = [SYS_BUILDER, csb_fn] logger.info( f'Creating a box of {self._probe_name} molecules around the ' f'protein, with vdw scaling: {cosolvent_vdw_scaling:.3f}') job = jobcontrol.launch_job(cmd, print_output=True, launch_dir='.') job.wait() return layered_box_fname def _make_layered_shell(self, layered_box_fname: Path) -> Path: full_system_ct = structure.Structure.read(layered_box_fname, index=1) layered_shell_fn = Path('layered_shell_raw.mae') logger.info( f"Converting the cosolvent box " f"({self._probe.filename}, " f"index={self._box_number}) to a layer of cosolvent molecules around " f"the protein.") # Keep cosolvents within a radius and delete xtal waters full_system_ct.deleteAtoms( evaluate_asl( full_system_ct, f'(fillmol beyond {self._layer_size} (protein or nucleic_acids)) ' 'or atom.i_desmond_crystal_water 1')) full_system_ct.title = structure.Structure.read( self._solute_fname).title full_system_ct.write(layered_shell_fn) logger.info(f'Written {layered_shell_fn}') return layered_shell_fn def _split_cosolvent_from_solute(self, mx_strucs: List[Structure]) -> None: ''' Split cosolvent molecules from solute CT into a seaparate CT. This CT is then inserted before solvent CT. Input `mx_strucs` will be mutated. :param mx_strucs: List of structures that assumes the same order as in Cms file structure (fullsystem, solute, solvent CTs) :param probe_name: name of the cosolvent used to build mixed-solvent box ''' probe_name = self._probe_name SOLUTE_CT_IDX = 1 cosol_atom_indices = evaluate_asl( mx_strucs[1], f'res.ptype "{self._probe.probe_pdbres}"') if not cosol_atom_indices: mx_strucs.clear() return cosolvent_ct = mx_strucs[1].extract(cosol_atom_indices) mx_strucs[SOLUTE_CT_IDX].deleteAtoms(cosol_atom_indices) cosolvent_ct.property[ constants.CT_TYPE] = constants.CT_TYPE.VAL.COSOLVENT cosolvent_ct.property[constants.NUM_COMPONENT] = 1 cosolvent_ct.title = f'Cosolvent - {probe_name}' cosolvent_ct.property[constants.MXMD_COSOLVENT_PROBE] = probe_name for prop in constants.SIM_BOX: cosolvent_ct.property[prop] = mx_strucs[SOLUTE_CT_IDX].property[ prop] for ires, res in enumerate(cosolvent_ct.residue, start=1): res.resnum = ires # Insert cosolvent CT to be before the solvent CT mx_strucs.insert(-1, cosolvent_ct) def _shrink_water_box(self, mx_strucs: List[Structure], target_vv_ratio: float) -> None: """ Shrink the water box to match volume-over-volume ratio :param mx_strucs: List of structures that assumes the same order as in Cms file structure (fullsystem, solute, solvent CTs) :param target_vv_ratio: Volume over volume ratio in percent """ cosol_ct_idx, wat_ct_idx = -2, -1 num_cosolvents = mx_strucs[cosol_ct_idx].mol_total nwat = mx_strucs[wat_ct_idx].mol_total required_nwat = int(num_cosolvents * convert_vv2mol(self._probe, target_vv_ratio)) logger.info( 'Shrinking the water box to match volume over volume ratio of ' f'{target_vv_ratio}%. Current box size is {self._init_water_buffer}' ) shrink_cosolvent_box(mx_strucs, required_nwat) logger.info( 'Done generating mixed-solvent box with targeted volume/volume ratio.' )
[docs]def solvate_cosolvent( inp_mae: Path, water_model: str = 'spc', init_water_buffer=BIG_BOX_BUFFER) -> List[structure.Structure]: """ Solvate the cosolvent ct with water :param inp_mae: The path to the input mae. This is a system containing the cosolvent. :param job_dir: A path to the directory to run the job in :param water_model: The water type to use as the solvent :param init_water_buffer: The initial size of water box :return: Three structures from system_builder output: the fullsystem, solute, and solvent """ # Solvate Protein-cosolvent CT with water solvated_fn = 'solvated.mae' csb_fn = 'create_water_box.csb' inp = sb_inp.SystemBuilderInput() inp.setSolute(str(inp_mae)) inp.setSolvent(f"{water_model}.box.mae") inp.setNeutralize(0) inp.setWriteMaeFile(solvated_fn) inp.setBoundaryCondition(use_buffer=1, boundary_condition='cubic', a=init_water_buffer) inp.write(csb_fn) cmd = [SYS_BUILDER, csb_fn] logger.info("Creating a solvated mixed-solvent system") job = jobcontrol.launch_job(cmd, print_output=True) job.wait() logger.info(f"Mixed-solvent ({solvated_fn}) system created.") return list(structure.StructureReader(solvated_fn))
[docs]def shrink_cosolvent_box(cts: List[structure.Structure], required_nwat: int): """ Shrink the system box so that it contains a given number of water molecules. :param cts: The input structures in the order of: [full system, non-solvent ct..., cosolvent ct, water ct]. These cts are modified in place. :param required_nwat: How many waters should remain after shrinking system """ fsys_ct, *non_water_cts, water_ct = cts # delete excess waters, to match 50% weight/weight ratio nwat = water_ct.mol_total logger.info(f'Required number of water molecules is {required_nwat} for ' f'{non_water_cts[-1].mol_total} cosolvent molecules. ' f'Current system contains {nwat} waters.') if required_nwat > nwat: raise ValueError("Number of water molecules smaller than required. " "Cannot shrink system") half_box = water_ct.property[constants.SIM_BOX[0]] / 2.0 noh_wat_aid = evaluate_asl(water_ct, "water and not a.e H") noh_wat_idx = [i - 1 for i in noh_wat_aid] xyz = np.abs(water_ct.getXYZ()[noh_wat_idx]) while nwat > required_nwat: nwat = len(xyz[np.max(xyz, axis=1) <= half_box]) half_box -= SHRINK_BOX_BY # add half of SHRINK_BOX_BY for a guess closer to the target half_box += SHRINK_BOX_BY / 2.0 logger.info('Cubic box length will be shrunk from ' f'{water_ct.property[constants.SIM_BOX[0]]:.3f} to ' f'{(half_box * 2):.3f} Angstroms, with {nwat} water molecules.') # remove water molecues from solvent and full_system CTs wat_atom_idx_to_delete = [] for mol in water_ct.molecule: if np.max(np.abs(mol.atom[1].xyz)) > half_box: wat_atom_idx_to_delete += mol.getAtomIndices() water_ct.deleteAtoms(wat_atom_idx_to_delete) # offset atom indices number of atoms prior to solvent block offset = sum([ct.atom_total for ct in non_water_cts]) fsys_ct.deleteAtoms([idx + offset for idx in wat_atom_idx_to_delete]) # update box values for ax, by, cz, of a cubic box for ct in cts: for prop in constants.SIM_BOX_DIAGONAL: ct.property[prop] = half_box * 2
if __name__ == '__main__': # pragma: no cover # For quick test. import sys in_fname, out_fname, cosolvent_layer_size = sys.argv[1:4] cosolvent_layer_size = float(cosolvent_layer_size) gen = CosolventBoxGenerator(in_fname, 'acetonitrile', 1, cosolvent_layer_size=cosolvent_layer_size, cosolvent_vdw_scaling=1.0) gen.generate(out_fname)