Source code for schrodinger.application.desmond.fep_net_charge

import os
import sys
import uuid
from past.utils import old_div

import numpy

import schrodinger.application.desmond.cms as cms
import schrodinger.utils.log as log
from schrodinger.application.desmond import constants
from schrodinger.application.desmond.simulation_block_data import \
    get_listof_props_ene

logger = log.get_output_logger(name="fep_net_charge")

WATER_DIA_CONST = 65.0
TEMPERATURE = 300.0
JOULES_PER_CAL = 4.184

COULOMB_CONST = 138.93545585  # (4*pi*eps_0)**-1 in (kJ nm)/(e**2 mol)
BOLTZMANN_CONST = 0.0083144621  # kJ/(mol K)
UNIT_CUBIC_COULOMB_ENG = 2.380077  #see eq. (21) in paper and accompanying text
CUBIC_LATTICE_SUM_CONST = -2.837297

PROTEIN_RIP = "protein_RIP"
LIG_RIP = "ligand_RIP"
PROTEIN_ELEC_BLOCK = "protein_only"
LIG_ELEC_BLOCK = "ligand_only"

PB_BOX_FACTOR = 1.5
APBS_INPUT = "apbs.in"


[docs]def find_component_cts(cms_sys): protein_ct = None ion = None for ct in cms_sys.comp_ct: if ct.property[constants.CT_TYPE] == constants.CT_TYPE.VAL.SOLUTE: if constants.FEPIO_STAGE in ct.property: if ct.property[constants.FEPIO_STAGE] == 1: ligand0 = ct elif ct.property[constants.FEPIO_STAGE] == 2: ligand1 = ct else: raise AssertionError("unknown fepio_stage") else: protein_ct = ct elif ct.property[constants.CT_TYPE] == constants.CT_TYPE.VAL.SOLVENT: water_box = ct elif ct.property[constants.CT_TYPE] == constants.CT_TYPE.VAL.ION: ion = ct return (protein_ct, ligand0, ligand1, water_box, ion)
[docs]def total_charge(ct): charge = 0.0 charge_sq = 0.0 for (i, site) in enumerate(ct.ffio.site): charge += site.charge charge_sq += site.charge * site.charge return (charge, charge_sq)
[docs]def write_pqr(ct, f, atom_offset=1, zero_charge=False): iatom = 0 res_name = "NA" vir_dict = {} atom_count = len(ct.atom) for t in ct.ffio.virtual: vir_dict[t.ai] = vir_dict.get(t.ai, []) + [t.index + atom_count] for (i, atom) in enumerate(ct.atom): charge = 0.0 if not zero_charge: charge = ct.ffio.site[i + 1].charge for j in vir_dict.get(i + 1, []): charge += ct.ffio.site[j].charge iatom = i + atom_offset line = "ATOM".ljust(6)[:6] line += "%5d" % iatom line += " " tstr = atom.property["s_m_pdb_atom_name"].replace(" ", "") if not tstr: tstr = atom.element + "%d" % atom.index tstr = tstr[:3] line += tstr.ljust(4)[:4] tstr = atom.property["s_m_pdb_residue_name"].replace(" ", "") if tstr: res_name = tstr if not tstr: tstr = res_name if len(tstr) == 4: line += tstr.ljust(4)[:4] else: line += " " + tstr.ljust(3)[:3] line += " " tstr = atom.property["s_m_chain_name"] line += tstr.ljust(1)[:1] tstr = "%d" % atom.property["i_m_residue_number"] line += tstr.rjust(4)[:4] if atom.inscode != "": line += "%s " % atom.inscode else: line += " " xyz_qr = " %-9.3f %-9.3f %-9.3f %7.4f %7.4f\n" % ( atom.x, atom.y, atom.z, charge, atom.radius) line += xyz_qr f.write(line) return iatom + 1
[docs]def write_elec_block(f, block_name, grid_center, mol, pot_dx, glen=120, dime=257): f.write("elec name %s \n mg-manual \n dime %d %d %d\n glen %d %d %d\n" % \ (block_name, dime, dime, dime, glen, glen, glen)) f.write(" gcent mol %d\n mol %d\n lpbe\n bcfl mdh\n pdie 1.0\n sdie %f\n chgm spl4\n" \ % (grid_center, mol, WATER_DIA_CONST)) f.write(" srfm smol\n srad 1.4\n swin 0.3\n sdens 40.0\n temp 300.0\n") f.write(" calcenergy no\n calcforce no\n write pot dx %s\nend\n" % pot_dx)
[docs]def write_apbs_input_with_protein(prolig_pgr, ligpro_pqr, ligonly_pqr, box_length, prefix=""): filename = prefix + APBS_INPUT if box_length > 150: logger.warning("box size might be too big for apbs calculations %f" % box_length) with open(filename, "w") as f: f.write("read\n mol pqr %s\n mol pqr %s\n mol pqr %s\nend\n" % (prolig_pgr, ligpro_pqr, ligonly_pqr)) write_elec_block(f=f, block_name=PROTEIN_ELEC_BLOCK, grid_center=3, mol=1, pot_dx=prefix + PROTEIN_RIP, glen=box_length * PB_BOX_FACTOR) write_elec_block(f=f, block_name=LIG_ELEC_BLOCK, grid_center=3, mol=2, pot_dx=prefix + LIG_RIP, glen=box_length * PB_BOX_FACTOR) return filename, prefix + PROTEIN_RIP + ".dx", prefix + LIG_RIP + ".dx"
[docs]def write_apbs_input_with_ligand(ligonly_pqr, box_length, prefix=""): filename = prefix + APBS_INPUT if box_length > 150: logger.warning("box size might be too big for apbs calculations %f" % box_length) with open(filename, "w") as f: f.write("read\n mol pqr %s\nend\n" % (ligonly_pqr)) write_elec_block(f=f, block_name=LIG_ELEC_BLOCK, grid_center=1, mol=1, pot_dx=prefix + LIG_RIP, glen=box_length * PB_BOX_FACTOR) return filename, prefix + LIG_RIP + ".dx"
[docs]def run_apbs(input, p_charge, protein_ct, lig_charge, ligand_ct, \ box_length, box_volume, has_receptor, prefix, ion_charge): filenames = write_pqrs(box_length, ligand_ct, protein_ct, prefix=prefix) import subprocess cmd = [ os.path.join(os.environ['SCHRODINGER'], "utilities", "apbs"), str(input) ] with open(input.replace(".in", ".log"), "w") as log_fh: apbs_proc = subprocess.Popen(cmd, stderr=subprocess.STDOUT, stdout=log_fh, universal_newlines=True) null, stderr = apbs_proc.communicate() val = apbs_proc.returncode rip = 0.0 if not val: rip = compute_rip_correction(box_volume, p_charge, lig_charge, ion_charge, has_protein=has_receptor, prefix=prefix) try: os.remove(input.replace(".in", ".log")) os.remove("io.mc") for i in filenames: os.remove(i) except: pass return val, stderr, rip
[docs]def calculate_RIP(dxfile, netq): eps = WATER_DIA_CONST T = TEMPERATURE #temperature in K xi_CB = UNIT_CUBIC_COULOMB_ENG #see eq. (21) in paper and accompanying text coulomb_factor = COULOMB_CONST # (4*pi*eps_0)**-1 in (kJ nm)/(e**2 mol) kB = BOLTZMANN_CONST # kJ/(mol K) kBT = kB * T #read the APBS potential map with open(dxfile) as fh: lines = fh.readlines() #extract information from the dx file # #gap: the space between grid points, determined based on the x axis. #this assumes the grid is evenly spaced in all directions. gap = float(lines[6].split()[1]) #eliminate headers and other non-data lines from dx file lines = [x for x in lines if x[0] in '0123456789-.'] #initialize variables to store the total potential #and the number of grid points tot_pot = 0.0 numpts = 0 #add the potential on every line #and add that number of grid points for line in lines: tot_pot += sum([float(x) for x in line.split()]) numpts += len(line.split()) #calculate the volume in nm^3 V = ((old_div((gap**3), 1000.0)) * numpts) #convert the total potential to kJ nm^3 / (mol e) from kBT (gap^2) / (mol e) #label this as B for box integral, X because it refers to the system X. #See eq. (19) in paper. B_X = tot_pot * kBT * V / numpts #calculate the expected total potential based on the net charge #label this as B for box integral, QX because it refers to net charge Q (same Q as system X) #See eq. (20) in paper. B_QX = ((netq * xi_CB * coulomb_factor / eps * (V**(old_div(2.0, 3.0))))) #calculate the RIP #See Eq. (18) in paper. RIP = 1000.0 * (B_X - B_QX) / JOULES_PER_CAL return RIP
[docs]def compute_rip_correction(box_volume, p_charge, lig_charge, ion_charge, has_protein=True, prefix=""): I_protein = 0.0 if has_protein: I_protein = calculate_RIP(prefix + PROTEIN_RIP + ".dx", p_charge) I_lig = calculate_RIP(prefix + LIG_RIP + ".dx", lig_charge) return (I_protein + I_lig) * (p_charge + lig_charge + ion_charge) / box_volume
[docs]def compute_net_charge_correction(q_tot, box_length): charge_sq = q_tot * q_tot unit = 10.0 * CUBIC_LATTICE_SUM_CONST * 0.5 * COULOMB_CONST / WATER_DIA_CONST / JOULES_PER_CAL return -charge_sq * unit / box_length
[docs]def dis_solvent_correction(water_dens, quadral_pol, tot_charge): return -tot_charge * 10.0 * COULOMB_CONST * water_dens * quadral_pol * 4.0 * 3.1415926 / 6.0 / JOULES_PER_CAL
[docs]def write_pqrs(box_length, ligand_ct, protein_ct, prefix=""): files = [] ligonly_pqr = prefix + "ligonly.pqr" with open(ligonly_pqr, "w") as f: atoms_written = write_pqr(ligand_ct, f) files.append(ligonly_pqr) filenames = () prolig_pqr = prefix + "prolig.pqr" ligpro_pqr = prefix + "ligpro.pqr" if protein_ct: with open(prolig_pqr, "w") as f: atoms_written = write_pqr(protein_ct, f) atoms_written = write_pqr(ligand_ct, f, atom_offset=atoms_written, zero_charge=True) files.append(prolig_pqr) #zero charge protein + lig pqr with open(ligpro_pqr, "w") as f: atoms_written = write_pqr(protein_ct, f, zero_charge=True) atoms_written = write_pqr(ligand_ct, f, atom_offset=atoms_written) files.append(ligpro_pqr) filenames = write_apbs_input_with_protein(prolig_pqr, ligpro_pqr, ligonly_pqr, box_length, prefix=prefix) else: filenames = write_apbs_input_with_ligand(ligonly_pqr, box_length, prefix=prefix) for i in filenames: files.append(i) return files
[docs]def calc_volume(enefile): line_num, prop_list = get_listof_props_ene(enefile) icol = prop_list.index("V(A^3)") vol = [] with open(enefile) as fh: for line in fh.readlines(): line = line.strip() if line and line[0] != '#': data = line.split() vol.append(float(data[icol])) return old_div(sum(vol), len(vol))
[docs]def compute_net_charge_corrections(cmsfile, enefile, is_lambda0=True): """ cmsfile: cms file name enefile: ene file name is_lambda0: booline return [net_charge_correction, discreet solvent correction, RIP correction] """ prefix = "%s" % uuid.uuid4() cms_sys = cms.Cms(cmsfile) box_volume = calc_volume(enefile) box_length = box_volume**(old_div(1.0, 3.0)) protein_ct = None ligand0 = None ligand1 = None water_box = None ion = None (protein_ct, ligand0, ligand1, water_box, ion) = find_component_cts(cms_sys) water_density = old_div(water_box.mol_total, box_volume) oxygen_coord = numpy.array(water_box.atom[1].xyz) h1_coord = numpy.array(water_box.atom[2].xyz) h2_coord = numpy.array(water_box.atom[3].xyz) water_quadrapol = numpy.sum((h1_coord - oxygen_coord)**2) * water_box.ffio.site[2].charge + \ numpy.sum((h2_coord - oxygen_coord)**2) * water_box.ffio.site[3].charge p_charge = 0 p_charge_sq = 0 cntion_charge = 0 cntion_charge_sq = 0 l0_charge = 0 l0_charge_sq = 0 l1_charge = 0 l1_charge_sq = 0 if protein_ct: (p_charge, p_charge_sq) = total_charge(protein_ct) (l0_charge, l0_charge_sq) = total_charge(ligand0) (l1_charge, l1_charge_sq) = total_charge(ligand1) if ion: (cntion_charge, cntion_charge_sq) = total_charge(ion) if is_lambda0: useligand = ligand0 lig_charge = l0_charge else: useligand = ligand1 lig_charge = l1_charge has_receptor = False if protein_ct: has_receptor = True tot_charge = p_charge + lig_charge + cntion_charge logger.debug("Total protein charge %f" % p_charge) logger.debug("Total ligand0 charge %f" % l0_charge) logger.debug("Total ligand1 charge %f" % l1_charge) logger.debug("Using ligand %d" % (not is_lambda0)) logger.debug("Total ligand charge %f" % lig_charge) logger.debug("Total counter-ion charge %f" % cntion_charge) logger.debug("Total system charge %f" % tot_charge) logger.debug("Box Length %f" % box_length) logger.debug("Water Density %f" % water_density) logger.debug("Water Q-trace %f" % water_quadrapol) (rval, err, rip) = run_apbs(prefix + APBS_INPUT, p_charge, protein_ct, lig_charge, useligand, \ box_length, box_volume, has_receptor, prefix, cntion_charge) if rval: raise RuntimeError("APBS did not return zero: %s", err) net_chg_corr = compute_net_charge_correction(tot_charge, box_length) dis_sol_corr = dis_solvent_correction(water_density, water_quadrapol, tot_charge) return numpy.array((net_chg_corr, dis_sol_corr, rip))
[docs]def cleanup(topdir=None): """ Cleanup function to be run atexit """ #Garbage collect objects in case we hit the cancel button and htere are #any open Structure{Reader,Writer} objects, which will run fh.close() in #the __del__ method. import gc gc.collect() #See if we are running inside of maestro if topdir: import shutil shutil.rmtree(topdir)
if __name__ == '__main__': import atexit import tempfile from optparse import OptionParser usage = "$SCHRODINGER/run fep_net_charge.py [-i] input_cms input_ene" logger.setLevel(log.DEBUG) parser = OptionParser(usage) parser.add_option("-i", "--is_lambda0", action="store_false", dest="is_lambda0", default=True, help="set it to false if it is lambda 1") opt, args = parser.parse_args(sys.argv[1:]) if len(args) != 2: print(usage) sys.exit(1) topdir = tempfile.mkdtemp(prefix="fep_net_charge") atexit.register(cleanup, topdir=topdir) cwd = os.getcwd() os.chdir(topdir) input_cms = os.path.join(cwd, args[0]) input_ene = os.path.join(cwd, args[1]) correction = compute_net_charge_corrections(input_cms, input_ene, opt.is_lambda0) print("Correction terms:") print("[net charge corr, discrete solv corr, RIP]") print(correction) print("Sum of correction terms:", numpy.sum(correction)) os.chdir(cwd)