Source code for schrodinger.application.desmond.fep_net_charge

import os
import sys
import uuid
from past.utils import old_div

import numpy

import schrodinger.application.desmond.cms as cms
import schrodinger.utils.log as log
from schrodinger.application.desmond import constants
from schrodinger.application.desmond.simulation_block_data import \
    get_listof_props_ene

logger = log.get_output_logger(name="fep_net_charge")

WATER_DIA_CONST = 65.0
TEMPERATURE = 300.0
JOULES_PER_CAL = 4.184

COULOMB_CONST = 138.93545585  # (4*pi*eps_0)**-1 in (kJ nm)/(e**2 mol)
BOLTZMANN_CONST = 0.0083144621  # kJ/(mol K)
UNIT_CUBIC_COULOMB_ENG = 2.380077  #see eq. (21) in paper and accompanying text
CUBIC_LATTICE_SUM_CONST = -2.837297

PROTEIN_RIP = "protein_RIP"
LIG_RIP = "ligand_RIP"
PROTEIN_ELEC_BLOCK = "protein_only"
LIG_ELEC_BLOCK = "ligand_only"

PB_BOX_FACTOR = 1.5
APBS_INPUT = "apbs.in"


[docs]def find_component_cts(cms_sys):
    protein_ct = None
    ion = None
    for ct in cms_sys.comp_ct:
        if ct.property[constants.CT_TYPE] == constants.CT_TYPE.VAL.SOLUTE:
            if constants.FEPIO_STAGE in ct.property:
                if ct.property[constants.FEPIO_STAGE] == 1:
                    ligand0 = ct
                elif ct.property[constants.FEPIO_STAGE] == 2:
                    ligand1 = ct
                else:
                    raise AssertionError("unknown fepio_stage")
            else:
                protein_ct = ct
        elif ct.property[constants.CT_TYPE] == constants.CT_TYPE.VAL.SOLVENT:
            water_box = ct
        elif ct.property[constants.CT_TYPE] == constants.CT_TYPE.VAL.ION:
            ion = ct
    return (protein_ct, ligand0, ligand1, water_box, ion)


[docs]def total_charge(ct):
    charge = 0.0
    charge_sq = 0.0
    for (i, site) in enumerate(ct.ffio.site):
        charge += site.charge
        charge_sq += site.charge * site.charge
    return (charge, charge_sq)


[docs]def write_pqr(ct, f, atom_offset=1, zero_charge=False):
    iatom = 0
    res_name = "NA"
    vir_dict = {}
    atom_count = len(ct.atom)
    for t in ct.ffio.virtual:
        vir_dict[t.ai] = vir_dict.get(t.ai, []) + [t.index + atom_count]
    for (i, atom) in enumerate(ct.atom):
        charge = 0.0
        if not zero_charge:
            charge = ct.ffio.site[i + 1].charge
            for j in vir_dict.get(i + 1, []):
                charge += ct.ffio.site[j].charge
        iatom = i + atom_offset
        line = "ATOM".ljust(6)[:6]
        line += "%5d" % iatom
        line += " "
        tstr = atom.property["s_m_pdb_atom_name"].replace(" ", "")
        if not tstr:
            tstr = atom.element + "%d" % atom.index
        tstr = tstr[:3]
        line += tstr.ljust(4)[:4]
        tstr = atom.property["s_m_pdb_residue_name"].replace(" ", "")
        if tstr:
            res_name = tstr
        if not tstr:
            tstr = res_name
        if len(tstr) == 4:
            line += tstr.ljust(4)[:4]
        else:
            line += " " + tstr.ljust(3)[:3]
        line += " "

        tstr = atom.property["s_m_chain_name"]
        line += tstr.ljust(1)[:1]

        tstr = "%d" % atom.property["i_m_residue_number"]

        line += tstr.rjust(4)[:4]
        if atom.inscode != "":
            line += "%s   " % atom.inscode
        else:
            line += "    "
        xyz_qr = "  %-9.3f %-9.3f %-9.3f %7.4f %7.4f\n" % (
            atom.x, atom.y, atom.z, charge, atom.radius)
        line += xyz_qr
        f.write(line)

    return iatom + 1


[docs]def write_elec_block(f,
                     block_name,
                     grid_center,
                     mol,
                     pot_dx,
                     glen=120,
                     dime=257):
    f.write("elec name %s \n mg-manual \n dime %d %d %d\n glen %d %d %d\n" % \
            (block_name, dime, dime, dime, glen, glen, glen))
    f.write(" gcent mol %d\n mol %d\n lpbe\n bcfl mdh\n pdie 1.0\n sdie %f\n chgm spl4\n" \
            % (grid_center, mol, WATER_DIA_CONST))
    f.write(" srfm smol\n srad 1.4\n swin 0.3\n sdens 40.0\n temp 300.0\n")
    f.write(" calcenergy no\n calcforce no\n write pot dx %s\nend\n" % pot_dx)


[docs]def write_apbs_input_with_protein(prolig_pgr,
                                  ligpro_pqr,
                                  ligonly_pqr,
                                  box_length,
                                  prefix=""):
    filename = prefix + APBS_INPUT

    if box_length > 150:
        logger.warning("box size might be too big for apbs calculations %f" %
                       box_length)

    with open(filename, "w") as f:
        f.write("read\n mol pqr %s\n mol pqr %s\n mol pqr %s\nend\n" %
                (prolig_pgr, ligpro_pqr, ligonly_pqr))
        write_elec_block(f=f,
                         block_name=PROTEIN_ELEC_BLOCK,
                         grid_center=3,
                         mol=1,
                         pot_dx=prefix + PROTEIN_RIP,
                         glen=box_length * PB_BOX_FACTOR)
        write_elec_block(f=f,
                         block_name=LIG_ELEC_BLOCK,
                         grid_center=3,
                         mol=2,
                         pot_dx=prefix + LIG_RIP,
                         glen=box_length * PB_BOX_FACTOR)
    return filename, prefix + PROTEIN_RIP + ".dx", prefix + LIG_RIP + ".dx"


[docs]def write_apbs_input_with_ligand(ligonly_pqr, box_length, prefix=""):
    filename = prefix + APBS_INPUT

    if box_length > 150:
        logger.warning("box size might be too big for apbs calculations %f" %
                       box_length)

    with open(filename, "w") as f:
        f.write("read\n mol pqr %s\nend\n" % (ligonly_pqr))
        write_elec_block(f=f,
                         block_name=LIG_ELEC_BLOCK,
                         grid_center=1,
                         mol=1,
                         pot_dx=prefix + LIG_RIP,
                         glen=box_length * PB_BOX_FACTOR)
    return filename, prefix + LIG_RIP + ".dx"


[docs]def run_apbs(input, p_charge, protein_ct, lig_charge, ligand_ct, \
             box_length, box_volume, has_receptor, prefix, ion_charge):

    filenames = write_pqrs(box_length, ligand_ct, protein_ct, prefix=prefix)

    import subprocess
    cmd = [
        os.path.join(os.environ['SCHRODINGER'], "utilities", "apbs"),
        str(input)
    ]
    with open(input.replace(".in", ".log"), "w") as log_fh:
        apbs_proc = subprocess.Popen(cmd,
                                     stderr=subprocess.STDOUT,
                                     stdout=log_fh,
                                     universal_newlines=True)
        null, stderr = apbs_proc.communicate()
    val = apbs_proc.returncode
    rip = 0.0
    if not val:
        rip = compute_rip_correction(box_volume,
                                     p_charge,
                                     lig_charge,
                                     ion_charge,
                                     has_protein=has_receptor,
                                     prefix=prefix)
        try:
            os.remove(input.replace(".in", ".log"))
            os.remove("io.mc")
            for i in filenames:
                os.remove(i)
        except:
            pass
    return val, stderr, rip


[docs]def calculate_RIP(dxfile, netq):
    eps = WATER_DIA_CONST
    T = TEMPERATURE  #temperature in K
    xi_CB = UNIT_CUBIC_COULOMB_ENG  #see eq. (21) in paper and accompanying text
    coulomb_factor = COULOMB_CONST  # (4*pi*eps_0)**-1 in (kJ nm)/(e**2 mol)
    kB = BOLTZMANN_CONST  # kJ/(mol K)
    kBT = kB * T

    #read the APBS potential map
    with open(dxfile) as fh:
        lines = fh.readlines()

    #extract information from the dx file
    #
    #gap: the space between grid points, determined based on the x axis.
    #this assumes the grid is evenly spaced in all directions.
    gap = float(lines[6].split()[1])

    #eliminate headers and other non-data lines from dx file
    lines = [x for x in lines if x[0] in '0123456789-.']

    #initialize variables to store the total potential
    #and the number of grid points
    tot_pot = 0.0
    numpts = 0
    #add the potential on every line
    #and add that number of grid points
    for line in lines:
        tot_pot += sum([float(x) for x in line.split()])
        numpts += len(line.split())

    #calculate the volume in nm^3
    V = ((old_div((gap**3), 1000.0)) * numpts)

    #convert the total potential to kJ nm^3 / (mol e) from kBT (gap^2) / (mol e)
    #label this as B for box integral, X because it refers to the system X.
    #See eq. (19) in paper.
    B_X = tot_pot * kBT * V / numpts

    #calculate the expected total potential based on the net charge
    #label this as B for box integral, QX because it refers to net charge Q (same Q as system X)
    #See eq. (20) in paper.

    B_QX = ((netq * xi_CB * coulomb_factor / eps * (V**(old_div(2.0, 3.0)))))

    #calculate the RIP
    #See Eq. (18) in paper.
    RIP = 1000.0 * (B_X - B_QX) / JOULES_PER_CAL

    return RIP


[docs]def compute_rip_correction(box_volume,
                           p_charge,
                           lig_charge,
                           ion_charge,
                           has_protein=True,
                           prefix=""):
    I_protein = 0.0
    if has_protein:
        I_protein = calculate_RIP(prefix + PROTEIN_RIP + ".dx", p_charge)
    I_lig = calculate_RIP(prefix + LIG_RIP + ".dx", lig_charge)

    return (I_protein + I_lig) * (p_charge + lig_charge +
                                  ion_charge) / box_volume


[docs]def compute_net_charge_correction(q_tot, box_length):
    charge_sq = q_tot * q_tot
    unit = 10.0 * CUBIC_LATTICE_SUM_CONST * 0.5 * COULOMB_CONST / WATER_DIA_CONST / JOULES_PER_CAL
    return -charge_sq * unit / box_length


[docs]def dis_solvent_correction(water_dens, quadral_pol, tot_charge):
    return -tot_charge * 10.0 * COULOMB_CONST * water_dens * quadral_pol * 4.0 * 3.1415926 / 6.0 / JOULES_PER_CAL


[docs]def write_pqrs(box_length, ligand_ct, protein_ct, prefix=""):

    files = []
    ligonly_pqr = prefix + "ligonly.pqr"
    with open(ligonly_pqr, "w") as f:
        atoms_written = write_pqr(ligand_ct, f)

    files.append(ligonly_pqr)

    filenames = ()
    prolig_pqr = prefix + "prolig.pqr"
    ligpro_pqr = prefix + "ligpro.pqr"
    if protein_ct:
        with open(prolig_pqr, "w") as f:
            atoms_written = write_pqr(protein_ct, f)
            atoms_written = write_pqr(ligand_ct,
                                      f,
                                      atom_offset=atoms_written,
                                      zero_charge=True)
        files.append(prolig_pqr)

        #zero charge protein + lig pqr
        with open(ligpro_pqr, "w") as f:
            atoms_written = write_pqr(protein_ct, f, zero_charge=True)
            atoms_written = write_pqr(ligand_ct, f, atom_offset=atoms_written)
        files.append(ligpro_pqr)

        filenames = write_apbs_input_with_protein(prolig_pqr,
                                                  ligpro_pqr,
                                                  ligonly_pqr,
                                                  box_length,
                                                  prefix=prefix)
    else:
        filenames = write_apbs_input_with_ligand(ligonly_pqr,
                                                 box_length,
                                                 prefix=prefix)

    for i in filenames:
        files.append(i)
    return files


[docs]def calc_volume(enefile):

    line_num, prop_list = get_listof_props_ene(enefile)
    icol = prop_list.index("V(A^3)")

    vol = []

    with open(enefile) as fh:
        for line in fh.readlines():
            line = line.strip()
            if line and line[0] != '#':
                data = line.split()
                vol.append(float(data[icol]))

    return old_div(sum(vol), len(vol))


[docs]def compute_net_charge_corrections(cmsfile, enefile, is_lambda0=True):
    """
    cmsfile: cms file name
    enefile: ene file name
    is_lambda0: booline

    return [net_charge_correction, discreet solvent correction, RIP correction]
    """

    prefix = "%s" % uuid.uuid4()
    cms_sys = cms.Cms(cmsfile)
    box_volume = calc_volume(enefile)
    box_length = box_volume**(old_div(1.0, 3.0))

    protein_ct = None
    ligand0 = None
    ligand1 = None
    water_box = None
    ion = None
    (protein_ct, ligand0, ligand1, water_box, ion) = find_component_cts(cms_sys)

    water_density = old_div(water_box.mol_total, box_volume)
    oxygen_coord = numpy.array(water_box.atom[1].xyz)
    h1_coord = numpy.array(water_box.atom[2].xyz)
    h2_coord = numpy.array(water_box.atom[3].xyz)
    water_quadrapol = numpy.sum((h1_coord - oxygen_coord)**2) * water_box.ffio.site[2].charge +  \
                      numpy.sum((h2_coord - oxygen_coord)**2) * water_box.ffio.site[3].charge

    p_charge = 0
    p_charge_sq = 0
    cntion_charge = 0
    cntion_charge_sq = 0
    l0_charge = 0
    l0_charge_sq = 0
    l1_charge = 0
    l1_charge_sq = 0

    if protein_ct:
        (p_charge, p_charge_sq) = total_charge(protein_ct)

    (l0_charge, l0_charge_sq) = total_charge(ligand0)
    (l1_charge, l1_charge_sq) = total_charge(ligand1)

    if ion:
        (cntion_charge, cntion_charge_sq) = total_charge(ion)

    if is_lambda0:
        useligand = ligand0
        lig_charge = l0_charge
    else:
        useligand = ligand1
        lig_charge = l1_charge

    has_receptor = False
    if protein_ct:
        has_receptor = True

    tot_charge = p_charge + lig_charge + cntion_charge

    logger.debug("Total protein charge %f" % p_charge)
    logger.debug("Total ligand0 charge %f" % l0_charge)
    logger.debug("Total ligand1 charge %f" % l1_charge)
    logger.debug("Using ligand %d" % (not is_lambda0))
    logger.debug("Total ligand charge %f" % lig_charge)
    logger.debug("Total counter-ion charge %f" % cntion_charge)
    logger.debug("Total system charge %f" % tot_charge)
    logger.debug("Box Length %f" % box_length)
    logger.debug("Water Density %f" % water_density)
    logger.debug("Water Q-trace %f" % water_quadrapol)

    (rval, err, rip) = run_apbs(prefix + APBS_INPUT, p_charge, protein_ct, lig_charge, useligand, \
                           box_length, box_volume, has_receptor, prefix, cntion_charge)
    if rval:
        raise RuntimeError("APBS did not return zero: %s", err)

    net_chg_corr = compute_net_charge_correction(tot_charge, box_length)
    dis_sol_corr = dis_solvent_correction(water_density, water_quadrapol,
                                          tot_charge)

    return numpy.array((net_chg_corr, dis_sol_corr, rip))


[docs]def cleanup(topdir=None):
    """
    Cleanup function to be run atexit
    """
    #Garbage collect objects in case we hit the cancel button and htere are
    #any open Structure{Reader,Writer} objects, which will run fh.close() in
    #the __del__ method.
    import gc
    gc.collect()
    #See if we are running inside of maestro
    if topdir:
        import shutil
        shutil.rmtree(topdir)


if __name__ == '__main__':
    import atexit
    import tempfile
    from optparse import OptionParser
    usage = "$SCHRODINGER/run fep_net_charge.py [-i] input_cms input_ene"
    logger.setLevel(log.DEBUG)

    parser = OptionParser(usage)
    parser.add_option("-i",
                      "--is_lambda0",
                      action="store_false",
                      dest="is_lambda0",
                      default=True,
                      help="set it to false if it is lambda 1")

    opt, args = parser.parse_args(sys.argv[1:])
    if len(args) != 2:
        print(usage)
        sys.exit(1)

    topdir = tempfile.mkdtemp(prefix="fep_net_charge")

    atexit.register(cleanup, topdir=topdir)
    cwd = os.getcwd()
    os.chdir(topdir)

    input_cms = os.path.join(cwd, args[0])
    input_ene = os.path.join(cwd, args[1])
    correction = compute_net_charge_corrections(input_cms, input_ene,
                                                opt.is_lambda0)
    print("Correction terms:")
    print("[net charge corr, discrete solv corr, RIP]")
    print(correction)
    print("Sum of correction terms:", numpy.sum(correction))
    os.chdir(cwd)