Source code for schrodinger.application.desmond.fep_reporter

# -*- coding: utf-8 -*-

import os
import sys
from past.utils import old_div

import matplotlib.gridspec as gridspec
from matplotlib import pyplot as plt
import numpy

import schrodinger.application.desmond.cms as cms
import schrodinger.utils.sea as sea
from schrodinger import structure
from schrodinger.utils import fileutils
from schrodinger.utils import qapplication


[docs]class FEPReportMaker(object):
[docs] def __init__(self, basename=None, cms_fn=None, tmp_dir=None): self._load_modules() self.basename = basename self.fep_data = None self._ark_simulation = None self._ark_ligand_list = [] self._ark_protein = None self._ark_fep_pair_data = None self.Elements = [] #report elements if isinstance(cms_fn, structure.Structure): self.cms_st = cms_fn else: self.cms_st = self.get_cms(cms_fn) self.cleanup_tmp_file_list = [] self.schrod_tmp_dir = tmp_dir self._ark_rest_rx = None if os.path.isfile(basename + '.sid'): self.fep_sid = sea.Map(open(self.basename + '.sid', 'r').read()) self.parse_fep_data()
def _load_modules(self): self.app = qapplication.get_application() from schrodinger.ui.sequencealignment.sequence_viewer import \ SequenceViewer self.SeqViewer = SequenceViewer from schrodinger.ui.sequencealignment.globals import ANNOTATION_RESNUM from schrodinger.ui.sequencealignment.globals import ANNOTATION_SSBOND from schrodinger.ui.sequencealignment.globals import COLOR_POSITION self.COLOR_POSITION = COLOR_POSITION self.ANNOTATION_SSBOND = ANNOTATION_SSBOND self.ANNOTATION_RESNUM = ANNOTATION_RESNUM import schrodinger.application.desmond.report_helper as rh self.rh = rh
[docs] def get_cms(self, cms_fn): ''' try to find cms file ''' fn = None if cms_fn and os.path.isfile(cms_fn): fn = cms_fn elif os.path.isfile(self.basename + '_replica0-out.cms'): fn = self.basename + '_replica0-out.cms' elif os.path.isfile(self.basename + '.cms'): fn = self.basename + '.cms' if fn: return cms.Cms(fn) else: return None
[docs] def parse_fep_data(self): ''' Given an ark object, parse the data ''' if "Keywords" not in self.fep_sid: self.error("No keywords presend in FEP sid file (%s.sid)." %\ self.basename) kw_list = self.fep_sid['Keywords'] for m in kw_list: if 'FEPSimulation' in m: self.read_FEPSimulation(m['FEPSimulation']) if 'ProteinInfo' in m: self.read_ProteinInfo(m['ProteinInfo']) if 'LigandInfo' in m: self.read_LigandInfo(m['LigandInfo']) if 'BinCenter' in m: self.read_FEP_REST_energetics(m) if 'Replica' in m: self.read_REST_exchanges(m['Replica'])
[docs] def read_FEPSimulation(self, ark_obj): self._ark_simulation = ark_obj
[docs] def read_ProteinInfo(self, ark_obj): self._ark_protein = ark_obj
[docs] def read_LigandInfo(self, ark_obj): self._ark_ligand_list.append(ark_obj)
[docs] def read_FEP_REST_energetics(self, ark_obj): self._ark_fep_pair_data = ark_obj
[docs] def read_REST_exchanges(self, ark_obj): self._ark_rest_rx = ark_obj
[docs] def report(self): self.init_report() self.reportSimulationDetails() self.reportProteinDetails() self.reportLigandsDetails() self.reportFinalResults() self.reportReplicaPairEnergies() self.reportReplicaExchanges() self.doc.build(self.Elements, canvasmaker=self.rh.NumberedCanvas) self.cleanupTempFiles()
[docs] def cleanupTempFiles(self): for tmp_file in self.cleanup_tmp_file_list: if os.path.exists(tmp_file): os.remove(tmp_file)
[docs] def init_report(self): pdf_filename = self.basename + '.pdf' self.doc = self.rh.platypus.SimpleDocTemplate(pdf_filename) self.doc.title = u'FEP/REST Report by Schrodinger Inc.' self.doc.author = "Dmitry Lupyan, PhD" self.doc.leftMargin = 30. self.doc.rightMargin = 20. self.doc.topMargin = 10. self.doc.bottomMargin = 10. self.rh.report_add_logo(self.Elements) self.rh.header(self.Elements, "Free Energy Perturbation Report")
[docs] def reportFinalResults(self): Ele = self.Elements if not self._ark_fep_pair_data or \ 'dGForward' not in list(self._ark_fep_pair_data) or \ 'Bennett' not in list(self._ark_fep_pair_data['dGForward']): return ark_obj = self._ark_fep_pair_data['dGForward']['Bennett'] dG = ark_obj['dG'].val dG_bootstrap_std = ark_obj['Bootstrap_std'].val # dG_analytical_std = ark_obj['Analytical_std'].val self.rh.header(Ele, "Free Energy difference is: %.2f ± %.3f kcal/mol" %\ (dG, dG_bootstrap_std))
[docs] def reportReplicaExchanges(self): if self._ark_rest_rx is None: return Ele = self.Elements self.rh.new_page(Ele) self.rh.report_add_logo(Ele) self.rh.header(Ele, "Replica Exchange Monitor") self.rh.pargph(Ele, "<u>Exchange Density</u>") self.rh.add_spacer(Ele) try: image = self.getRESTDensityPlot() Ele.append(image) text = \ ''' Each replica is color coded and the plot shows how it occupes different lambda windows during the course of the simulation. Ideally each replica will sample all lambda windows uniformly, however non-uniform sampling is sufficient in most of the instances. ''' self.rh.pargph(Ele, text) except: self.rh.pargph(Ele, '<i>Plot could not be created.</i>')
[docs] def reportReplicaPairEnergies(self): Ele = self.Elements self.rh.new_page(Ele) self.rh.report_add_logo(Ele) self.rh.header(Ele, "Convergence and Overlap") self.rh.pargph(Ele, '<u>Free Energy Convergence</u>') self.rh.add_spacer(Ele) try: image = self.getEnergyConvergencePlot() Ele.append(image) except: self.rh.pargph(Ele, '<i>Plot could not be created.</i>') try: self.add_replicas_dF_table(\ self._ark_fep_pair_data['dGForward']['Bennett']) except: self.rh.pargph(Ele, '<i>Table could not be created.</i>') self.rh.pargph( Ele, "The total free energy differences between the two ligands (∆G in kcal/mol) are " "plotted as a function of time. Three plots show the accumulated data during different time window " "schemes: forward; reverse; and sliding window. The tables report the associated bootstrap and analytical " "errors estimates for the corresponding free energies.")
[docs] def add_replicas_dF_table(self, ark_obj): replica_name = [ self.rh.get_pargph('&lambda; pair:', color='#888888', fontsize=11) ] replica_dF = ['Forward dF:'] replica_dE1 = ['Bootstrap STD:'] replica_dE2 = ['Analytical STD:'] nreplicas = 0 for i, m in enumerate(ark_obj['dF_Per_Replica']): name = "%i,%i" % (i, i + 1) replica_name.append(name) val = m.val replica_dF.append('%.2f' % float(val[0])) replica_dE1.append('%.3f' % float(val[1])) replica_dE2.append('%.3f' % float(val[2])) nreplicas += 1 # add total dG replica_name.append('Total') replica_dF.append('%.2f' % float(ark_obj['dG'].val)) replica_dE1.append('%.3f' % float(ark_obj['Bootstrap_std'].val)) replica_dE2.append('%.3f' % float(ark_obj['Analytical_std'].val)) nreplicas += 1 if ark_obj['dG'].val < 0: color = self.rh.rlcolors.green else: color = self.rh.rlcolors.orange table = [replica_name, replica_dF, replica_dE1, replica_dE2] width = [1.2] + (nreplicas * [0.5]) nfields = len(width) nrows = len(table) style = [('BOTTOMPADDING', (0, 1), (-1, -1), 1), ('TOPPADDING', (0, 1), (-1, -1), 1), ('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('ALIGN', (0, 0), (0, nrows - 1), 'LEFT'), ('TEXTCOLOR', (0, 0), (nfields - 1, 0), self.rh.gray), ('TEXTCOLOR', (0, 0), (0, nrows - 1), self.rh.gray), ('BOX', (-1, 0), (-1, -1), 0.4, color)] self.rh.add_vtable(self.Elements, table, style, width)
[docs] def add_overlapEnergyTable(self, data): name = [ self.rh.get_pargph('&#x3bb; pair:', color='#888888', fontsize=10) ] for i, s in enumerate(data): name.append(self.rh.get_pargph(s[0], color=self.rh.colors[i])) center = ['Center at:'] + ['%.3f' % s[1] for s in data] integral = ['Integral:'] + ['%.3f' % s[2] for s in data] table = [name, center, integral] nreplicas = len(data) width = nreplicas * [0.61] width = [0.7] + (width) nfields = len(width) nrows = len(table) style = [('BOTTOMPADDING', (0, 1), (-1, -1), 1), ('TOPPADDING', (0, 1), (-1, -1), 1), ('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('ALIGN', (0, 0), (1, nrows - 1), 'LEFT'), ('TEXTCOLOR', (0, 0), (nfields - 1, 0), self.rh.gray), ('TEXTCOLOR', (0, 0), (0, nrows - 1), self.rh.gray)] self.rh.add_vtable(self.Elements, table, style, width)
[docs] def getRESTDensityPlot(self): fig = plt.figure(figsize=(7.5, 5)) ax_hist = fig.add_subplot(111) nreplicas = len(self._ark_rest_rx) bottom = numpy.zeros(nreplicas) for replica in self._ark_rest_rx: ireplica = replica['Number'].val name = self.rh.replica_name(ireplica) dist = numpy.array(replica['HistoryDistribution'].val) color = self.rh.colors[ireplica] ax_hist.bar(list(range(nreplicas)), dist, label=name, color=color, width=1, edgecolor=None, linewidth=0, align='center', bottom=bottom, alpha=0.8) bottom += dist ax_hist.set_xlim([0 - 0.5, nreplicas - 0.5]) ax_hist.set_xticks(list(range(nreplicas))) ax_hist.set_ylim([0, max(bottom)]) ax_hist.set_yticklabels([]) ax_hist.set_xlabel('Lambda window', size='small') self.rh.change_plot_colors(ax_hist) #ax_hist.legend(loc=9, bbox_to_anchor=(0.5,-0.1),ncol=12, ax_hist.legend(loc=8, bbox_to_anchor=(0.5, 1.02), ncol=12, borderaxespad=0., borderpad=0.3, handlelength=1, handletextpad=0.2, title='Replica Names', columnspacing=0.4, fancybox=True, prop={'size': 8}) temp_plot = self.getTempImageFilename() fig.savefig(temp_plot, bbox_inches='tight', dpi=300) size = fig.get_size_inches() (sx, sy) = self.rh.aspectScale(size[0], size[1], 7.0, 6.0) img = self.rh.platypus.Image(temp_plot, sx * self.rh.inch, (sy + 0.7) * self.rh.inch) return img
[docs] def getOverlapEnergyPlot(self): bin_centers = numpy.array(self._ark_fep_pair_data['BinCenter'].val) nbins = self._ark_fep_pair_data['NumberBins'].val range_max = self._ark_fep_pair_data['Range_max'].val range_min = self._ark_fep_pair_data['Range_min'].val bin_width = 0.8 * (old_div((range_max - range_min), nbins)) ark_pairs = self._ark_fep_pair_data['ReplicaPairs'] gs = gridspec.GridSpec(2, 1, height_ratios=[0.7, 0.3]) fig = plt.figure(figsize=(7.5, 3.0)) ax = fig.add_subplot(gs[0]) ax_diff = fig.add_subplot(gs[1], sharex=ax) overlap_data = [] for rp in ark_pairs: fwd = rp['Forward'].val rev = rp['Reverse'].val ovp = rp['Overlap'].val repA = rp['repB'].val repB = rp['repA'].val color = self.rh.colors[repA] ax.bar(bin_centers, fwd, alpha=0.5, color=color, linewidth=1, align='center', width=bin_width) ax.bar(bin_centers, rev, alpha=0.5, color=color, linewidth=0, align='center', width=bin_width) label = r'$\lambda$(%i,%i)' % (repA, repB) label_table = '%i,%i' % (repA, repB) ax_diff.bar(bin_centers, ovp, alpha=0.75, align='center', width=bin_width, color=color, edgecolor=None, linewidth=1, label=label) integral = numpy.sum(ovp) overlap_data.append( (label_table, rp['OverlapCenter'].val, integral)) #ax.set_xlabel(r'$\Delta$E (kcal/mol)', size = 'small') ax.set_ylabel('Norm. Histogram', size='small') ax.yaxis.set_major_locator(plt.MaxNLocator(6)) #ax.axes.get_xaxis().set_visible(False) [t.set_visible(False) for t in ax.xaxis.get_ticklabels()] ax_diff.set_ylabel('Overlap', size='small') ax_diff.yaxis.set_major_locator(plt.MaxNLocator(4)) ax_diff.set_xlabel(r'$\Delta$E (kcal/mol)', size='small') ''' ax_diff.legend(loc=9, bbox_to_anchor=(0.5,-0.3),ncol=11, borderaxespad=0., borderpad=0.2, handlelength=1, handletextpad=0.2, columnspacing=0.4, fancybox=True, prop={'size':8}) ''' ax.set_xlim((range_min, range_max)) for plot in [ax, ax_diff]: self.rh.change_plot_colors(plot) temp_plot = self.getTempImageFilename() fig.savefig(temp_plot, bbox_inches='tight', dpi=300) size = fig.get_size_inches() (sx, sy) = self.rh.aspectScale(size[0], size[1], 7.5, 5.5) img = self.rh.platypus.Image(temp_plot, sx * self.rh.inch, sy * self.rh.inch) return overlap_data, img
[docs] def getEnergyConvergencePlot(self): global y_min global y_max y_min = 1000000 y_max = -1000000 def fill_error(ax, x, y, ye): global y_min global y_max y_arr = numpy.array(y) ye_arr = numpy.array(ye) min_fill = y_arr - ye_arr max_fill = y_arr + ye_arr ax.fill_between(x, y_arr - ye_arr, y_arr + ye_arr, facecolor='#888888', edgecolor='None') y_min = min(y_min, min(min_fill)) y_max = max(y_max, max(max_fill)) start_time = self._ark_fep_pair_data['dGForward']['StartTime_ns'].val end_time = self._ark_fep_pair_data['dGForward']['EndTime_ns'].val dG_forward = self._ark_fep_pair_data['dGForward']['dG'].val dG_forward_err = self._ark_fep_pair_data['dGForward']['dG_error'].val dG_reverse = self._ark_fep_pair_data['dGReverse']['dG'].val dG_reverse_err = self._ark_fep_pair_data['dGReverse']['dG_error'].val dG_sliding = self._ark_fep_pair_data['dGSliding']['dG'].val dG_sliding_err = self._ark_fep_pair_data['dGSliding']['dG_error'].val timestep = old_div((end_time - start_time), len(dG_forward)) time = numpy.arange(start_time, end_time, timestep) fig = plt.figure(figsize=(7.5, 2.0)) ax_frw = plt.subplot(131) ax_rev = plt.subplot(132, sharey=ax_frw) ax_sld = plt.subplot(133, sharex=ax_frw, sharey=ax_frw) ax_frw.plot(time, dG_forward, color='#990000') ax_frw.set_xlabel("Simulation Time (nsec)", size='small') ax_frw.set_ylabel(r"$\Delta$G (kcal/mol)", size='small') fill_error(ax_frw, time, dG_forward, dG_forward_err) #reverse plot should start with end time dG_reverse.reverse() dG_reverse_err.reverse() ax_rev.plot(time, dG_reverse, color='#990000') fill_error(ax_rev, time, dG_reverse, dG_reverse_err) ax_rev.invert_xaxis() ax_rev.set_xlabel("Reverse Simulation Time (nsec)", size='small') plt.setp(ax_rev.get_yticklabels(), visible=False) time = numpy.arange(start_time, end_time, old_div((end_time - start_time), len(dG_sliding))) ax_sld.plot(time, dG_sliding, color='#990000') fill_error(ax_sld, time, dG_sliding, dG_sliding_err) ax_sld.set_xlabel("Sliding Time (nsec)", size='small') ax_sld.set_ylabel(r"$\Delta$G (kcal/mol)", size='small', rotation=270) ax_sld.yaxis.tick_right() ax_sld.yaxis.set_label_position("right") ax_frw.set_ylim((y_min, y_max)) ax_frw.set_xlim((start_time, end_time + timestep)) ax_rev.set_xlim((end_time + timestep, start_time)) for ax in [ax_frw, ax_rev, ax_sld]: self.rh.change_plot_colors(ax) temp_plot = self.getTempImageFilename() fig.savefig(temp_plot, bbox_inches='tight', dpi=300) size = fig.get_size_inches() (sx, sy) = self.rh.aspectScale(size[0], size[1], 7.5, 2.5) img = self.rh.platypus.Image(temp_plot, sx * self.rh.inch, sy * self.rh.inch) return img
[docs] def reportLigandsDetails(self): Ele = self.Elements for ark_obj in self._ark_ligand_list: ligand_number = ark_obj['LigandNumber'].val self.rh.add_spacer(Ele) self.rh.pargph(Ele, '<u>Ligand <b>#%i</b> Information</u>' %\ (ligand_number)) smiles_p = self.rh.add_and_parse_SMILES(ark_obj['SMILES'].val) natoms_str = "%i (total) %i (heavy)" %\ (ark_obj['NumberAtoms'].val, ark_obj['NumberHeavyAtoms'].val) charge = ark_obj['Charge'].val charge = '+' + str(charge) if charge > 0 else str(charge) resname = ark_obj['PDBResidueName'].val resasl = ark_obj['ASL'].val atomic_mass = ark_obj['AtomicMass'].val mol_formula = ark_obj['MolecularFormula'].val nfragments = ark_obj['NumberFragments'].val nrotat_bonds = ark_obj['NumberRotatableBonds'].val try: lig_2d_image = self.getLigandImage(resasl, ligand_number) except: lig_2d_image = '' table = [["SMILES", smiles_p, ''], [ "PDB Name (ASL)", resname + ' (' + resasl.strip() + ')', lig_2d_image ], ["No. of Atoms", natoms_str, ''], ["Atomic Mass", "%5.3f au" % (atomic_mass), ''], ["Charge", charge, ''], ["Mol. Formula", mol_formula, ''], ["No. of Fragments", nfragments, ''], ["No. of Rot. Bonds", nrotat_bonds, '']] nrows = len(table) width = [1.5, 1.5, 3.8] style = [('SPAN', (2, 1), (2, nrows - 1)), ('SPAN', (1, 0), (2, 0)), ('VALIGN', (2, 1), (2, 1), 'MIDDLE'), ('BOTTOMPADDING', (0, 0), (-1, -1), 1), ('TOPPADDING', (0, 0), (-1, -1), 1), ('TEXTCOLOR', (0, 0), (0, nrows - 1), self.rh.gray)] self.rh.add_vtable(Ele, table, style, width)
[docs] def reportProteinDetails(self): ark_obj = self._ark_protein if not ark_obj: return Ele = self.Elements self.rh.add_spacer(Ele) self.rh.pargph(Ele, '<u>Protein Information</u>') self.rh.add_spacer(Ele) table = [[ 'Tot. Residues', 'Prot. Chain(s)', 'Res. in Chain(s)', 'No. Atoms', 'No. Heavy Atoms', 'Charge' ]] charge = ark_obj['FormalCharge'].val if charge > 0: charge = '+' + str(charge) chain_names = str(ark_obj['ChainsNames'].val).strip()[1:-1] res_numb = str(ark_obj['ChainsResidues'].val).strip()[1:-1] table = [[ 'Tot. Residues', 'Prot. Chain(s)', 'Res. in Chain(s)', 'No. Atoms', 'No. Heavy Atoms', 'Charge' ]] table.append([ ark_obj['NumberResidues'].val, chain_names, res_numb, ark_obj['NumberAtoms'].val, ark_obj['NumberHeavyAtoms'].val, charge ]) nfields = len(table[0]) style = [('BOTTOMPADDING', (0, 1), (-1, -1), 1), ('TOPPADDING', (0, 1), (-1, -1), 1), ('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('TEXTCOLOR', (0, 0), (nfields - 1, 0), self.rh.gray)] self.rh.add_table(Ele, table, style, 1.1) prot_str = self.get_protein() if prot_str: #move me to try block prot_seq_image = self.getSequenceViewerImage(prot_str) try: Ele.append(prot_seq_image) except: prot_seq_image = None
[docs] def getSequenceViewerImage(self, prot_str): ''' Generate protein sequence image, given a protein structure Returns a platipus image ''' #seq_viewer = SequenceViewer() seq_viewer = self.SeqViewer() seq_viewer.incorporateStructure(prot_str) seq_viewer.wrapped = True seq_viewer.has_ruler = False seq_viewer.display_identity = False seq_viewer.display_boundaries = True seq_viewer.chain_name_only = True seq_viewer.hide_empty_lines = True seq_viewer.crop_image = True seq_viewer.wrapped_width = 90 seq_viewer.setColorMode(self.COLOR_POSITION) seq_viewer.addAnnotation(self.ANNOTATION_SSBOND, remove=True) seq_viewer.addAnnotation(self.ANNOTATION_RESNUM) temp_seq_file = self.getTempImageFilename() size = seq_viewer.saveImage(temp_seq_file) (sx, sy) = self.rh.aspectScale(size[0], size[1], 7.0, 5.0) protein_img = self.rh.platypus.Image(temp_seq_file, sx * self.rh.inch, sy * self.rh.inch) return protein_img
[docs] def getTempImageFilename(self): if not self.schrod_tmp_dir: self.schrod_tmp_dir = fileutils.get_directory_path(fileutils.TEMP) temp_seq_file = fileutils.get_next_filename( os.path.join(self.schrod_tmp_dir, "image_tmp.png"), "") self.cleanup_tmp_file_list.append(temp_seq_file) return temp_seq_file
[docs] def getLigandImage(self, asl_str, lig_num): if not self.cms_st: return None ligand_idx = self.cms_st.select_atom(asl_str) if len(ligand_idx): lig_st = self.cms_st.extract(ligand_idx) temp_file = self.getTempImageFilename() size = self.rh.generate_ligand_2d_image(temp_file, lig_st, ret_size=True) (sx, sy) = self.rh.aspectScale(size[0], size[1], 3., 2.25) ligand_img = self.rh.platypus.Image(temp_file, sx * self.rh.inch, sy * self.rh.inch) return ligand_img else: return None
[docs] def get_protein(self): if not self.cms_st: return None prot_idx = self.cms_st.select_atom('(protein)') if len(prot_idx): return self.cms_st.extract(prot_idx) else: return None
[docs] def reportSimulationDetails(self): ark_obj = self._ark_simulation Ele = self.Elements self.rh.pargph(Ele, '<u>Simulation Details</u>') self.rh.add_spacer(Ele) self.rh.pargph(Ele, "<font color='#888888'>Jobname</font>: %s" %\ ark_obj['JobName'].val) self.rh.pargph(Ele, "<font color='#888888'>Entry title</font>: %s" %\ ark_obj['EntryTitle'].val) self.rh.add_spacer(Ele) temperature_str = '%0.1f' % ark_obj['TemperatureK'].val forcefield = ark_obj['ForceFields'].val table = [[ 'Job Type', 'Ensemble', 'Force Field', 'Temp. [K]', 'Sim Time [ns]', 'No. Atoms', 'No. Waters' ]] table.append([ ark_obj['JobType'].val, ark_obj['Ensemble'].val, forcefield, temperature_str, ark_obj['TotalSimulationNs'].val, ark_obj['TotalAtoms'].val, ark_obj['NumberWaters'].val ]) nfields = len(table[0]) style = [('BOTTOMPADDING', (0, 1), (-1, -1), 1), ('TOPPADDING', (0, 1), (-1, -1), 1), ('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('TEXTCOLOR', (0, 0), (nfields - 1, 0), self.rh.gray)] self.rh.add_table(Ele, table, style, 0.9)
[docs] def error(self, msg): print(str(msg))
[docs] def str(self, str_in): """ This is to remove the "'s in ARK returned strings enclosed in double quotes """ return str(str_in).strip("\"")
if __name__ == '__main__': frm = FEPReportMaker(basename=sys.argv[1], cms_fn=sys.argv[1] + 'out-cms') frm.report()