Source code for schrodinger.structutils.interactionfp

"""

Calculate structural interaction fingerprints.

The fingerprints are similar to those described in Deng et al, J. Med Chem
(2004), 47, 337.

Copyright Schrodinger, LLC. All rights reserved.

"""

# Contributors: Quentin McDonald, Woody Sherman, Blaine Bell

import csv
import os
import subprocess

import numpy

from schrodinger.infra import canvas
from schrodinger.structutils import analyze
from schrodinger.structutils import measure
from schrodinger.structutils.interactions import hbond
from schrodinger.utils import csv_unicode
from schrodinger.utils import fileutils


[docs]class StructuralInteractionFingerprintGenerator(object): """ A class which generates Structural Interaction Fingerprints. Based on the work described in: J. Med. Chem., 47 (2), 337-344, 2004. 'Structural Interaction Fingerprint (SIFt): A Novel Method for Analyzing Three-Dimensional Protein-Ligand Binding Interactions' """ # ASL definitions for various residue sets: POLAR_RESIDUES = \ "res. ARG, ASP, GLU, HIS, ASN, GLN, LYS, SER, THR, ARN, ASH, GLH, HID, HIE, LYN" HYDROPHOBIC_RESIDUES = \ "res. PHE, LEU, ILE, TYR, TRP, VAL, MET, PRO, CYS, ALA, CYX" AROMATIC_RESIDUES = "res. PHE, TYR, TRP, TYO" CHARGED_RESIDUES = "res. ARG, ASP, GLU, LYS, HIP, CYT, SRO, TYO, THO" # Number of bits per residue: NUM_BITS_PER_RESIDUE = 9 # Bit positions for each interaction: CONTACT_POS = 0 BACKBONE_POS = 1 SIDECHAIN_POS = 2 POLAR_POS = 3 HYDROPHOBIC_POS = 4 ACCEPTOR_POS = 5 DONOR_POS = 6 AROMATIC_POS = 7 CHARGED_POS = 8 ANY_CONTACT = "Any Contact" BACKBONE_INTERACTION = "Backbone Interaction" SIDECHAIN_INTERACTION = "Sidechain Interaction" POLAR_RESIDUE = "Polar Residues" HYDROPHOBIC_RESIDUE = "Hydrophobic Residues" HYDROGEN_BOND_ACCEPTOR = "Hydrogen Bond Acceptor" HYDROGEN_BOND_DONOR = "Hydrogen Bond Donor" AROMATIC_RESIDUE = "Aromatic Residue" CHARGED_RESIDUE = "Charged Residue" INTERACTION_NAMES = [ ANY_CONTACT, BACKBONE_INTERACTION, SIDECHAIN_INTERACTION, POLAR_RESIDUE, HYDROPHOBIC_RESIDUE, HYDROGEN_BOND_ACCEPTOR, HYDROGEN_BOND_DONOR, AROMATIC_RESIDUE, CHARGED_RESIDUE ]
[docs] def __init__(self): """ Constructor - no args """ self.reset() # The cutoff distance for automatically determining the receptor # region for heavy atom-heavy atom contacts: self.cutoff_dist = 4.0 # The cutoff distance for automatically determining the receptor # region for contact involving hydrogen atoms: self.h_cutoff_dist = 2.5 # The hydrogen bonding parameters: self.hbond_distance = 2.5 self.hbond_donor_angle = 120.0 self.hbond_acceptor_angle = 90.0 # self.tracker is a dictionary. The keys are three-tuples of # (residue ID, ligand Entry ID, contact type) and the values are a list # of atom numbers of ligand ID that have "contact type" with residue ID. self.tracker = {}
[docs] def reset(self): """ Clear all the lists of fingerprints and associated data """ # The list of ChmBitsetObjects representing the FPs: self.fp_list = [] # The list of ligand IDs, either positions in a file or entry IDs self.ligand_ids = [] # The list of ligand titles: self.ligand_titles = [] # List of all CT-level properties: self.ligand_props = [] # A dictionary which associated fingerprints with ligand IDs self.fp_map = {} # List of _Residue for residues associated with each FP position # NOTE: When non-conformers are aligned, these are residues form # the reference structure. self.residues = [] # The list of residues IDs associated with each position in the # fingerprints - Chain:Resnum:InsCode self.res_ids = [] # The list of residue types associated with each position # (from reference structure, if aligning non-conformer compelxes) self.residue_types = [] # The minimum and maximum residue index which has actually contributed # to setting an interaction in the fingerprint self.min_res = None self.max_res = None # The receptor structure and ID: self.receptor_st = None self.receptor_id = None # Interactions: start with them all on: self.include_contact = True self.include_backbone = True self.include_sidechain = True self.include_polar = True self.include_hydrophobic = True self.include_acceptor = True self.include_donor = True self.include_aromatic = True self.include_charged = True
[docs] def setHbondParameters(self, dist, donor_angle, acceptor_angle): """ Set the hydrogen bonding parameters, maximum distance, minimum donor angle and mi mum acceptor angle """ self.hbond_distance = dist self.hbond_donor_angle = donor_angle self.hbond_acceptor_angle = acceptor_angle
[docs] def setInteractionCutoff(self, cutoff=4.0, h_cutoff=2.5): """ Sets the maximum interaction cutoff. No atom interactions beyond this distance will be counted """ self.cutoff_dist = cutoff self.h_cutoff_dist = h_cutoff
[docs] def setIncludeContact(self, include): """ Set whether we will include the generalized contact bit """ self.include_contact = include
[docs] def setIncludeBackbone(self, include): """ Set whether we will include the backbone contact bit """ self.include_backbone = include
[docs] def setIncludeSidechain(self, include): """ Set whether we will include the side-chain contact bit """ self.include_sidechain = include
[docs] def setIncludePolar(self, include): """ Set whether we will include the polar residue contact bit """ self.include_polar = include
[docs] def setIncludeHydrophobic(self, include): """ Set whether we will include the hydrophobic residue contact bit """ self.include_hydrophobic = include
[docs] def setIncludeAcceptor(self, include): """ Set whether we will include the hydrogen bond acceptor contact bit """ self.include_acceptor = include
[docs] def setIncludeDonor(self, include): """ Set whether we will include the hydrogen bond donor contact bit """ self.include_donor = include
[docs] def setIncludeCharged(self, include): """ Set whether we will include the charged residue contact bit """ self.include_charged = include
[docs] def setIncludeAromatic(self, include): """ Set whether we will include the aromatic residue contact bit """ self.include_aromatic = include
[docs] def getMinRes(self): """ Return the first residue index for which an interaction is detected """ return self.min_res
[docs] def getMaxRes(self): """ Return the last residue index for which an interaction is detected """ return self.max_res
[docs] def getLigandID(self, idx): """ Return the ID for the idx'th ligand """ return self.ligand_ids[idx]
[docs] def getLigandTitle(self, idx): """ Returns the title for the idx'th ligand (if it exists) """ ret = self.ligand_titles[idx] if ret is not None: return ret else: return ""
[docs] def getResidueType(self, idx): """ Returns the PDB residue type for the idx'th residue """ return self.residue_types[idx]
[docs] def getResidueID(self, idx): """ Return the ID for the idx'th residue """ return self.res_ids[idx]
[docs] def getReceptorID(self): """ Returns the ID (usually an entry ID) associated with the structure """ return self.receptor_id
[docs] def setReceptorStructure(self, receptor_st, id): """ Set the structure to be treated as the receptor and setup some lists and sets of atoms we'll use in calculating subsequent interactions. If non-conformer proteins are used, use this method to set the reference structure. """ self.receptor_st = receptor_st self.receptor_id = id # Calculate atom sets for residue interactions: backbone_list = analyze.evaluate_asl(self.receptor_st, "backbone") self.backbone_set = set(backbone_list) polar_list = analyze.evaluate_asl(self.receptor_st, self.POLAR_RESIDUES) self.polar_set = set(polar_list) hydrophobic_list = analyze.evaluate_asl(self.receptor_st, self.HYDROPHOBIC_RESIDUES) self.hydrophobic_set = set(hydrophobic_list) aromatic_list = analyze.evaluate_asl(self.receptor_st, self.AROMATIC_RESIDUES) self.aromatic_set = set(aromatic_list) charged_list = analyze.evaluate_asl(self.receptor_st, self.CHARGED_RESIDUES) self.charged_set = set(charged_list) self.residues = [] self.residue_types = [] self.res_ids = [] for chain in self.receptor_st.chain: for res in chain.residue: res_id = "%s:%d:%s" % (res.chain, res.resnum, res.inscode) self.res_ids.append(res_id) self.residues.append(res) self.residue_types.append(res.pdbres)
[docs] def generateFingerprint( self, ligand, id, receptor_region, ligand_title=None, nonpolar_hydrogens=False, receptor_st=None, aligned_residues=None, ): """ Generate a structural interaction fingerprint for the given ligand with id. The receptor_region parameter is a list of receptor atom numbers corresponding to the receptor residues that are to be considered as interacting with the ligand. No other residues will be considered (their bits in the fingerprint will all be 0). If receptor_region is None then it will be calculated automatically The fingerprints generated are stored in internal lists and can be accessed via other methods (or written to disk ). :type ligand: Structure class object :param ligand: the ligand structure to calculate fingerprints for :type id: str :param id: the entry id of the ligand in the project table :type receptor_region: None or list :param receptor_region: custom list of atoms in the receptor site. If None or empty list, the receptor set in setReceptorStructure() will be used. :type ligand_title: str :param ligand_title: the name of the ligand being calculated :type nonpolar_hydrogens: bool :param nonpolar_hydrogens: True if nonpolar hydrogens should be included in the calculation, False if not. :type aligned_residues: list(structure._Residue|None) :param aligned_residues: List of residues from this complex that correspond to """ if receptor_st is None: if self.receptor_st is None: raise Exception("The receptor structure has not yet been set.") receptor_st = self.receptor_st residues = self.residues else: # Different receptor is specified, assert aligned_residues is not None assert len(aligned_residues) == len(self.residues) residues = aligned_residues # NOTE: residues is a list of _Residue objects to consider for the # fingerprint. # Remove non-polar hydrogens from the ligand if requested if nonpolar_hydrogens: ligand_set = ligand.getAtomIndices() else: asl = 'not atom.mtype H1' ligand_set = analyze.evaluate_asl(ligand, asl) # If no region is specified then calculate one now. This will allow # us to calculate both a receptor and speed up the calculation: if receptor_region is None: rec_atoms = receptor_st.atom_total complex_st = receptor_st.copy() complex_st.extend(ligand) cutoff = max(self.cutoff_dist, self.h_cutoff_dist) lig_asl = '(a.num > %i)' % rec_atoms asl = '(fillres within %f %s) AND NOT %s' % (cutoff, lig_asl, lig_asl) if not nonpolar_hydrogens: asl += ' AND NOT atom.mtype H1' receptor_set = set(analyze.evaluate_asl(complex_st, asl)) else: receptor_set = set(receptor_region) self.ligand_ids.append(id) self.ligand_titles.append(ligand_title) self.ligand_props.append(dict(ligand.property)) on_bits = {} for res_cnt, residue in enumerate(residues): res_id = self.res_ids[res_cnt] if residue is None: # There is no residue at this position in the receptor after # aligning it to reference receptor. continue res_atoms = residue.getAtomIndices() if res_atoms[0] not in receptor_set: # This residue is not part of the receptor (e.g. a solvent # or ligand residue). continue # Examine all the ligand/residue interactions: res_index = res_cnt * self.NUM_BITS_PER_RESIDUE for iratom in res_atoms: ratom = receptor_st.atom[iratom] ratom_is_h = ratom.element == 'H' for ligindex in ligand_set: latom = ligand.atom[ligindex] d = measure.measure_distance(ratom, latom) if ratom_is_h or latom.element == 'H': cutoff = self.h_cutoff_dist else: cutoff = self.cutoff_dist if d < cutoff: # Update the minimum and maximum residue numbers if (self.min_res is None or res_cnt < self.min_res): self.min_res = res_cnt if (self.max_res is None or res_cnt > self.max_res): self.max_res = res_cnt # There is a contact, turn on res_index +1 bit_index = "%d" % (res_index + self.CONTACT_POS) if self.include_contact: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.CONTACT_POS), set()).add(ligindex) # Check backbone: if int(ratom) in self.backbone_set: bit_index = "%d" % (res_index + self.BACKBONE_POS) if self.include_backbone: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.BACKBONE_POS), set()).add(ligindex) else: bit_index = "%d" % (res_index + self.SIDECHAIN_POS) if self.include_sidechain: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.SIDECHAIN_POS), set()).add(ligindex) # Check residue type: if int(ratom) in self.polar_set: bit_index = "%d" % (res_index + self.POLAR_POS) if self.include_polar: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.POLAR_POS), set()).add(ligindex) if int(ratom) in self.hydrophobic_set: bit_index = "%d" % (res_index + self.HYDROPHOBIC_POS) if self.include_hydrophobic: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.HYDROPHOBIC_POS), set()).add(ligindex) if int(ratom) in self.aromatic_set: bit_index = "%d" % (res_index + self.AROMATIC_POS) if self.include_aromatic: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.AROMATIC_POS), set()).add(ligindex) if int(ratom) in self.charged_set: bit_index = "%d" % (res_index + self.CHARGED_POS) if self.include_charged: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.CHARGED_POS), set()).add(ligindex) if hbond.match_hbond(latom, ratom, self.hbond_distance, self.hbond_donor_angle, self.hbond_acceptor_angle): if ratom.atomic_number == 1: bit_index = "%d" % (res_index + self.DONOR_POS) if self.include_donor: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.DONOR_POS), set()).add(ligindex) else: bit_index = "%d" % (res_index + self.ACCEPTOR_POS) if self.include_acceptor: on_bits[bit_index] = 1 self.tracker.setdefault( (res_id, id, self.ACCEPTOR_POS), set()).add(ligindex) # We now have a list of "on" bits in on_bits.keys(). Convert this into # a canvas fingerprint and save it to the list on_string = ",".join(list(on_bits)) fp = chm_sparse_bitset_from_string(on_string) self.fp_list.append(fp) self.fp_map[id] = fp
[docs] def writeFPFile(self, file_name, all_props=False): """ Write the stored fingerprints to a Canvas FP file given by 'file_name' :param all_props: Whether to also export all CT-level properties. :type all_props: bool """ if not self.fp_list: return if all_props: # First write a .csv file with the properties, as we have no way to # directly write a binary FP file with properties from Python csv_name = file_name + '_temp_csv' self.writeCSVFile(csv_name, all_props=True, for_binaryFP=True) # Now use the canvasCSV2FPBinary utility to convert csv to fp cnvt_cmd = [ os.path.join(os.environ['SCHRODINGER'], 'utilities', 'canvasCSV2FPBinary') ] cnvt_cmd.extend(['-icsv', csv_name, '-o', file_name]) converter = subprocess.Popen(cnvt_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) out, err = converter.communicate() if converter.returncode: msg = "Fingerprint export failed: could not convert CSV file to binary fingerprint file." raise RuntimeError('%s\n%s\n%s' % (msg, out, err)) # Remove file left behind by the conversion process fileutils.force_remove(csv_name) else: # Do not include CT-level properties fp_out = canvas.ChmCustomOut32('1') fp_out.open(file_name) for i, fp in enumerate(self.fp_list): fp_out.write(fp, str(self.ligand_ids[i])) fp_out.close()
[docs] def writeCSVFile(self, file_name, all_props=False, for_binaryFP=False): """ Write the stored fingerprints to a CSV file given by `file_name` :param all_props: Whether to also export all CT-level properties. :type all_props: bool :param for_binaryFP: Whether to add "BIT" prefix to the binary column headers. :bype for_binaryFP: bool """ header_strings = [ "contact", "backbone", "sidechain", "polar", "hydrophobic", "acceptor", "donor", "aromatic", "charged" ] if len(self.fp_list): with csv_unicode.writer_open(file_name) as ofile: csv_writer = csv.writer(ofile) # Write the header row, which has the title and description # of each bit. If non-conformer proteins are used, the residues # listed are those of the reference structure. row = [] row.append("Title") ibit = 0 for res in self.res_ids: hres = res.replace(":", "") for i in range(self.NUM_BITS_PER_RESIDUE): ibit += 1 if for_binaryFP: # If CSV file will be passed onto # canvasCSV2FPBinary, column names must be integers # with BIT prefix, which is important in order to # distinguish the FB columns from the CT-level # property columns. colname = 'BIT%06i' % ibit else: colname = "%s_%s" % (hres.rstrip(), header_strings[i]) row.append(colname) if all_props: # Add header for the other CT-level properties: all_prop_names = self.getPropertyNames() row += all_prop_names csv_writer.writerow(row) # Now for each ligand write out a row representing the # on and off bits: for ilig, fp in enumerate(self.fp_list): row = [] row.append(self.ligand_titles[ilig]) res_index = 0 for res in self.res_ids: for bit in range(self.NUM_BITS_PER_RESIDUE): if fp.positionOf(res_index + bit) >= 0: row.append("1") else: row.append("0") res_index += self.NUM_BITS_PER_RESIDUE if all_props: st_props = self.ligand_props[ilig] for propname in all_prop_names: row.append(st_props.get(propname, "")) csv_writer.writerow(row)
[docs] def writeInteractionsFile(self, file_name, all_props=False): """ Write ligand atom indices involved in interactions to a CSV file given by `file_name` :param file_name: name of CSV file :type file_name: str :param all_props: Whether to also export all CT-level properties. :type all_props: bool """ header_strings = [ "contact", "backbone", "sidechain", "polar", "hydrophobic", "acceptor", "donor", "aromatic", "charged" ] # CSV file won't be written if no FPs were generated. if not self.fp_list: return with csv_unicode.writer_open(file_name) as fh: csv_writer = csv.writer(fh) # Write the header row, which has the title and description # of each bit. If non-conformer proteins are used, the residues # listed are those of the reference structure. row = ["Title"] assert len(header_strings) == self.NUM_BITS_PER_RESIDUE for res in self.res_ids: hres = res.replace(":", "") for i in range(self.NUM_BITS_PER_RESIDUE): colname = "%s_%s" % (hres.rstrip(), header_strings[i]) row.append(colname) if all_props: # Add header for the other CT-level properties: all_prop_names = self.getPropertyNames() row += all_prop_names csv_writer.writerow(row) # Now for each ligand write out a row representing the # on and off bits: for ilig, (lig_id, title) in enumerate( zip(self.ligand_ids, self.ligand_titles)): row = [title] for res_id in self.res_ids: for itype in range(len(self.INTERACTION_NAMES)): atoms_str = self.tracker.get((res_id, lig_id, itype), '') row.append(atoms_str) if all_props: st_props = self.ligand_props[ilig] for propname in all_prop_names: row.append(st_props.get(propname, "")) csv_writer.writerow(row)
[docs] def __len__(self): """ Return the length of the fingerprint list """ return len(self.fp_list)
def __getitem__(self, idx): """ Return the fingerprint given by 'idx'. If this is an integer then return the idx'th item. If this is a string then treat it as an ligand ID """ if type(idx) == type(1): return self.fp_list[idx] else: return self.fp_map[idx] def __iter__(self): """ An iterator for the fingerprints """ for fp in self.fp_list: yield fp
[docs] def getInteractionMatrix(self, which_interaction): """ Returns a tuple containing: i) matrix of 0s and 1s for the specified interaction. The matrix is only generated between the first and last residue which had an interaction with any ligand 2) An array of length N where each element is the total number of bits turned on across all ligands the current interaction and N is the number of residues which have an interaction 3) An array of length M where each element is the number of bits turned on for the i'th ligand across all residues and M is the number of Ligands Returns None, None, None if no interactions were found. """ num_ligands = len(self.ligand_ids) if self.max_res is None or self.min_res is None: return None, None, None num_residues = (self.max_res - self.min_res) + 1 matrix = numpy.zeros((num_ligands, num_residues), numpy.double) res_freq = numpy.zeros(num_residues, numpy.double) lig_freq = numpy.zeros(num_ligands, numpy.double) # Figure out the bit offset. This will be used to determine # which interaction is being plotted: bit_offset = self.INTERACTION_NAMES.index(which_interaction) if bit_offset is None: raise Exception("Unknown interaction type %s" % which_interaction) res_index = self.min_res * self.NUM_BITS_PER_RESIDUE for ires in range(self.min_res, self.max_res + 1): for ilig in range(len(self.ligand_ids)): fp = self.fp_list[ilig] if fp.positionOf(res_index + bit_offset) >= 0: matrix[ilig, ires - self.min_res] = 1.0 res_freq[ires - self.min_res] += 1 lig_freq[ilig] += 1 res_index += self.NUM_BITS_PER_RESIDUE return (matrix, res_freq, lig_freq)
[docs] def getFingerprintString(self, ligand_index, residue_index): """ Returns a string which summarizes the interactions between between ligand 'ligand_index' and residue 'residue_index' """ fp = self.fp_list[ligand_index] ret_list = [] res_index = residue_index * self.NUM_BITS_PER_RESIDUE if fp.positionOf(res_index) >= 0: ret_list.append("Cont.") if fp.positionOf(res_index + self.BACKBONE_POS) >= 0: ret_list.append("Back.") if fp.positionOf(res_index + self.SIDECHAIN_POS) >= 0: ret_list.append("Side.") if fp.positionOf(res_index + self.POLAR_POS) >= 0: ret_list.append("Pol.") if fp.positionOf(res_index + self.HYDROPHOBIC_POS) >= 0: ret_list.append("Hyd.") if fp.positionOf(res_index + self.ACCEPTOR_POS) >= 0: ret_list.append("Acc.") if fp.positionOf(res_index + self.DONOR_POS) >= 0: ret_list.append("Don.") if fp.positionOf(res_index + self.AROMATIC_POS) >= 0: ret_list.append("Aro.") if fp.positionOf(res_index + self.CHARGED_POS) >= 0: ret_list.append("Chg.") if len(ret_list): return ",".join(ret_list) else: return ""
[docs] def getPropertyNames(self): """ Returns sorted list of property names. :return: ligand properties :rtype: list[str] """ all_prop_names = set() for st_props in self.ligand_props: all_prop_names.update(list(st_props)) return sorted(all_prop_names)
[docs]def chm_sparse_bitset_from_string(on_string): """ Convert the on_string to a canvas fingerprint :param on_string: ',' or ' ' separated string of keys whose bits should be set :type on_string: `str` :return: finger print bitset :rtype: `canvas.ChmSparseBitset` """ # CANVAS-5675 suggested workaround for bad bit set positionOf results # when an empty string is used. if not on_string: return canvas.ChmSparseBitset() return canvas.ChmSparseBitset.fromString(on_string)