Source code for schrodinger.protein.captermini

"""
Module for capping uncapped terminal residues in a protein structure by
adding NME or ACE caps as appropriate. Also has functionality for adding a
C-terminal oxygen in places where this is missing.

Usage for capping: capped_st = cap_termini(input_st)

Usage for adding C-terminal oxygens: capped_st = add_terminal_oxygens(input_st)

Copyright Schrodinger, LLC. All rights reserved.

"""

from schrodinger import structure
from schrodinger.protein import sequence
from schrodinger.structutils import build
from schrodinger.structutils.interactions import hbond

INSCODES = [
    ' ', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
]

DONTCAP, CAPC, CAPN, CAPBOTH = list(range(4))


[docs]def count_atom_hbonds(st, atom): """ Return the number of hydrogens bonds that the given atom is involved in. """ return len(hbond.get_hydrogen_bonds(st, [atom]))
[docs]def res_can_be_capped(res): """ Determines whether a fragment is able to be capped. Returns CAPN if specified residue can be N-capped, CAPC if C-capped; DONTCAP if it can't be capped. """ # A PDB residue has PDB atom names of: # " CA " # " N " # " C " # " O " # Find it if this residue has a free N or C terminal if not res.getAlphaCarbon() or not res.getBackboneOxygen(): # Not a standard residue return DONTCAP nitrogen = res.getBackboneNitrogen() carbonyl_c = res.getCarbonylCarbon() if not nitrogen or not carbonyl_c: # Not a standard residue return DONTCAP n_bonded = False for neighbor in nitrogen.bonded_atoms: if neighbor.pdbname == " C ": # N is bonded to another residue n_bonded = True c_bonded = False for neighbor in carbonyl_c.bonded_atoms: if neighbor.pdbname in [" N ", " N1 "]: # C is bonded to another residue c_bonded = True # Valid residue, contains alpha-carbon, nitrogen, and C=O: if not c_bonded and not n_bonded: # Both terminals are un-capped return CAPBOTH elif not c_bonded: # C terminal return CAPC # doesn't need to be capped elif not n_bonded: # N terminal return CAPN else: return DONTCAP # residue does not need to be capped
def _find_hydrogen_to_replace(st, atom): """ Return atom number of a hydrogen bound to atom. If no hydrogens are present, perform htreat, and return one of the new hydrogens while deleting all other added hydrogens (if more than 1 was added). Return None if can't find any after adding hydrogens either. """ # Ev:88825 If the nitrogen has a positive charge, neutralize it, so # that we don't end up with N+ and 3 Hydrogens if atom.atomic_number == 7 and atom.formal_charge == 1: atom.formal_charge = 0 if len(atom.bond) > 3: # Delete the extra hydrogen (if this atom has hydrogens): for n in atom.bonded_atoms: if n.atomic_number == 1: st.deleteAtoms([n.index]) # NOTE: The <nitrogen> atom object will automatically # renumber itself in the case that the deleted atom # index is lower than its own. break # Add hydrogens to the atom (in case they are missing), otherwise we # won't have a hydrogen to replace: build.add_hydrogens(st, atom_list=[int(atom)]) # Search again and return the highest numbered hydrogen PYTHON-2797 hydrogens = [at2 for at2 in atom.bonded_atoms if at2.atomic_number == 1] if hydrogens: # In practice, seems like the the last atom in the "hydrogens" list # is always highest-numbered, but just in case, we do the sort: highest_h = max(hydrogens, key=lambda at: at.index) return highest_h # For some reasons still no hydrogens. Most often this means that this # residue already has some kind of a cap (e.g. " OXT"). return None def _increment_inscode(inscode): """ Returns next inscode above <inscode> """ # Ev:64830 try: return INSCODES[INSCODES.index(inscode) + 1] except: raise Exception("Could not determine good INS code for cap residue") ############################################################################### ##### Class for adding NME and ACE caps to uncapped protein termini ####### ###############################################################################
[docs]class CapTermini:
[docs] def __init__(self, st, verbose=False, frag_min_atoms=150): """ Add caps to uncapped terminal residues in the input structure 'st'. Returns a list of residues capped frag_min_atoms - peptide fragments with less than this number of atoms will not be capped (default 150). Set to 0 to cap all fragments. """ self.verbose = verbose self._frag_min_atoms = frag_min_atoms self._capped_residues = [] # for self.cappedResidues() self._cap_residues = [] # for self.capResidues() self._analyzed_residues = [] self._output_st = None # Save the original atom numbers (for debugging): for atom in st.atom: atom.property['i_ppw_original_index'] = atom.index # Make a list of residue names in use in the structure: # list of [chain, resname, inscode] self.used_residues = [] for res in st.residue: resid = [res.chain, res.resnum, res.inscode] self.used_residues.append(resid) # Go through all sequences, capping each one. any_capped = self.capSequences(st) if any_capped: # If any sequence was capped, reorder the CT by connectivity # (Originally needed for Prime, Ev:58331) st = build.reorder_protein_atoms_by_sequence(st) self._output_st = st
[docs] def capSequences(self, st): """ Cap both ends of each sequence in the given structure. """ c_ca_atoms = [] n_ca_atoms = [] for seqi, seq in enumerate(sequence.get_structure_sequences(st)): seq_atoms = seq.getAtomIndices() if len(seq_atoms) < self._frag_min_atoms: # Skip small fragments (ligands) continue # Find the termini to cap: for res in seq.residue: res_type = res_can_be_capped(res) if res_type == DONTCAP: continue # Save an atom object from the residue, as it will be # automatically re-numbered when other caps are added. res_ca = res.getAlphaCarbon() if res_type == CAPC or res_type == CAPBOTH: c_ca_atoms.append(res_ca) if res_type == CAPN or res_type == CAPBOTH: n_ca_atoms.append(res_ca) # Ev:64830 In order for resname & inscodes for the caps # to be correct, must cap C termini of all fragments before # capping any N termini. any_capped = False for res_ca in c_ca_atoms: res = res_ca.getResidue() if self.verbose: print('CapTermini: Attempting to C cap sequence %i (%i atoms)' % \ (seqi, len(seq_atoms))) self._addCCap(res) any_capped = True for res_ca in n_ca_atoms: res = res_ca.getResidue() if self.verbose: print('CapTermini: Attempting to N cap sequence %i (%i atoms)' % \ (seqi, len(seq_atoms))) self._addNCap(res) any_capped = True return any_capped
[docs] def outputStructure(self): return self._output_st
[docs] def cappedResidues(self): """ Returns residue strings for residues that were capped. """ return self._capped_residues
[docs] def capResidues(self): """ Returns residue strings for the added cap residues. """ return self._cap_residues
[docs] def findOxygenToReplace(self, st, c_atom): """ Check whether c_atom is bound to 2 terminal oxygens. If so, determine which one to replace with a cap, and return its atom index. Return None if can't find 2 terminal oxygens. """ # Sometimes there is an oxygen (often " OXT") that is already capping # the residues. If this is the case replace it. Since in this case # the " OXT" and the " O " oxygens are equivalent (hydoxylate), # replace the one which is trans (or the one that make no h-bonds). ca_atom = None terminal_oxygens = [] for n in c_atom.bonded_atoms: # if an oxygen is protonated, remove that hydrogen if n.element == "O" and len(n.bond) == 2: h_atom = [a for a in n.bonded_atoms if a.element == "H"] if len(h_atom) > 0: st.deleteAtoms(h_atom) # terminal oxygens if n.element == "O" and len(n.bond) == 1: terminal_oxygens.append(n.index) # backbone C-alpha if n.pdbname == " CA ": ca_atom = n # Ev:69692, If there is an OXT (O-) terminal atom, replace it: # Ev:120504 Sometimes the " OXT" hydrgen is double-bonded, # and sometimes the names " OXT" and " O " were flipped. if len(terminal_oxygens) == 2: ox1 = terminal_oxygens[0] ox2 = terminal_oxygens[1] ox1_hbonds = count_atom_hbonds(st, ox1) ox2_hbonds = count_atom_hbonds(st, ox2) replace_ox = None other_ox = None if ox2_hbonds > ox1_hbonds: replace_ox = ox1 other_ox = ox2 print( "Replacing oxygen %i because it makes makes fewer H-bonds" % replace_ox) elif ox1_hbonds > ox2_hbonds: replace_ox = ox2 other_ox = ox1 print( "Replacing oxygen %i because it makes makes fewer H-bonds" % replace_ox) else: # Replace the oxygen that is trans: n_atom = None for n in ca_atom.bonded_atoms: if n.pdbname == " N ": n_atom = n break # Figure out which oxygen has the fewest angle: torsion1 = abs(st.measure(n_atom, ca_atom, c_atom, ox1)) torsion2 = abs(st.measure(n_atom, ca_atom, c_atom, ox2)) if torsion1 > torsion2: replace_ox = ox1 other_ox = ox2 else: replace_ox = ox2 other_ox = ox1 print("Replacing oxygen %i because it is trans" % replace_ox) # Make sure this bond is single-order, and the other is double: st.getBond(c_atom, replace_ox).order = 1 st.getBond(c_atom, other_ox).order = 2 st.atom[other_ox].formal_charge = 0 # Fix for Ev:124473 st.atom[other_ox].retype() # Fix for Ev:124473 st.atom[other_ox].pdbname = " O " # make sure correct name return st.atom[replace_ox] return None
def _getLowerName(self, residue): """ Return resnum & inscode corresponding with the residue that would be lower in sequence than the specified residue. """ # Ev:64830 chain = residue.chain resnum = residue.resnum # Name cap residue as <resnum-1:' '> if possible # If not, name it <resnum-1:A>, <resnum-1:B>, etc. new_inscode = ' ' # Keep incrementing inscode until OK: while True: newresid = [chain, resnum - 1, new_inscode] if newresid not in self.used_residues: return newresid # new_inscode is already in use new_inscode = _increment_inscode(new_inscode) def _getHigherName(self, residue): """ Return resnum & inscode corresponding to a residue which would be one higher in sequence than the input residue. """ # Ev:64830 chain = residue.chain resnum = residue.resnum # Name the cap residue <resnum:A> or <resnum:B>, etc. new_inscode = _increment_inscode(residue.inscode) # Keep incrementing inscode until OK: while True: newresid = [chain, resnum, new_inscode] if newresid not in self.used_residues: return newresid # new_inscode is already in use new_inscode = _increment_inscode(new_inscode) def _addCCap(self, residue): """ Add a NMA residue to the ' C ' atom of the specified residue """ st = residue._st # Find the N atom of the terminal: carbon = residue.getCarbonylCarbon() if not carbon: raise Exception("No ' C ' in terminal residue!") if self.verbose: i = carbon.property['i_ppw_original_index'] print("CapTermini: Capping C termini: %s, atom %s" % (str(residue), i)) # Figure out which hydrogen to replace: replace_atom = self.findOxygenToReplace(st, carbon) if not replace_atom: replace_atom = _find_hydrogen_to_replace(st, carbon) if not replace_atom: print( "WARNING captermini: No hydrogen to replace with NMA fragment on C atom %s" % int(carbon)) print(" Skipping the group") return # Figure out what resnum, inscode to give to the new cap: chain, new_resnum, new_inscode = self._getHigherName(residue) capres, renumber_map = self.attachCap(residue, carbon, replace_atom, 'NMA') capres.chain = chain capres.resnum = new_resnum capres.inscode = new_inscode # NOTE: The <carbon> atom object will automatically renumber itself # in the case that the deleted atom index is lower than its own. # adjust the omega torsion (deviant definition of omega) self.adjustDihedral(st, residue.getBackboneOxygen(), carbon, capres.getBackboneNitrogen(), capres.getAlphaCarbon()) self.resCapped(residue, capres)
[docs] def adjustDihedral(self, st, atom1, atom2, atom3, atom4): """ Adjust dihedral between the original residue and the cap to 0 degrees. """ if atom1 and atom2 and atom3 and atom4: st.adjust(0, atom1, atom2, atom3, atom4) else: # Should never happen, unless there are other bugs print('ERROR captermini: Could not adjust dihedral for bond:', (atom1, atom2, atom3, atom4))
[docs] def resCapped(self, residue, capres): """ Record this capping in the internal lists. """ self._capped_residues.append(str(residue)) self._cap_residues.append(str(capres)) print("CapTermini: Added %s Cap: %s" % (capres.pdbres.strip(), str(capres))) capresid = [capres.chain, capres.resnum, capres.inscode] self.used_residues.append(capresid)
[docs] def attachCap(self, residue, fromatom, replace_atom, fragname): """ Attaches the specified fragment and returns the new Residue object """ st = residue._st # We must determine what atoms were added in this step. # We will do this by marking each existing atom with a property for atom in st.atom: atom.property['b_capres_existed'] = True # Save the residue info for the residue (atom numbers may change): chain = residue.chain resnum = residue.resnum inscode = residue.inscode # Break any zero-order bonds to replace_atom (Ev:87195): delete_bonds_to = [ bond.atom2.index for bond in replace_atom.bond if bond.order == 0 ] for atom2 in delete_bonds_to: st.deleteBond(replace_atom, atom2) renumber_map = build.attach_fragment(st, fromatom.index, replace_atom.index, 'peptide_caps', fragname) # The new fragment will have the same residue name and number but # different resname: capres_atoms = [] for atom in st.atom: if 'b_capres_existed' not in atom.property: # Make sure we don't include atoms from <residue> capres_atoms.append(atom.index) else: del atom.property['b_capres_existed'] cap_res = structure._Residue(st, resnum, inscode, chain, capres_atoms) return cap_res, renumber_map
def _addNCap(self, residue): """ Add an ACE residue to " N " atom of the specified residue """ st = residue._st # Find the N atom of the terminal: nitrogen = residue.getBackboneNitrogen() if not nitrogen: raise Exception("No ' N ' in terminal residue!") if self.verbose: i = nitrogen.property['i_ppw_original_index'] print("CapTermini: Capping N termini: %s, atom %s" % (str(residue), i)) # Figure out which hydrogen to replace: replace_atom = _find_hydrogen_to_replace(st, nitrogen) if not replace_atom: print( "WARNING captermini: No hydrogen to replace with ACE fragment on N atom %s" % int(nitrogen)) print(" Skipping the group") return # Figure out what resnum, inscode to give to the new cap: chain, new_resnum, new_inscode = self._getLowerName(residue) capres, renumber_map = self.attachCap(residue, nitrogen, replace_atom, 'ACE') capres.chain = chain capres.resnum = new_resnum capres.inscode = new_inscode # NOTE: The <nitrogen> atom object will automatically renumber itself # in the case that the deleted atom index is lower than its own. # Adjust the charges of the nitrogen to which the cap is attached, # in case it was set to something else previously: nitrogen.partial_charge = 0 nitrogen.solvation_charge = 0 st.retype() # Make sure all MacroModel types are correct # adjust the omega torsion (deviant definition of omega) self.adjustDihedral(st, residue.getAlphaCarbon(), nitrogen, capres.getCarbonylCarbon(), capres.getBackboneOxygen()) self.resCapped(residue, capres)
[docs]def cap_termini(st): """ Cap the termini on the specified st Function interface for CapTermini class """ return CapTermini(st).outputStructure()
[docs]def add_terminal_oxygens(st, frag_min_atoms=150): """ Add OXT oxygen to the C-terminal of each poly-peptide chain. A hydrogen will first be added to the residue and converted to an oxygen. The bond length is not adjusted, and a minimization would be in order to fix this. :param st: Structure to add oxygens to :type st: Structure :param frag_min_atoms: Minimal atoms required for fragment to be considered a poly-peptide chain :type frag_min_atoms: int :return: List of capped residues :rtype: List[_Residue] """ capped_residues = [] for seq in sequence.get_structure_sequences(st): seq_atoms = seq.getAtomIndices() if len(seq_atoms) < frag_min_atoms: # Skip small fragments (ligands) continue for residue in seq.residue: res_type = res_can_be_capped(residue) if res_type not in {CAPC, CAPBOTH}: continue carbon = residue.getCarbonylCarbon() if not carbon: continue # Find hydrogen to transform to oxygen hxt_atom = _find_hydrogen_to_replace(st, carbon) if hxt_atom is None: continue hxt_atom.pdbname = ' OXT' hxt_atom.atomic_number = 8 hxt_atom.formal_charge = -1 oxygen = residue.getBackboneOxygen() if oxygen is not None: color = oxygen.color.rgb_float else: # Red-ish default color color = (0xff, 0x2e, 0x2e) hxt_atom.setColorRGB(*color) hxt_atom.retype() capped_residues.append(residue) return capped_residues
# For debugging purposes: if __name__ == '__main__': import sys verbose = ('-v' in sys.argv) st = structure.Structure.read(sys.argv[1]) capter = CapTermini(st, verbose=verbose, frag_min_atoms=0) st = capter.outputStructure() capped_residues = capter.cappedResidues() print('Capped residues:', capped_residues) print('Capped file:', sys.argv[2]) st.write(sys.argv[2]) # EOF