Source code for schrodinger.application.prime.primefix

"""
Functions and classes for Prime molecule preparation.

It is deprecated as starting with Suite2008, Prime no longer needs structure
"pre-fixing".

The wrapper for this module is in: `$SCHRODINGER/mmshare-v*/python/scripts/primefix.py`

Copyright Schrodinger, LLC. All rights reserved.

"""

# Contributors: Matvey Adzhigirey, Woody Sherman

import sys

import schrodinger.protein.findhets as findhets
import schrodinger.structure as structure
import schrodinger.utils.log as log

logger = log.get_output_logger("schrodinger.application.prime.primefix")
logger.setLevel(log.WARNING)
#logger.setLevel(log.DEBUG)

# Het residue names that are to be excluded from X01, X02... renaming
excluded_hets = ["NAD", "NAG", "SO4"]

alkali_metals = [3, 11, 19, 37, 55, 87]
alkaline_earth_metals = [4, 12, 20, 38, 56, 88]
lanthanide_series = [
    21, 39, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71
]
actinide_series = [
    89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103
]
transition_metals = [
    22, 23, 24, 25, 26, 27, 28, 29, 30, 40, 41, 42, 43, 44, 45, 46, 47, 48, 72,
    73, 74, 75, 76, 77, 78, 79, 80, 104, 105, 106, 107
]
metalloids = [32, 33, 51, 52, 84, 85]
other_metals = [13, 31, 49, 50, 81, 82, 83]

all_metals = alkali_metals + alkaline_earth_metals + lanthanide_series + actinide_series + transition_metals + metalloids + other_metals


[docs]def is_metal(atom): """Returns True if the supplied atomic_number is for a metal. """ return atom.atomic_number in all_metals
# Dictionary of residue names to give to the metals after breaking bonds: metal_pdbnames = { 11: ' NA ', # NA Sodium +1 12: ' MG ', # MG Magnesium +2 19: ' K ', # K Potassium +1 20: ' CA ', # CA Calcium +2 25: ' MN ', # MN Manganese +2, +3, +4, +5, +6, +7 26: ' FE ', # FE Iron +2 +3 27: ' CO ', # CO Cobalt +2 +3 28: ' NI ', # NI Nickel +2 +3 29: ' CU ', # CU Copper +1 +2 +3 30: ' ZN ', # ZN Zinc +2 48: ' CD ', # CD Cadmium +2 No MacroModel parameters 80: ' HG ', # HG Mercury +1 +2 No MacroModel parameters } # Dictionary that defines possible ionization states for metals: # Key is the atomic number. metal_charges = { 11: [1], # NA Sodium +1 12: [2], # MG Magnesium +2 19: [1], # K Potassium +1 20: [2], # CA Calcium +2 25: [2, 3, 4, 5, 6, 7], # MN Manganese +2, +3, +4, +5, +6, +7 26: [2, 3], # FE Iron +2 +3 27: [2, 3], # CO Cobalt +2 +3 28: [2, 3], # NI Nickel +2 +3 29: [1, 2, 3], # CU Copper +1 +2 +3 30: [2] # ZN Zinc +2 # 48:[2] CD Cadmium +2 No MacroModel parameters # 80:[1, 2] HG Mercury +1 +2 No MacroModel parameters } metal_default_charges = { 11: +1, # NA Sodium 12: +2, # MG Magnesium 19: +1, # K 20: +2, # CA 25: +2, # MN Manganese 26: +2, # FE Iron except in FeS clusters +3 27: +2, # CO Cobalt 28: +2, # NI Nickel 29: +3, # CU Copper 30: +2, # ZN Zinc 48: +2, # CD Cadmium 80: +2, # HG Mercury } ###############################################################################
[docs]def fix_formal_charge(atom): """ Ev:58049 For the specified atom, counts the number of bonds to it, and if that number doesn't not match with the formal charge for the atom, the formal charge will be adjusted. Assumes that hydrogens have been added already """ # FIXME: Limited support currenlty. I figured it would be easier to make a # table for now than it would be to put in all the general rules # Element; bond count; charge table = { 'C': { 1: None, 2: None, 3: +1, 4: 0, 5: None, }, 'N': { 2: -1, 3: 0, 4: +1, }, 'S': { 0: -2, 1: -1, 2: 0, }, } # Get the subtable for the element: subtable = table.get(atom.element) if not subtable: # Unsupported element: return # Count the number of bonds to the atom: num_bonds = 0 for bond in atom.bond: num_bonds += bond.order # Determine what the charge should be adjusted to: charge = subtable.get(num_bonds) if charge is not None and charge != atom.formal_charge: atom.formal_charge = charge atom.retype() msg = "Atom %i: %s (%i bonds) set charge to %i" %\ (int(atom), atom.element, num_bonds, charge) logger.debug(msg)
[docs]class FixForPrime: """ Wrapper class to house the combination of methods for fixing a structure for Prime. Raises RuntimeError if it can't determine a chain name, or general Exception if can't name a hydrogen because of PDB atom name field size restrictions. Call instance.fixAll() after creating the instance to drive all fixing methods. """
[docs] def __init__(self, st): self.st = st self.reassign_resnum = { } # atoms to reassign resnums of (key: atom number, value: any) self.chains_used = []
[docs] def fixMetals(self, fix_pdbnames=False): """ Sets bond order between all metals and other atoms to 0 and gives the metal a new residue number and name. Also sets the charges of metals to appropriate values. For Iron-sulfur clusters, sets the charge of iron to +3 and the charge of sulfurs to -1. Optionally corrects the metal pdbnames. Also every -SH that is within 3A of a metal is marked with a negative charge and gets its hydrogen removed. """ # List of atom numbers for metals: metal_atoms = [] for a in self.st.atom: if not is_metal(a): continue # Not a metal ai = int(a) metal_atoms.append(ai) if fix_pdbnames: # Give the metal a new PDB Name: if a.atomic_number in metal_pdbnames: a.pdbname = metal_pdbnames[a.atomic_number] else: msg = 'primefix: No pdbname for metal #%i (atom %i)' % ( a.atomic_number, ai) logger.warning(msg) # Break bonds between this metal and other atoms: bonds_broken = False bound_to_sulfur = False neighbors = [b.atom2 for b in a.bond] for n in neighbors: ni = int(n) bond = self.st.getBond(a, n) bond.order = 0 msg = "Atom %i: bond to atom %i was made zero-order (broken)." % ( ai, ni) logger.debug(msg) bonds_broken = True # Adjust the formal charge on the other atom: fix_formal_charge(n) # This is an Iron bound to Sulfur: if a.element == 'Fe' and n.element == 'S': bound_to_sulfur = True if bonds_broken: # it's a new residue now: #msg = "Atom %i: metal, broke all bonds to it" % ai #logger.debug(msg) # Assign it a unique resnum later: self.reassign_resnum[int(a)] = None if bound_to_sulfur: a.formal_charge = +3 # Set charge of Iron in FeS clusters to +3 a.atom_type = 80 # Set atom type to f3, corresponding to Iron +3. ##### ^--- taken care of by a.retype() in the future msg = "Atom %i: Fe, changed charge to +3" % ai logger.debug(msg) else: # Assign a default charge (if any): oldcharge = a.formal_charge valid_charges = metal_charges.get(a.atomic_number, []) default_charge = metal_default_charges.get(a.atomic_number) if (oldcharge not in valid_charges) and default_charge and ( default_charge != oldcharge): a.formal_charge = default_charge msg = "Atom %i: metal, changed charge to %i" % ( ai, default_charge) logger.debug(msg) # Give -SH sulfurs next to metals a negative charge: hydrogens_to_delete = [] for sulfur in self.st.atom: if sulfur.element != 'S': continue bonds = 0 hydrogen = None for n in sulfur.bonded_atoms: bonds += 1 if n.element == 'H': hydrogen = n # Skip Sulfurs that are already negatively charged: if bonds == 1 and sulfur.formal_charge == -1: continue # Skip Sulfurs with 2 bonds (no hydrogens): elif bonds == 2 and not hydrogen: continue for metalnum in metal_atoms: dist = self.st.measure(metalnum, sulfur) if dist <= 2.7: # Angstroms sulfur.formal_charge = -1 if hydrogen: hydrogens_to_delete.append(int(hydrogen)) msg = "Atom %i(S): Assign negative charge (near metal)" % int( sulfur) logger.debug(msg) break # Go to next Sulfur if hydrogens_to_delete: self.st.deleteAtoms(hydrogens_to_delete) self.st.retype() # Correct the Mmod atom types. self.st.retype() # sometimes needed twice
[docs] def fixSpecialResidues(self): """ Assigns PDB atom names, and charges for residue names: HOH, DOD, TIP, SPS ASP, GLU NMA, NME ACE HEM, HEC Changes residue name from DOD, TIP, or SPC to HOH """ changed_waters = [] for res in self.st.residue: pdbres = res.pdbres.strip() # According to specification PDB residue names should always contain 3 characters # PDB atom names, on the other hand, are 4 characters long # Fix waters - resname to 'HOH' and fix pdb atoms names if pdbres in ['HOH', 'DOD', 'TIP', 'SPC']: changed_water = False for a in res.atom: if pdbres != 'HOH': a.pdbres = 'HOH' changed_water = True if a.chain == " ": a.chain = "W" changed_water = True if a.element == "O" and a.pdbname != ' O ': a.pdbname = ' O ' changed_water = True elif a.pdbname == " H1 ": a.pdbname = "1H " changed_water = True elif a.pdbname == " H2 ": a.pdbname = "2H " changed_water = True if changed_water: changed_waters.append(res.resnum) # Remove Asp/Glu formal charge from the double bonded oxygen: elif pdbres == "ASP" or pdbres == "GLU": for a in res.atom: if a.pdbname in [' OE1', ' OE2']: # check atom name if len(a.bond ) == 1: # Only one neighbor - no hydrogen bound if a.bond[1].order == 2: # double bonded a.formal_charge = 0 msg = "Atom %i(O) of %s: set charge to %i" % ( int(a), pdbres, 0) logger.debug(msg) else: # single bonded a.formal_charge = -1 msg = "Atom %i(O) of %s: set charge to %i" % ( int(a), pdbres, 0) logger.debug(msg) # Fix terminal caps elif pdbres == 'NMA' or pdbres == 'NME': for a in res.atom: a.pdbres = 'NMA' if a.pdbname == ' C ': # check atom name a.pdbname = ' CA ' msg = "Atom %i(C) of NMA: set pdbname to ' CA '" % int( a) logger.debug(msg) break elif pdbres == 'ACE': for a in res.atom: if a.pdbname == ' CA ': # check atom name a.pdbname = ' CH3' msg = "Atom %i(C) of ACE: set pdbname to ' CH3'" % int( a) logger.debug(msg) break # Fix charges on HEM Nitrogens that were bonded to Fe. # Also change the PDB residue name of Iron to ' FE '. # Make Fe atoms of Hemes orange (so that prepwizard recognizes them as a hets) # At this point the bonds to FE are already broken elif pdbres in ['HEM', 'HEC']: for a in res.atom: if a.pdbname in [' FE ', 'FE ']: # Iron a.pdbres = ' FE ' self.reassign_resnum[int( a)] = None # Mark for resnum to be reassigned a.property['i_m_pdb_convert_problem'] = 4 # Mark orange msg = "Atom %i(Fe) of %s: set pdbres to ' FE '" % ( int(a), pdbres) logger.debug(msg) elif a.pdbname in [' N B', ' N D']: # Nitrogens needing 0 charge a.formal_charge = 0 # a.retype() # Reset the MMod atom type msg = "Atom %i(N) of %s: set charge to 0" % (int(a), pdbres) logger.debug(msg) elif a.pdbname in [' N A', ' N C']: # Nitrogens needing -1 charge a.formal_charge = -1 # a.retype() # Reset the MMod atom type msg = "Atom %i(N) of %s: set charge to -1" % (int(a), pdbres) logger.debug(msg) if changed_waters: msg = "Water residues %s: Changed chain:pdbres to W:HOH" % changed_waters logger.debug(msg)
[docs] def fixDNACaps(self): """ Prime atom parameters need to be different for terminal DNA bases than the atom parameters for the DNA bases that are in the center of the string. This is accomplished by giving terminal bases a new residue name. Ev:60484 XHL is the 5' cap (-OH bound to the phosphate of the base) POT is the 3' cap (a phosphate cap) Protocol: Find all XHL&POT caps and rename the cap and the residue bound to it """ cap_base_dict = {} # key: cap residue; value: base residue for cap_res in self.st.residue: cap_pdbres = cap_res.pdbres.strip() if cap_pdbres in ['HXL', 'POT']: cap_done = False for atom in cap_res.atom: if cap_done: break for neighbor in atom.bonded_atoms: base_pdbres = neighbor.pdbres.strip() if base_pdbres not in ['HXL', 'POT']: # Atom is part of the neighboring base: if len(base_pdbres) != 1: print( 'WARNING: Invalid residue (%s) bound to %s' % (base_pdbres, cap_pdbres)) else: base_res = neighbor.getResidue() cap_base_dict[cap_res] = base_res cap_done = True break # Do the actual retyping in a separate loop: for cap_res, base_res in cap_base_dict.items(): base_pdbres = base_res.pdbres.strip() if base_pdbres == 'HXL': new_pdbres = base_pdbres + '5T' else: # POT new_pdbres = base_pdbres + '3T' msg = "Residues %s[%s] and %s[%s] " % ( base_res, base_pdbres, cap_res, cap_res.pdbres.strip()) base_res.pdbres = new_pdbres cap_res.pdbres = new_pdbres cap_res.resnum = base_res.resnum cap_res.chain = base_res.chain # Just in case msg += "merged into a new residue %s[%s]" % (cap_res, new_pdbres) logger.debug(msg)
[docs] def reassignHets(self, het_atom_groups): """ Assign chain, pdbres, and resnum to het groups. Raises RuntimeError if it can't determine a chain name, or general Exception if can't name a hydrogen because of PDB atom name field size restrictions. """ # Determine an available chain: hetchain = None # or if taken, then 'Y', 'Z', 'W', 'V' back... preferred_het_chain = ['X', 'Y', 'Z', 'W', 'V', 'U', 'T', 'S', 'R', 'Q'] for c in preferred_het_chain: if c not in self.chains_used: hetchain = c break if not hetchain: raise RuntimeError( 'Could not find next available chain name for het groups') # Iterate over het groups: hetnum = 0 for het_atoms in het_atom_groups: hetnum += 1 #print "HET %i:" % hetnum, het_atoms unique_counter = {} # Iterate over atoms in a het group: for ai in het_atoms: a = self.st.atom[ai] # Set resname to X01, X02, etc: if hetnum < 10: pdbres = 'X0%i' % hetnum else: pdbres = 'X%i' % hetnum # Back up the original residue debugrmation: if not a.property.get('s_ppw_pdbres_bu' ): # first time running fix_for_prime() a.property['s_ppw_pdbres_bu'] = a.pdbres a.property['s_ppw_chain_bu'] = a.chain a.property['i_ppw_resnum_bu'] = a.resnum # Set the new residue information: a.chain = hetchain a.pdbres = pdbres a.resnum = hetnum #### Set unique PDB name for each atom in this residue ### # set unique_counter to 0 if it does not yet exist for a given element e = a.element if e in unique_counter: unique_counter[e] += 1 else: unique_counter[e] = 1 num = unique_counter[e] # assign new pdbname to each atom of a residue if num >= 100: if e == 'H' and num < 1000: new_pdbatom = "%iH%i" % (num[0], num[1:2]) else: # Non-hydrogen with >= 100 OR H with >= 1000 atoms: raise Exception( "Problem setting unique PDB atom name; # of element [%s] exeeds limits" % e) else: new_pdbatom = "%2s%-2s" % (e.upper(), str(num)) a.pdbname = new_pdbatom msg = "Atoms %s: assigned a new residue of %s:%i[%s]" % ( het_atoms, hetchain, hetnum, pdbres) logger.debug(msg)
[docs] def reassignResNums(self): """ Assigns new residue numbers as needed, making sure a residue number only appears in a chain once. self.reassign_resnum currently contains only a list of metals to which bonds were broken. """ # Find all residues that are in the structure that do NOT need to be reassigned: resnums_used = { } # key: chain; value: dict of [key resnum, value True if used] for a in self.st.atom: if not int(a) in self.reassign_resnum: chain = a.chain if chain not in resnums_used: # First time we see this chain resnums_used[chain] = {} resnums_used[chain][a.resnum] = True # Assign new residue number to atoms that were put into reassign_resnum # dict earlier. A residue number can occur in each chain only once. for ai in self.reassign_resnum: a = self.st.atom[ai] chain = a.chain # Find the first unused resnum in the chain: if chain in resnums_used: resnum = 0 while True: resnum += 1 if resnum not in resnums_used[chain]: a.resnum = resnum # Reassign the resnum for this atom msg = "Atom %i: assigned a new resnum of %i" % (ai, resnum) logger.debug(msg) resnums_used[chain][ resnum] = True # Update the list - this resnum is now used break else: # This is the only residue in the chain resnum = 1 a.resnum = resnum # Reassign the resnum for this atom resnums_used[chain] = { resnum: True } # Udate the list - this resnum is now used
[docs] def renumberByMolecule(self): """ Break up the structure into multiple structures - one molecule each Recombined the structures into one, adding a molecule at a time. The purpose of this is so that all atoms from each molecule appear together in the connection table. """ mol_sts = [] for mol in self.st.molecule: mol_sts.append(mol.extractStructure()) combined_st = mol_sts[0] for st in mol_sts[1:]: combined_st.extend(st) self.st = combined_st
[docs] def fixPdbresAndPdbnames(self): """ Justifies PDB residue name. Changes all PDB atom names to upper case. """ for res in self.st.residue: oldr = res.pdbres # Strip 4th char (if any), right justify, and append a space: pdbres = oldr.rjust(3)[:3] + ' ' if pdbres != oldr: # Setting pdbres will justify properly in r2006-1 release res.pdbres = pdbres msg = "%s: changed pdbres from '%s' to '%s'" % (res, oldr, pdbres) logger.debug(msg) # Capitalize the PDB atom names: changed_pdbnames = False for a in res.atom: pdbname = a.pdbname upper = pdbname.upper() if pdbname != upper: changed_pdbnames = True a.pdbname = upper if changed_pdbnames: msg = "%s: capitalized PDB atom names" % res logger.debug(msg) return
[docs] def fixHets(self): """ Calls reassignHets on identified het atoms. """ hets = findhets.find_hets(self.st, include_metals=False, include_hydrogens=True, excluded_hets=excluded_hets) # Assign resnum/pdbname/ to het atoms: self.reassignHets(hets) return
[docs] def fixAll(self): """ Driver method that makes all the fixes. """ self.fixPdbresAndPdbnames() self.fixMetals() self.fixSpecialResidues() self.fixDNACaps() self.fixHets() # Assign residue numbers to atoms in reassign_resnum dict: self.reassignResNums() self.renumberByMolecule() self.st.retype() # for some reason needs to be called twice Ev:51529 self.st.retype() # for some reason needs to be called twice Ev:51529 return
[docs]def fix_for_prime(st): """ Preferred way to use the FixForPrime class. Fixes structure.Structure for Prime calculations. This is the preferred API for scripts implementing primefix.py. 1. Assign a unique, single residue name, residue number, and chain name to all ligand residues. Make sure that new name doesn't conflict with amino acid resname. Back up the old residue name as s_ppw_resname_bu, s_ppw_resnum_bu 2. Left-justify the 3-character residue name. 3. Set pdbname to all upper case. 4. Delete all bonds to metals and assign formal charges to the metal and previously attached atoms. 5. Water molecules should have a residue name of "HOH " and pdb atom names " O ", "1H " and "2H ". Currently, residues TIP, TIP3, TIP4, SPC, and HOH and DOD are treated. 6. FeS clusters are fixed with the appropriate bonds anf formal charges. 7. Terminal caps are fixed (rename "NME " to "NMA ", pdbname " C " of NMA to " CA ", and pdbname " CA " of ACE to " CH3"). 8. Atoms from the same molecule are placed together in the connection table. NOTE: Prime can still fail if a residue is not in order of connectivity. If this is the case in your structure, it should be exported as a PDB file and then reimported as a Maestro file before running this script. """ ffp = FixForPrime(st) ffp.fixAll() return
if __name__ == '__main__': if len(sys.argv) < 3: print("Usage: primefix.py <input.mae> <output.mae>") sys.exit(0) infile = sys.argv[1] outfile = sys.argv[2] written = False for st in structure.StructureReader(infile): fix_for_prime(st) if not written: st.write(outfile) written = True else: st.append(outfile) #EOF