Source code for schrodinger.protein.remediate

"""
Module for remediating names in PDB structures - converting PDBv2 residue and
atom names to PDBv3 convention.

Used by:
  psp-src/python/modules/PXfiles.py
  mmshare/python/scripts/residue_scanning_backend.py

Copyright Schrodinger, LLC. All rights reserved.

"""

import json
import os
import re
import sys

from schrodinger import structure
from schrodinger.infra import mm

# Exchange table used by remediate_ct(). Keys and values are identity strings
# of the form "<4-character atom name> <3-character residue name>"; a key
# found in the table is replaced by its value. Stays None until init() has
# loaded the table from remediate.json.
atom_exch = None


def init():
    """
    Load the atom/residue name exchange table into the module-level
    `atom_exch` dictionary.

    The table is read from "remediate.json", located in the same directory
    as this module. The file's "atom_exch" mapping is taken as the base and
    is then updated with its "schrod_atom_exch" mapping, so entries of the
    latter win on duplicate keys.

    Calling this function again after a non-empty table has been loaded is a
    no-op.

    :raise OSError: If "remediate.json" cannot be opened.
    :raise KeyError: If either of the two expected mappings is missing.
    """
    global atom_exch
    if atom_exch:
        return

    table_path = os.path.join(os.path.dirname(__file__), "remediate.json")
    # Read the JSON explicitly as UTF-8 instead of relying on the platform's
    # locale encoding.
    with open(table_path, encoding="utf-8") as fh:
        tables = json.load(fh)

    # Build the merged table completely before publishing it, so a failure
    # half-way through cannot leave a partial table behind that the guard
    # above would later mistake for a fully loaded one.
    merged = tables["atom_exch"]
    merged.update(tables["schrod_atom_exch"])
    atom_exch = merged
# Load the exchange table at import time so that remediate_ct() can look up
# atom_exch without callers having to call init() themselves.
init()
# Width of the residue-name part of an identity key. The residue field stored
# on an atom is one character wider (a single blank is appended on write-back).
_RESNAME_WIDTH = 3

# Bare one-letter nucleotide names as they appear in the 4-character residue
# field; these are renamed to " DA ", " DC ", " DG ", " DT " unless the
# residue turns out to be RNA.
_BARE_NUCLEOTIDE_FIELDS = ('  A ', '  C ', '  G ', '  T ')

# An atom with one of these names marks its residue as RNA rather than DNA.
_RNA_MARKER_ATOM_NAMES = (' O2*', ' O2\'')

# 3-character residue names whose atom names get "*" replaced by "'".
_NUCLEOTIDE_RESNAMES = (' DA', ' DC', ' DG', ' DT', '  A', '  C', '  G', '  U')


def _right_justify_resname(resname):
    """
    Return `resname` right-justified to 3 characters if it is a one- or
    two-letter name followed only by blanks; otherwise return it unchanged.
    """
    match = re.fullmatch(r'([a-zA-Z]{1,2}) +', resname)
    if match is None:
        return resname
    return match.group(1).rjust(_RESNAME_WIDTH)


def remediate_ct(ct):
    """
    Convert the PDB residue and atom names of a structure to the PDBv3
    convention. The structure is modified in place.

    Nothing is changed if the structure's PDB format version property is
    present and starts with "3". Otherwise the following steps are applied:

    1. Residues named A, C, G or T that contain no O2*/O2' atom are renamed
       to DA, DC, DG or DT respectively.
    2. For nucleotide residues, every "*" in an atom name is replaced by "'".
    3. Left-justified one- and two-letter residue names are right-justified.
    4. Every atom whose "<atom name> <residue name>" identity is a key of the
       module-level `atom_exch` table gets the atom and residue name stored
       under that key.

    :param ct: Structure to remediate. Must provide `property`, `residue`
        and `atom`, with `pdbres` / `pdbname` attributes on the items.

    :return: None
    """
    version_key = mm.M2IO_PDB_FORMAT_VERSION
    if version_key in list(ct.property):
        version = ct.property[version_key]
        if len(version) > 0 and version[0] == "3":
            # Already remediated.
            return

    # Fix residue names.
    for residue in ct.residue:
        if residue.pdbres not in _BARE_NUCLEOTIDE_FIELDS:
            continue
        if any(atom.pdbname in _RNA_MARKER_ATOM_NAMES
               for atom in residue.atom):
            # It's RNA, not DNA.
            continue
        # Keep the letter and the trailing blank, e.g. "  A " -> " DA ".
        new_pdbres = ' D' + residue.pdbres[2:]
        for atom in residue.atom:
            atom.pdbres = new_pdbres

    # Fix atom names.
    for atom in ct.atom:
        pdbname = atom.pdbname
        pdbres = atom.pdbres[:_RESNAME_WIDTH]

        if pdbres in _NUCLEOTIDE_RESNAMES:
            # Replace any * with '.
            pdbname = pdbname.replace('*', '\'')

        # The exchange table is keyed on right-justified residue names.
        pdbres = _right_justify_resname(pdbres)

        identity = pdbname + ' ' + pdbres
        if identity in atom_exch:
            new_identity = atom_exch[identity]
            # Identity layout: 4-character atom name, one blank, residue name.
            pdbname = new_identity[0:4]
            pdbres = new_identity[5:]

        atom.pdbname = pdbname
        # Restore the trailing blank that was cut off by the slice above.
        atom.pdbres = pdbres + ' '
def _main(argv):
    """
    Command-line entry point: read the structure from the file named by
    argv[1], remediate it and write it to the file named by argv[2].

    :param argv: Full argument vector, including the program name.
    :type argv: list of str
    """
    if len(argv) < 3:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('Usage: %s <input_file> <output_file>' % argv[0])

    # Named *_file so that the builtin input() is not shadowed.
    input_file = argv[1]
    output_file = argv[2]

    ct = structure.StructureReader.read(input_file)
    remediate_ct(ct)
    ct.write(output_file)


if __name__ == '__main__':
    _main(sys.argv)