Source code for schrodinger.protein.nonstandard_residues

import copy
import os

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.job.util import hunt
from schrodinger.Qt import QtCore
from schrodinger.structutils import build
from schrodinger.structutils import smiles
from schrodinger.utils import fileutils
from schrodinger.utils import preferences

RES_DIR = os.path.join(hunt('mmshare', dir='data'), 'res')

# These are standard names for AA backbone atoms, as assigned by pdbname.py:
CA_NAME = " CA "
C_NAME = " C  "
N_NAME = " N  "
O_NAME = " O  "
OXT_NAME = " OXT"
BACKBONE_NAMES = (CA_NAME, N_NAME, C_NAME, O_NAME, OXT_NAME)

# 1-letter residue code property:
CODE_PROP = "s_biolum_residue_code"
LOCKED_PROP = "b_biolum_residue_locked"
ALIGNS_WITH_PROP = "s_biolum_aligns_with_code"

_residue_database = None


[docs]def get_residue_database(*, load_saved=True): """ Get the global nonstandard residue database. If there is an error loading the saved database file, an empty (default) database will be returned. """ global _residue_database if _residue_database is None: _residue_database = ResidueDatabase() if load_saved: dbfile = get_saved_db_file() try: _residue_database.readDatabaseFile(dbfile) except RuntimeError: pass return _residue_database
[docs]def get_default_user_dbfile(): """ Return the path to the default user DB file. :return: Default path to user DB file. :rtype: str """ data_dir = fileutils.get_directory_path(fileutils.USERDATA) return os.path.join(data_dir, 'custom_nsr.maegz')
[docs]def get_saved_db_file(): """ Return the path to the database file that is saved in Maestro's prefs or, if no preference is specified, a default location in the user's userdata directory., :return: Saved DB filepath to be used :rtype: str """ prefs = preferences.Preferences(preferences.SCRIPTS) prefs.beginGroup("non_standard_residues", toplevel=True) db_file = prefs.get('ab-database-file', None) if db_file is None: # No preference specified, use default custom NSR file. db_file = get_default_user_dbfile() return db_file
[docs]def strip_caps(st): """ Strip any ACE & NMA caps, and add an OXT cap. """ # Strip ACE and NMA capping groups (works with all built-in residues): residues = list(structure.get_residues_by_connectivity(st.molecule[1])) if len(residues) != 3: # Should never happen, but check just in case raise ValueError("Expecting 3 residues") st = residues[1].extractStructure(copy_props=True) c_atom = find_atom(st, C_NAME) # Add an OXT cap: oxt_atom = st.addAtom('O', 0.0, 0.0, 0.0, "red11") oxt_atom.pdbname = OXT_NAME oxt_atom.formal_charge = -1 # Add OXT to the same residue: oxt_atom.chain = c_atom.chain oxt_atom.resnum = c_atom.resnum oxt_atom.inscode = c_atom.inscode oxt_atom.pdbres = c_atom.pdbres build.connect(st, [oxt_atom.index], [c_atom.index]) # TODO add a function for adding an OXT cap to the captermini module, and # use it here and in peptide_descriptors.py return st
[docs]def find_atom(st, name): """ Find the atom with the given PDB name. None is returned if no such atom is present. """ for atom in st.atom: if atom.pdbname == name: return atom return None
[docs]def flip_isomer(st): """ Invert the isomerism of the alpha carbon. So D residue gets converted to L and vice versa. """ st = st.copy() ca_atom = find_atom(st, CA_NAME) n_atom = find_atom(st, N_NAME) c_atom = find_atom(st, C_NAME) mm.mmstereo_atom_invert_chirality(st.handle, ca_atom.index, n_atom.index, c_atom.index) return st
[docs]def generate_smiles(st, use_annotations=False): """ Generate the SMILES string for the given structure. :type use_annotations: bool :param use_annotations: Whether to use annotations instead of 3D geometry (set if the structure is 2D). """ if use_annotations: stereo = smiles.STEREO_FROM_ANNOTATION else: stereo = smiles.STEREO_FROM_GEOMETRY return smiles.SmilesGenerator(unique=True, stereo=stereo).getSmiles(st)
[docs]class ResidueDatabase(QtCore.QObject): """ A collection of AminoAcid objects. Included built-in residues. For now, just a collection of database-related functions. """ residuesChanged = QtCore.pyqtSignal() _built_in_residues = None
[docs] def __init__(self, db_file=None): super().__init__() if db_file is not None: self._readDatabaseFile(db_file) else: self.reset() self._amino_acid_dict = dict() self.residuesChanged.connect(self._amino_acid_dict.clear)
[docs] def getAminoAcid(self, name): """ Retrieve an amino acid by PDB residue name :param name: PDB residue name :type name: str :return: Amino acid object :rtype: Optional[AminoAcid] """ if not self._amino_acid_dict: aa_dict = dict() for aa in self.amino_acids: aa_dict[aa.name] = aa self._amino_acid_dict.update(aa_dict) return self._amino_acid_dict.get(name)
@property def amino_acids(self): return self._amino_acids @amino_acids.setter def amino_acids(self, value): self._amino_acids = list(value) self.residuesChanged.emit()
[docs] @classmethod def get_built_in_residues(cls): """ Return a list of built-in residues AminoAcid objects from the installation. """ if cls._built_in_residues: return cls._built_in_residues amino_acids = [] # NOTE: modified_amino_acids.bld is excluded, see SHARED-3979 for fname in ('peptide.bld', 'nonstandard_peptide.bld'): for st in structure.StructureReader(os.path.join(RES_DIR, fname)): # Strip ACE and NMA capping groups (works with all built-in residues): st = strip_caps(st) is_standard = (fname != 'nonstandard_peptide.bld') if not is_standard: res = st.atom[1].getResidue() name = res.pdbres.strip() if name == 'BAL': # "BALA", has a non-standard backbone continue aa = AminoAcid(st, built_in=True, standard=is_standard) amino_acids.append(aa) cls._built_in_residues = amino_acids return amino_acids
[docs] def readDatabaseFile(self, db_file): """ Read the specified database file, and return a ResidueDatabase (which is basically a list of AminoAcid objects, including built-in residues). Raises RuntimeError on failure. """ # Make a copy of the built-in AminoAcid objects: amino_acids = copy.deepcopy(self.get_built_in_residues()) builtin_aa_names = set(aa.name for aa in amino_acids) if os.path.exists(db_file): try: for st in structure.MaestroReader(db_file): aa = AminoAcid(st) if aa.name not in builtin_aa_names: amino_acids.append(aa) except Exception as err: raise RuntimeError("Failed to read: %s\n\n%s" % (db_file, err)) # Set the "Mutate to" checkboxes to the saved values: mut_to_file = db_file + '.mutateto' if os.path.isfile(mut_to_file): try: with open(mut_to_file) as fh: mut_to_enabled = fh.read().split() except Exception as err: raise RuntimeError("Failed to read: %s\n\n%s" % (mut_to_file, err)) else: mut_to_enabled = [] for aa in amino_acids: aa.mutate_to = (aa.name in mut_to_enabled) self.amino_acids = amino_acids
[docs] def reset(self): self.amino_acids = copy.deepcopy(self.get_built_in_residues())
[docs] def saveDatabase(self, amino_acids, db_file=None): # emits residuesChanged self.amino_acids = amino_acids if db_file is None: db_file = get_saved_db_file() self.exportDatabase(self.amino_acids, db_file)
[docs] @staticmethod def exportDatabase(amino_acids, db_file): """ Export the amino acids database. :param amino_acids: Amino acids to export. :type amino_acids: list[AminoAcid] :param db_file: Database file path. :type db_file: str """ # Write a file with names of amino acids whose 'mutate_to' was set. mutate_to_set_aa_names = [aa.name for aa in amino_acids if aa.mutate_to] mutate_to_file_name = f'{db_file}.mutateto' with open(mutate_to_file_name, 'w') as fh: fh.write(" ".join(mutate_to_set_aa_names)) if len(amino_acids) > 0: with structure.MaestroWriter(db_file) as writer: for aa in amino_acids: writer.append(aa.st) else: # Create an empty file. open(db_file, 'w').close()
[docs]class AminoAcid(object): """ Class representing an amino acid (residue) row in the table. """
[docs] def __init__(self, st, built_in=False, standard=False): self.st = st self.built_in = built_in self.standard = standard # Whether one of 20 standard AAs self.st_changed() self.mutate_to = False
@property def name(self): """ AminoAcid.name will return the PDB residue name for this AA. """ return self.st.atom[1].pdbres.strip() @name.setter def name(self, value): """ Setting AminoAcid.name will update the CT. """ if self.built_in: raise RuntimeError("Built-in residues can not be modified") for atom in self.st.atom: atom.pdbres = value @property def code(self): """ AminoAcid.code will return the 1-letter code for this residue. """ if self.built_in: return self.st.atom[1].pdbcode else: return self.st.property.get(CODE_PROP, "X") @code.setter def code(self, value): """ Setting AminoAcid.code will update the CT. """ if self.built_in: raise RuntimeError("Built-in residues can not be modified") self.st.property[CODE_PROP] = value @property def aligns_with(self): default = "" if self.built_in: return default return self.st.property.get(ALIGNS_WITH_PROP, default) @aligns_with.setter def aligns_with(self, value): if self.built_in: raise RuntimeError("Built-in residues can not be modified") if value and value not in structure.RESIDUE_MAP_1_TO_3_LETTER: raise ValueError(f"{value} is not a standard residue") self.st.property[ALIGNS_WITH_PROP] = value @property def description(self): """ AminoAcid.description will return the title of the CT. """ return self.st.title @description.setter def description(self, value): """ Setting AminoAcid.description will update the CT. """ if self.built_in: raise RuntimeError("Built-in residues can not be modified") self.st.title = value @property def locked(self): """ AminoAcid.code will return whether the residue is locked from editing. """ return self.st.property.get(LOCKED_PROP, False) @locked.setter def locked(self, value): """ Setting AminoAcid.locked will update the CT. """ if self.built_in: raise RuntimeError("Built-in residues can not be modified") self.st.property[LOCKED_PROP] = value
[docs] def st_changed(self): """ Must be called every time the structure is modified. Will update the SMILES string and the .isomer property. """ self.smiles = generate_smiles(self.st) self._determine_isomer()
def _determine_isomer(self): """ Determine whether this residue is L or D. """ # Since D/L is different from R/S, we need to strip the side-chain # before reading the chirality of the alpha carbon: # FIXME there may be a better way to do it, but this approach is fairly # simple and works with both 3D and 2D(with annotations) structures. copy_st = self.st.copy() ca_atom = find_atom(copy_st, CA_NAME) if not ca_atom: isomer = "None" else: # atoms at the base of the side-chain(s): sc_atoms = [] for atom in list(ca_atom.bonded_atoms): if atom.pdbname not in (N_NAME, C_NAME) and atom.element != 'H': sc_atoms.append(atom) if not sc_atoms or len(sc_atoms) > 1: # This residue has no side-chain (GLY) or has 2 "side-chains" isomer = "None" else: # Atom at the base of the side-chain (usually a beta carbon) first_sc_atom = sc_atoms[0] # Delete all bonds between the first side-chain atom # (usually beta carbon) to the rest of the side-chain: for atom in list(first_sc_atom.bonded_atoms): if atom != ca_atom: copy_st.deleteBond(first_sc_atom, atom) # This determine the stereochemistry of the backbone, ignoring # any side-chain atoms (except beta carbon): mol_st = ca_atom.getMolecule().extractStructure() pattern = generate_smiles(mol_st) if '@@' in pattern: isomer = 'D' elif '@' in pattern: isomer = 'L' else: isomer = 'None' self.isomer = isomer @property def identifier(self): """ Return an ID string for this residue, which includes the residue name, isomer, and description. """ istr = '(%s)' % self.isomer if self.isomer else '' return "%s%s; Description: %s" % (self.name, istr, self.description)
[docs] def is_editable(self): return not self.built_in and not self.locked
[docs]def filter_residues_mutating_to_custom(residues): """ :param residues: a list of residues :type residues: list[non_standard_residues.AminoAcid] :return: the elements of `residues` that are non-standard and for which "mutate to" has been enabled :rtype: list[non_standard_residues.AminoAcid] """ return [res for res in residues if res.mutate_to and not res.standard]
[docs]def get_db_residues(): """ :return: a list of residues from the non-standard residue database :rtype: list[non_standard_residues.AminoAcid] """ db = get_residue_database() return [res for res in db.amino_acids]
[docs]def prepare_res_st_for_prime(st): """ Create a copy of the specified residue structure that has been prepared for use in Prime. This includes removing the OXT group (-OH), and setting the title of the CT to the 3-letter residue name. :param st: a residue structure :type st: _structure.Structure :return: a copy of `st` that has been prepared for use in Prime :rtype: _structure.Structure """ st = st.copy() # Prime requires that the -OH cap not be present on the C terminal: oxt_atom = find_atom(st, " OXT") del_atoms = [oxt_atom.index] + [ atom.index for atom in oxt_atom.bonded_atoms if atom.element == 'H' ] st.deleteAtoms(del_atoms) build.add_hydrogens(st) # TODO Add a function to captermini module for removing an OXT cap, # and use it here and in peptide_descriptors.py # Prime requires that the title of the CT be equal to the # 3-letter code (instead of the description): st.title = st.atom[1].pdbres.strip() return st