# Source code for schrodinger.protein.residue

# -*- coding: utf-8 -*-

import copy
import functools
import itertools
import types
import typing
import weakref
from enum import Enum

import decorator

from schrodinger import structure
from schrodinger.models import json
from schrodinger.protein import annotation
from schrodinger.protein import constants
from schrodinger.protein import nonstandard_residues
from schrodinger.protein import properties

# Display name given to the "unknown" protein and nucleic acid element types.
_UNKNOWN_NAME = "Unknown"
# Score returned by get_matrix_value for a pair missing from the matrix.
_DEFAULT_SIMILARITY = 0.0
# One-letter amino acid code -> homology group number. Each string lists the
# members of one group; groups are numbered from 1 in the order given.
_HOMOLOGY_GROUPS = {
    code: group
    for group, codes in enumerate(
        ("DE", "RKH", "GAVILM", "FYW", "STNQ", "C", "P"), start=1)
    for code in codes
}


def check_if_comparable(default_return=0):
    """
    Build a decorator that short-circuits residue comparison methods.

    The decorated method is skipped and `default_return` is returned when:

    - the other element is a gap
    - the two residue types are instances of different classes (e.g.
      protein/DNA or DNA/RNA)
    - either residue type is named as the unknown type

    :param default_return: Value returned when the residues are not comparable
    :return: A decorator for methods called as `method(self, other, ...)`
    """

    @decorator.decorator
    def dec(func, self, other, *args, **kwargs):
        # The gap test runs before anything reads `other.type`
        if other.is_gap:
            return default_return
        # Exact class identity, so sibling subclasses count as different
        if type(self.type) is not type(other.type):
            return default_return
        if self.type.name == _UNKNOWN_NAME:
            return default_return
        if other.type.name == _UNKNOWN_NAME:
            return default_return
        return func(self, other, *args, **kwargs)

    return dec
class ResidueChainKey(typing.NamedTuple):
    """
    Partial identifier for a structured residue within its chain: the residue
    number together with the insertion code.
    """
    # TODO MSV-2379 account for items in WHResidue.d_hash
    resnum: int
    inscode: str
class ResidueKey(typing.NamedTuple):
    """
    Partial identifier for a structured residue: entry, chain, residue number
    and insertion code. Order and items based on
    `MaestroStructureModel._getKey`
    """
    entry_id: int
    chain: str
    resnum: int
    inscode: str

    def chainKey(self):
        """
        :return: The chain-relative part of this key (resnum and inscode)
        :rtype: ResidueChainKey
        """
        return ResidueChainKey(resnum=self.resnum, inscode=self.inscode)
def get_matrix_value(matrix, first, second):
    """
    Look up the similarity score of a residue pair.

    The pair is tried in the given order first and then reversed, so the
    matrix only needs to store one of the two orderings. If neither ordering
    is present, the default similarity is returned.

    :param matrix: Mapping of `(residue, residue)` pairs to scores
    :param first: First residue of the pair
    :param second: Second residue of the pair
    :return: The score stored for the pair, or the default similarity
    """
    for pair in ((first, second), (second, first)):
        if pair in matrix:
            return matrix[pair]
    return _DEFAULT_SIMILARITY
def box_letter(letter):
    """
    Return the circled Unicode counterpart of an ASCII letter.

    :param letter: ASCII letter to map
    :type letter: str

    :return: The circled form of `letter`
    :rtype: str

    :raises ValueError: if the input is not an ASCII letter
    """
    codepoint = ord(letter)
    # Each shift is the distance from the ASCII range to the circled range
    if ord("A") <= codepoint <= ord("Z"):
        shift = 9333
    elif ord("a") <= codepoint <= ord("z"):
        shift = 9327
    else:
        raise ValueError(f"Could not map {letter}")
    return chr(codepoint + shift)
def get_structure_residue_chain_key(structure_residue):
    """
    Build the chain-relative key of a structure residue.

    :param structure_residue: Object exposing `resnum` and `inscode`

    :return: (resnum, inscode)
    :rtype: tuple(int, str)
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    resnum = structure_residue.resnum
    inscode = structure_residue.inscode
    return (resnum, inscode)
def get_residue_key(residue, entry_id, chain):
    """
    Build the full residue key of a residue.

    :param residue: Residue
    :type residue: Residue

    :param entry_id: Entry ID
    :type entry_id: str or int

    :param chain: Chain name
    :type chain: str

    :return: The residue key containing entry_id, chain, resnum, and inscode
    :rtype: ResidueKey
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    numeric_entry_id = int(entry_id)
    chain_key = residue.getChainKey()
    return ResidueKey(numeric_entry_id, chain, *chain_key)
def get_structure_residue_key(structure_residue, entry_id):
    """
    Build the full residue key of a structure residue.

    :param structure_residue: Structure residue
    :type structure_residue: schrodinger.structure._Residue

    :param entry_id: Entry ID
    :type entry_id: str or int

    :return: The residue key containing entry_id, chain, resnum, and inscode
    :rtype: ResidueKey
    """
    # TODO MSV-2379 account for all items in WHResidue.d_hash
    numeric_entry_id = int(entry_id)
    chain_name = structure_residue.chain
    chain_key = get_structure_residue_chain_key(structure_residue)
    return ResidueKey(numeric_entry_id, chain_name, *chain_key)
def get_formatted_residue(res) -> str:
    """
    Format a residue as its long code, residue number and insertion code
    (whitespace-stripped), with no separators.

    :param res: residue
    :type res: residue.Residue

    :return: formatted string
    """
    name = res.long_code
    number = res.resnum
    insertion_code = res.inscode.strip()
    return f'{name}{number}{insertion_code}'
def get_formatted_residue_range(start_res, end_res) -> str:
    """
    Format a residue range as the two formatted residues joined by " - ".

    :param start_res: starting residue
    :type start_res: residue.Residue

    :param end_res: ending residue
    :type end_res: residue.Residue

    :return: a formatted string to be used
    """
    first = get_formatted_residue(start_res)
    last = get_formatted_residue(end_res)
    return f'{first} - {last}'
def order_contiguous(residues):
    """
    Check if a list of residues is contiguous, and put them in contiguous
    order if they are not.

    The input is not modified; the search works on a private copy.

    :type residues: list
    :param residues: List of `schrodinger.structure._Residue` objects

    :rtype: list
    :return: List of `schrodinger.structure._Residue` objects in contiguous
        order, or None if the residues were not contiguous. An empty (or
        falsy) input gives an empty list.
    """
    # Copy so that the caller's list is not consumed by the pops below
    remaining = list(residues) if residues else []
    if not remaining:
        return []

    ordered_residues = [remaining.pop(0)]
    # Each pass attaches exactly one residue to either end of the ordered
    # run; a pass that attaches nothing means the rest is disconnected.
    while remaining:
        for index, candidate in enumerate(remaining):
            if ordered_residues[-1].isConnectedToResidue(candidate):
                # Connected to last residue in the ordered list
                ordered_residues.append(remaining.pop(index))
                break
            if candidate.isConnectedToResidue(ordered_residues[0]):
                # Connected to first residue in the ordered list
                ordered_residues.insert(0, remaining.pop(index))
                break
        else:
            # There remain some unconnected residues
            return None
    return ordered_residues
# Categorical residue properties. Each enum is created with the functional
# Enum API, so members are numbered from 1 in the order listed.
HELIX_PROPENSITY = Enum(
    'HELIX_PROPENSITY',
    ['NoPropensity', 'Likely', 'Weak', 'Ambivalent', 'HelixBreaking'])
BETA_STRAND_PROPENSITY = Enum(
    'BETA_STRAND_PROPENSITY',
    ['NoPropensity', 'StrandBreaking', 'Ambivalent', 'StrandForming'])
TURN_PROPENSITY = Enum(
    'TURN_PROPENSITY',
    ['NoPropensity', 'TurnForming', 'Ambivalent', 'TurnBreaking'])
HELIX_TERMINATION_TENDENCY = Enum(
    'HELIX_TERMINATION_TENDENCY',
    ['NoTendency', 'HelixStarting', 'Ambivalent', 'HelixEnding'])
SOLVENT_EXPOSURE_TENDENCY = Enum(
    'SOLVENT_EXPOSURE_TENDENCY',
    ['NoTendency', 'Surface', 'Ambivalent', 'Buried'])
STERIC_GROUP = Enum('STERIC_GROUP',
                    ['NoSteric', 'Small', 'Ambivalent', 'Polar', 'Aromatic'])
SIDE_CHAIN_CHEM = Enum('SIDE_CHAIN_CHEM', [
    'NoSideChainChem', 'AcidicHydrophilic', 'BasicHydrophilic',
    'NeutralHydrophobicAliphatic', 'NeutralHydrophobicAromatic',
    'NeutralHydrophilic', 'PrimaryThiol', 'IminoAcid'
])
RESIDUE_CHARGE = Enum('RESIDUE_CHARGE', ['Positive', 'Negative', 'Neutral'])

# Color Block Tooltip Map.
# Maps members of the enums above to tooltip text.
# NOTE(review): STERIC_GROUP.NoSteric and SIDE_CHAIN_CHEM.NoSideChainChem have
# no entry here, unlike the other "No*" members which map to "" -- confirm
# that this is intentional.
CB_TT_MAP = {
    HELIX_PROPENSITY.NoPropensity: "",
    HELIX_PROPENSITY.Likely: "helix-forming",
    HELIX_PROPENSITY.Weak: "weak helix-forming",
    HELIX_PROPENSITY.Ambivalent: "ambivalent",
    HELIX_PROPENSITY.HelixBreaking: "helix-breaking",
    BETA_STRAND_PROPENSITY.NoPropensity: "",
    BETA_STRAND_PROPENSITY.StrandBreaking: "strand-breaking",
    BETA_STRAND_PROPENSITY.Ambivalent: "ambivalent",
    BETA_STRAND_PROPENSITY.StrandForming: "strand-forming",
    TURN_PROPENSITY.NoPropensity: "",
    TURN_PROPENSITY.TurnForming: "turn-forming",
    TURN_PROPENSITY.Ambivalent: "ambivalent",
    TURN_PROPENSITY.TurnBreaking: "turn-breaking",
    HELIX_TERMINATION_TENDENCY.NoTendency: "",
    HELIX_TERMINATION_TENDENCY.HelixStarting: "helix-starting",
    HELIX_TERMINATION_TENDENCY.Ambivalent: "ambivalent",
    HELIX_TERMINATION_TENDENCY.HelixEnding: "helix-ending",
    SOLVENT_EXPOSURE_TENDENCY.NoTendency: "",
    SOLVENT_EXPOSURE_TENDENCY.Surface: "surface",
    SOLVENT_EXPOSURE_TENDENCY.Ambivalent: "ambivalent",
    SOLVENT_EXPOSURE_TENDENCY.Buried: "buried",
    STERIC_GROUP.Small: "small, non-interfering",
    STERIC_GROUP.Ambivalent: "ambivalent",
    STERIC_GROUP.Polar: "sticky polar",
    STERIC_GROUP.Aromatic: "aromatic",
    SIDE_CHAIN_CHEM.AcidicHydrophilic: "acidic, hydrophilic",
    SIDE_CHAIN_CHEM.BasicHydrophilic: "basic, hydrophilic",
    SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic: "neutral, hydrophobic, aliphatic",
    SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic: "neutral, hydrophobic, aromatic",
    SIDE_CHAIN_CHEM.NeutralHydrophilic: "neutral, hydrophilic",
    SIDE_CHAIN_CHEM.PrimaryThiol: "primary thiol",
    SIDE_CHAIN_CHEM.IminoAcid: "imino acid",
    RESIDUE_CHARGE.Positive: "positive",
    RESIDUE_CHARGE.Negative: "negative",
    RESIDUE_CHARGE.Neutral: "neutral",
}

# Non-standard Amino Acids Tooltip Map.
# Keyed by three-letter residue name.
NON_STD_AA_TT_MAP = {
    'LYN': 'deprotonated LYS',
    'ARN': 'deprotonated ARG',
    'ASH': 'protonated ASP',
    'GLH': 'protonated GLU',
    'HID': 'δ-nitrogen protonated HIS',
    'HIE': 'ε-nitrogen protonated HIS',
    'HIP': 'δ and ε-nitrogen protonated HIS'
}

# Display text for the secondary structure constants of the structure module.
SSA_TT_MAP = {
    structure.SS_NONE: "None",
    structure.SS_LOOP: "Loop",
    structure.SS_HELIX: "Helix",
    structure.SS_STRAND: "Strand",
    structure.SS_TURN: "Turn",
}

# One-letter DSSP code -> description.
DSSP_CODES = {
    "G": "3-turn helix",
    "H": "4-turn helix",
    "I": "5-turn helix",
    "T": "hydrogen bonded turn",
    "E": "extended strand in parallel and/or anti-parallel Beta sheet conformation",
    "B": "residue in isolated Beta-bridge",
    "S": "bend (non-hydrogen-bond based)",
    "C": "coil",
}
class ElementType:
    """
    Description of a kind of sequence element, identified by a short code, a
    long code and a name.
    """

    def __init__(self, short_code, long_code, name):
        """
        :param short_code: Short code of the element type (also its `str`)
        :type short_code: str

        :param long_code: Long code of the element type
        :type long_code: str

        :param name: Full name of the element type
        :type name: str
        """
        self.short_code, self.long_code, self.name = (short_code, long_code,
                                                      name)
        # Only variants created through makeVariant are flagged nonstandard
        self.nonstandard = False

    def __str__(self):
        return self.short_code

    def __repr__(self):
        template = "{cls}('{short_code}', '{long_code}', '{name}')"
        return template.format(cls=type(self).__name__,
                               short_code=self.short_code,
                               long_code=self.long_code,
                               name=self.name)

    def makeVariant(self, long_code, short_code=None, *, nonstandard=True):
        """
        Create a variant of an element type with a new long and short code.

        The variant is a deep copy, so later changes to it do not affect the
        element type it was made from.

        :param long_code: A 2+ character string representing the element type
        :type long_code: str

        :param short_code: A 1 character string representing the element type.
            If None, the short code of this element type is kept.
        :type short_code: str

        :param nonstandard: Whether the variant should be considered
            nonstandard. If False, the residue will be generally treated
            identically to its standard (e.g. HIE/HIS)
        :type nonstandard: bool

        :return: The variant element type
        :rtype: ElementType
        """
        variant = copy.deepcopy(self)
        variant.long_code = long_code
        if short_code is not None:
            variant.short_code = short_code
        variant.nonstandard = nonstandard
        return variant
class ResidueType(ElementType):
    """
    Element type that additionally carries physico-chemical properties. Every
    property is optional and defaults to None.
    """

    def __init__(self,
                 short_code,
                 long_code,
                 name,
                 charge=None,
                 hydrophobicity=None,
                 hydrophilicity=None,
                 helix_propensity=None,
                 beta_strand_propensity=None,
                 turn_propensity=None,
                 helix_termination_tendency=None,
                 exposure_tendency=None,
                 steric_group=None,
                 side_chain_chem=None,
                 isoelectric_point=None):
        """
        :param short_code: Short code of the residue type
        :type short_code: str

        :param long_code: Long code of the residue type
        :type long_code: str

        :param name: Full name of the residue type
        :type name: str

        :param charge: Charge category
        :type charge: RESIDUE_CHARGE or None

        :param hydrophobicity: Hydrophobicity value
        :type hydrophobicity: float or None

        :param hydrophilicity: Hydrophilicity value
        :type hydrophilicity: float or None

        :param helix_propensity: Helix propensity category
        :type helix_propensity: HELIX_PROPENSITY or None

        :param beta_strand_propensity: Beta strand propensity category
        :type beta_strand_propensity: BETA_STRAND_PROPENSITY or None

        :param turn_propensity: Turn propensity category
        :type turn_propensity: TURN_PROPENSITY or None

        :param helix_termination_tendency: Helix termination category
        :type helix_termination_tendency: HELIX_TERMINATION_TENDENCY or None

        :param exposure_tendency: Solvent exposure category
        :type exposure_tendency: SOLVENT_EXPOSURE_TENDENCY or None

        :param steric_group: Steric group category
        :type steric_group: STERIC_GROUP or None

        :param side_chain_chem: Side chain chemistry category
        :type side_chain_chem: SIDE_CHAIN_CHEM or None

        :param isoelectric_point: Isoelectric point
        :type isoelectric_point: float or None
        """
        super().__init__(short_code, long_code, name)
        self.charge = charge
        self.hydrophobicity = hydrophobicity
        # Previously accepted but silently discarded, leaving instances
        # without a `hydrophilicity` attribute.
        self.hydrophilicity = hydrophilicity
        self.helix_propensity = helix_propensity
        self.beta_strand_propensity = beta_strand_propensity
        self.turn_propensity = turn_propensity
        self.helix_termination_tendency = helix_termination_tendency
        self.exposure_tendency = exposure_tendency
        self.steric_group = steric_group
        self.side_chain_chem = side_chain_chem
        self.isoelectric_point = isoelectric_point
# TODO MSV-1504 determine what nucleotides need and change parent to ElementType
class NucleotideType(ResidueType):
    """
    Element type for nucleotides. Adds nothing to `ResidueType`; it exists so
    that nucleotide types can be told apart by class.
    """
class DeoxyribonucleotideType(NucleotideType):
    """
    Nucleotide type used for the DNA bases. Adds nothing to `NucleotideType`.
    """
class RibonucleotideType(NucleotideType):
    """
    Nucleotide type used for the RNA bases. Adds nothing to `NucleotideType`.
    """
# Std Amino Acids ALANINE = ResidueType( "A", "ALA", "Alanine", hydrophobicity=1.80, hydrophilicity=-0.50, charge=RESIDUE_CHARGE.Neutral, helix_propensity=HELIX_PROPENSITY.Likely, beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent, turn_propensity=TURN_PROPENSITY.TurnBreaking, helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent, exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent, steric_group=STERIC_GROUP.Small, side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic, isoelectric_point=6.00) ARGININE = ResidueType( "R", "ARG", "Arginine", hydrophobicity=-4.50, hydrophilicity=3.00, charge=RESIDUE_CHARGE.Positive, helix_propensity=HELIX_PROPENSITY.Ambivalent, beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent, turn_propensity=TURN_PROPENSITY.Ambivalent, helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting, exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface, steric_group=STERIC_GROUP.Polar, side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic, isoelectric_point=10.76) ASPARAGINE = ResidueType( "N", "ASN", "Asparagine", hydrophobicity=-3.50, hydrophilicity=0.20, charge=RESIDUE_CHARGE.Neutral, helix_propensity=HELIX_PROPENSITY.Ambivalent, beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent, turn_propensity=TURN_PROPENSITY.TurnForming, helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding, exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface, steric_group=STERIC_GROUP.Ambivalent, side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic, isoelectric_point=5.41) ASPARTIC_ACID = ResidueType( "D", "ASP", "Aspartic acid", hydrophobicity=-3.50, hydrophilicity=3.00, charge=RESIDUE_CHARGE.Negative, helix_propensity=HELIX_PROPENSITY.Ambivalent, beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming, turn_propensity=TURN_PROPENSITY.TurnForming, helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding, steric_group=STERIC_GROUP.Ambivalent, side_chain_chem=SIDE_CHAIN_CHEM.AcidicHydrophilic, isoelectric_point=2.77) 
CYSTEINE = ResidueType(
    "C",
    "CYS",
    "Cysteine",
    hydrophobicity=2.50,
    hydrophilicity=-1.00,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.PrimaryThiol,
    isoelectric_point=5.07)
GLUTAMIC_ACID = ResidueType(
    "E",
    "GLU",
    "Glutamic acid",
    hydrophobicity=-3.50,
    hydrophilicity=3.00,
    charge=RESIDUE_CHARGE.Negative,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.AcidicHydrophilic,
    isoelectric_point=3.22)
GLUTAMINE = ResidueType(
    "Q",
    "GLN",
    "Glutamine",
    hydrophobicity=-3.50,
    hydrophilicity=0.20,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.65)
GLYCINE = ResidueType(
    "G",
    "GLY",
    "Glycine",
    hydrophobicity=-0.40,
    hydrophilicity=0.00,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.97)
HISTIDINE = ResidueType(
    "H",
    "HIS",
    "Histidine",
    hydrophobicity=-3.20,
    hydrophilicity=-0.50,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=7.59)
ISOLEUCINE = ResidueType(
    "I",
    "ILE",
    "Isoleucine",
    hydrophobicity=4.50,
    hydrophilicity=-1.80,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=6.02)
LEUCINE = ResidueType(
    "L",
    "LEU",
    "Leucine",
    hydrophobicity=3.80,
    hydrophilicity=-1.80,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.98)
LYSINE = ResidueType(
    "K",
    "LYS",
    "Lysine",
    hydrophobicity=-3.90,
    hydrophilicity=3.00,
    charge=RESIDUE_CHARGE.Positive,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Surface,
    steric_group=STERIC_GROUP.Polar,
    side_chain_chem=SIDE_CHAIN_CHEM.BasicHydrophilic,
    isoelectric_point=9.74)
METHIONINE = ResidueType(
    "M",
    "MET",
    "Methionine",
    hydrophobicity=1.90,
    hydrophilicity=-1.30,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Likely,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.74)
PHENYLALANINE = ResidueType(
    "F",
    "PHE",
    "Phenylalanine",
    hydrophobicity=2.80,
    hydrophilicity=-2.50,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.48)
PROLINE = ResidueType(
    "P",
    "PRO",
    "Proline",
    hydrophobicity=-1.60,
    hydrophilicity=0.00,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandForming,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.IminoAcid,
    isoelectric_point=6.30)
SERINE = ResidueType(
    "S",
    "SER",
    "Serine",
    hydrophobicity=-0.80,
    hydrophilicity=0.30,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.Ambivalent,
    turn_propensity=TURN_PROPENSITY.TurnForming,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Small,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    isoelectric_point=5.58)
THREONINE = ResidueType(
    "T",
    "THR",
    "Threonine",
    hydrophobicity=-0.70,
    hydrophilicity=-0.40,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Ambivalent,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixStarting,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophilic,
    steric_group=STERIC_GROUP.Ambivalent,
    isoelectric_point=5.60)
TRYPTOPHAN = ResidueType(
    "W",
    "TRP",
    "Tryptophan",
    hydrophobicity=-0.90,
    hydrophilicity=-3.40,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.HelixEnding,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.89)
TYROSINE = ResidueType(
    "Y",
    "TYR",
    "Tyrosine",
    hydrophobicity=-1.30,
    hydrophilicity=-2.30,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.HelixBreaking,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.Ambivalent,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Ambivalent,
    steric_group=STERIC_GROUP.Aromatic,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAromatic,
    isoelectric_point=5.66)
# Unknown amino acid: no properties are set, so they all stay None.
UNKNOWN = ResidueType("X", "UNK", _UNKNOWN_NAME)
VALINE = ResidueType(
    "V",
    "VAL",
    "Valine",
    hydrophobicity=4.20,
    hydrophilicity=-1.50,
    charge=RESIDUE_CHARGE.Neutral,
    helix_propensity=HELIX_PROPENSITY.Weak,
    beta_strand_propensity=BETA_STRAND_PROPENSITY.StrandBreaking,
    turn_propensity=TURN_PROPENSITY.TurnBreaking,
    helix_termination_tendency=HELIX_TERMINATION_TENDENCY.Ambivalent,
    exposure_tendency=SOLVENT_EXPOSURE_TENDENCY.Buried,
    steric_group=STERIC_GROUP.Ambivalent,
    side_chain_chem=SIDE_CHAIN_CHEM.NeutralHydrophobicAliphatic,
    isoelectric_point=5.96)

# Base type for the capping group variants listed in CAPPING_GROUP_VARIANTS;
# its own long code is empty.
CAPPING_GROUP = ResidueType("X", "", "Capping Group")

# Unknown nucleotide; shares the "UNK" long code and name with UNKNOWN but
# has short code "N".
UNKNOWN_NA = NucleotideType("N", "UNK", _UNKNOWN_NAME)

#RNA
ADENINE = RibonucleotideType("A", "A", "Adenine")
CYTOSINE = RibonucleotideType("C", "C", "Cytosine")
GUANINE = RibonucleotideType("G", "G", "Guanine")
URACIL = RibonucleotideType("U", "U", "Uracil")

#DNA
dADENINE = DeoxyribonucleotideType("A", "DA", "Adenine")
dCYTOSINE = DeoxyribonucleotideType("C", "DC", "Cytosine")
dGUANINE = DeoxyribonucleotideType("G", "DG", "Guanine")
dTHYMINE = DeoxyribonucleotideType("T", "DT", "Thymine")

# yapf: disable
STD_AMINO_ACIDS = [
    ALANINE, CYSTEINE, ASPARTIC_ACID, GLUTAMIC_ACID, PHENYLALANINE, GLYCINE,
    HISTIDINE, ISOLEUCINE, LYSINE, LEUCINE, METHIONINE, ASPARAGINE, PROLINE,
    GLUTAMINE, ARGININE, SERINE, THREONINE, VALINE, TRYPTOPHAN, TYROSINE,
]
STD_RNA_BASES = [ADENINE, CYTOSINE, GUANINE, URACIL]
STD_DNA_BASES = [dADENINE, dCYTOSINE, dGUANINE, dTHYMINE]

# Input tables for make_variants. Each row pairs a base type with its variant
# entries: a bare string is the variant's long code (the base type's short
# code is kept), and a (long code, short code) tuple overrides both codes.
AMINO_ACID_VARIANTS = [
    (ALANINE, [
        "AIB", "ALM", "AYA", "BNN", "CHG", "CSD", ("DAL", "X"), "DHA", "DNP",
        "FLA", "HAC", "MAA", "PRR", "TIH", "TPQ",
    ]),
    (ARGININE, ["AGM", ("DAR", "X"), "HAR", "MMO", "ARM", "ARN", "HMR", "ACL"]),
    (ASPARAGINE, ["MEN", ("DSG", "X")]),
    (ASPARTIC_ACID, [
        "DSP", "BHD", "2AS", "ASQ", "ASB", "ASA", "ASK", "ASH", "ASL",
        ("DAS", "X")
    ]),
    (CYSTEINE, [("BCS", "X"), "BUC", "C5C", "C6C", "CCS", "CEA", "CME", "CSO",
                "CSP", "CSS", "CSW", "CSX", "CY1", "CY3", "CYG", "CYM", "CYP",
                "CYQ", "CYX", ("DCY", "X"), "EFC", "OCS", "PEC", "PR3", "SCH",
                "SCS", "SCY", "SHC", "SMC", "SOC"]),
    (GLUTAMIC_ACID, ["GLH", "GGL", 'PCA', '5HP', ('DGL', "X"), 'CGU', 'GMA']),
    (GLUTAMINE, [("DGN", "X")]),
    (GLYCINE, ["GLZ", "SAR", 'NMC', 'GL3', 'GSC', 'MPQ', 'MSA']),
    (HISTIDINE, [("DHI", "X"), "HID", "HIC", "HIE", "HIP", "HSD", "HSE", "HSP",
                 "MHS", "NEM", "NEP", "3AH"]),
    (ISOLEUCINE, [('DIL', "X"), 'IIL']),
    (LEUCINE, ["BUG", ("NLE", "X"), 'NLP', 'NLN', ('DLE', "X"), 'CLE', 'MLE']),
    (LYSINE, [
        'LYM', 'ALY', 'LYZ', 'LYN', 'LLY', 'LLP', 'SHR', 'TRG', ('DLY', "X"),
        'KCX'
    ]),
    (METHIONINE, ["FME", "CXM", "OMT", "MSE"]),
    (PHENYLALANINE, ["DAH", ("DPN", "X"), "HPQ", "PHI", "PHL"]),
    (PROLINE, [('DPR', "X"), ('HYP', "X")]),
    (SERINE, ['OAS', 'MIS', 'SAC', 'SVA', 'SET', ('SEP', "X"), 'SEL',
              ("DSN", "X")]),
    (THREONINE, ["ALO", "BMT", ("DTH", "X"), "THO", ("TPO", "t")]),
    (TRYPTOPHAN, [("DTR", "X"), "HTR", "LTR", "TPL", "TRO"]),
    (TYROSINE, [("DTY", "X"), "IYR", "PAQ", ("PTR", "y"), "STY", "TYB", "TYM",
                "TYO", "TYQ", "TYS", "TYY"]),
    (VALINE, ["DIV", ("DVA", "X"), "MVA"])
]
NUCLEOBASE_VARIANTS = [
    (ADENINE, ["AMP", "ADP", "ATP", "1MA", "6MA"]),
    (CYTOSINE, ["CMP", "CDP", "CTP", "5MC", "5HC", "5FC", "1CC", "OMC"]),
    (GUANINE, ["GMP", "GDP", "GTP", "1MG", "2MG", "M2G", "7MG", "OMG"]),
    (URACIL, ["UMP", "UDP", "UTP", ("PSU", "Ψ"), "H2U", "5MU", "DU"]),
    (dTHYMINE, ["TMP", "TDP", "TTP"]),
    (UNKNOWN_NA, ["YYG", ("I", "I"), ("DI", "DI")])
]
CAPPING_GROUP_VARIANTS = [
    (CAPPING_GROUP, ['ACE', 'NMA', 'IND', 'NCO', 'ANF', 'TOSG', 'FCO', 'MPA',
                     'NH2'])
]
# yapf: enable
def make_variants(variants):
    """
    Helper function to create modified amino acids and modified nucleotides

    :param variants: A list of modified residues. The modified residue will
        have all the same properties as the standard one (hydrophobicity,
        charge, etc.) but its long code (ie, its PDB residue name) will
        differ, and if a tuple of (string, string) is provided, both its long
        code and short code will differ.
    :type variants: list of (`ResidueType`, list of string or (string,string))

    :returns: a list of residue variants, in the order of the input
    :rtype: list of `ResidueType`
    """

    def _split_codes(res, entry):
        # A bare string only names the long code; the short code is inherited
        if not isinstance(entry, tuple):
            return entry, res.short_code
        three_letter, one_letter = entry
        return three_letter, one_letter

    return [
        res.makeVariant(*_split_codes(res, entry))
        for res, variant_list in variants
        for entry in variant_list
    ]
def merge_dicts(*dict_args):
    """
    Combine any number of dictionaries into one new dictionary.

    Note that repeated keys will be silently overwritten with the last value.

    :param dict_args: Dictionaries to merge, in order of increasing priority
    :return: A new dictionary holding the items of all the inputs
    :rtype: dict
    """
    item_views = [mapping.items() for mapping in dict_args]
    merged = {}
    for items in item_views:
        merged.update(items)
    return merged
def make_one_letter_map(res_list):
    """
    Index element types by short code.

    If several items share a short code, the last one wins.

    :param res_list: Iterable of objects with a `short_code` attribute
    :return: Dictionary mapping short code to element type
    :rtype: dict
    """
    by_short_code = {}
    for res in res_list:
        by_short_code[res.short_code] = res
    return by_short_code
def make_three_letter_map(res_list):
    """
    Index element types by long code.

    If several items share a long code, the last one wins.

    :param res_list: Iterable of objects with a `long_code` attribute
    :return: Dictionary mapping long code to element type
    :rtype: dict
    """
    by_long_code = {}
    for res in res_list:
        by_long_code[res.long_code] = res
    return by_long_code
# Charge variants have their charge set correctly below. NON_STD_AMINO_ACIDS = make_variants(AMINO_ACID_VARIANTS) AMINO_ACIDS = STD_AMINO_ACIDS + NON_STD_AMINO_ACIDS + [UNKNOWN] STD_AMINO_ACIDS_ONE_LETTER = make_one_letter_map(STD_AMINO_ACIDS) STD_AMINO_ACIDS_THREE_LETTER = make_three_letter_map(STD_AMINO_ACIDS) AMINO_ACIDS_THREE_LETTER = make_three_letter_map(AMINO_ACIDS) AMINO_ACIDS_THREE_LETTER['HIP'].charge = RESIDUE_CHARGE.Positive for aa in ['ASH', 'ARN', 'GLH', 'HID', 'HIE', 'LYN']: AMINO_ACIDS_THREE_LETTER[aa].charge = RESIDUE_CHARGE.Neutral CAPPING_GROUPS = make_variants(CAPPING_GROUP_VARIANTS) CAPPING_GROUP_ALPHABET = make_three_letter_map(CAPPING_GROUPS) AMINO_ACIDS_AND_CAPPING_GROUPS = merge_dicts(STD_AMINO_ACIDS_ONE_LETTER, AMINO_ACIDS_THREE_LETTER, CAPPING_GROUP_ALPHABET) STD_AMINO_ACIDS_AND_CAPPING_GROUPS = merge_dicts(STD_AMINO_ACIDS_ONE_LETTER, STD_AMINO_ACIDS_THREE_LETTER, CAPPING_GROUP_ALPHABET) NON_STD_NUCLEOBASES = make_variants(NUCLEOBASE_VARIANTS) NUCLEOBASES = STD_RNA_BASES + STD_DNA_BASES + NON_STD_NUCLEOBASES DNA_NUCLEOBASES = STD_DNA_BASES RNA_NUCLEOBASES = STD_RNA_BASES DNA_THREE_LETTER = make_three_letter_map(DNA_NUCLEOBASES) RNA_THREE_LETTER = make_three_letter_map(RNA_NUCLEOBASES) NA_THREE_LETTER = make_three_letter_map(NUCLEOBASES) DNA_ONE_LETTER = make_one_letter_map(DNA_NUCLEOBASES) RNA_ONE_LETTER = make_one_letter_map(RNA_NUCLEOBASES) DNA_ALPHABET = merge_dicts(DNA_THREE_LETTER, DNA_ONE_LETTER) RNA_ALPHABET = merge_dicts(RNA_THREE_LETTER, RNA_ONE_LETTER) ALL_ELEMENT_TYPES = {} for ele_type in STD_AMINO_ACIDS + CAPPING_GROUPS + STD_RNA_BASES + STD_DNA_BASES + [ UNKNOWN, UNKNOWN_NA ]: ele_type_key = ele_type.short_code, ele_type.long_code, ele_type.name ALL_ELEMENT_TYPES[ele_type_key] = ele_type _nonstandard_residue_db = None _protein_alphabet = None
def get_protein_alphabet():
    """
    Return a cached map of amino acid element types. Includes definitions
    from the nonstandard residues database.

    On first use the residue database's `residuesChanged` signal is connected
    so that the cached map is discarded and rebuilt after the database
    changes.

    :rtype: types.MappingProxyType
    """
    global _protein_alphabet
    global _nonstandard_residue_db
    if _nonstandard_residue_db is None:
        _nonstandard_residue_db = nonstandard_residues.get_residue_database()
        _nonstandard_residue_db.residuesChanged.connect(
            _invalidate_protein_alphabet)
    if _protein_alphabet is not None:
        return _protein_alphabet

    # Later updates override earlier ones: database entries first, then the
    # standard amino acids and capping groups, then the built-in variants.
    alphabet = _get_nonstandard_residues()
    alphabet.update(STD_AMINO_ACIDS_AND_CAPPING_GROUPS)
    # Add standard nonstandards (e.g. HID/HIE/HIP)
    alphabet.update(_get_residue_variants())
    for code in (UNKNOWN.short_code, UNKNOWN.long_code):
        alphabet[code] = UNKNOWN
    _protein_alphabet = types.MappingProxyType(alphabet)
    return _protein_alphabet
def _invalidate_protein_alphabet():
    """
    Drop the cached protein alphabet so that the next call to
    `get_protein_alphabet` rebuilds it.
    """
    global _protein_alphabet
    _protein_alphabet = None


@functools.lru_cache()
def _get_residue_variants():
    """
    Get built-in variants of standard amino acids

    Built from `structure.RESIDUE_MAP_3_TO_1_LETTER`. Entries that are
    themselves standard three-letter codes, or whose one-letter code does not
    upper-case to a standard amino acid, are skipped. A variant is flagged
    nonstandard when its one-letter code differs from the standard one.

    :return: Dictionary mapping long code to variant element type
    :rtype: dict
    """
    variants = {}
    for long_code, short in structure.RESIDUE_MAP_3_TO_1_LETTER.items():
        if long_code in STD_AMINO_ACIDS_THREE_LETTER:
            continue
        std_type = STD_AMINO_ACIDS_ONE_LETTER.get(short.upper())
        if std_type is None:
            continue
        is_nonstandard = short != std_type.short_code
        variants[long_code] = std_type.makeVariant(long_code,
                                                   short,
                                                   nonstandard=is_nonstandard)
    return variants


def _get_nonstandard_residues():
    """
    Generate a dictionary of nonstandard amino acid element types from the
    nonstandard residues database.

    Entries that align with a standard amino acid become variants of that
    amino acid (keeping its short code); all others become variants of
    `UNKNOWN` with the database's own short code.

    :return: Dictionary mapping long code to element type
    :rtype: dict
    """
    element_types = {}
    # The database object is cached but this may need to read it from disk
    db = nonstandard_residues.get_residue_database()
    for aa in db.amino_acids:
        if aa.standard:
            continue
        short_code = aa.code
        long_code = aa.name
        aligns_with = aa.aligns_with
        if not aligns_with:
            element_types[long_code] = UNKNOWN.makeVariant(
                long_code, short_code)
            continue
        base_element_type = STD_AMINO_ACIDS_ONE_LETTER[aligns_with]
        element_types[long_code] = base_element_type.makeVariant(long_code)
    return element_types
def any_structured_residues(residues):
    """
    Report whether at least one of the given residues is structured.

    Checking stops at the first structured residue.

    :param residues: The iterable of residues to check
    :type residues: iterable(residue.Residue)

    :return: True if the given residues contain one that is structured.
    :rtype: bool
    """
    for res in residues:
        if res.hasStructure():
            return True
    return False
class AbstractSequenceElement(json.JsonableClassMixin):
    """
    Common behavior of sequence elements: copying, a weakly referenced link
    to the owning sequence and index lookup within that sequence.

    Subclasses are expected to provide `is_gap` and to store the sequence
    reference in `_sequence`.
    """

    # This class intentionally doesn't have an __init__ method since
    # instantiating Gap and Residue objects needs to be as fast as possible and
    # calling super().__init__ would slow down their __init__s.

    def _getNewInstance(self):
        """
        Helper for copying
        """
        return self.__class__()

    def __copy__(self):
        new_res = self._getNewInstance()
        new_res.is_gap = self.is_gap
        new_res.sequence = self.sequence
        return new_res

    def __deepcopy__(self, memo):
        # Same as __copy__ (none of the public attrs should be deepcopied)
        return copy.copy(self)

    @property
    def is_res(self):
        """
        Utility function to check whether a residue is not a gap
        """
        return not self.is_gap

    @property
    def sequence(self):
        """
        The sequence that this element is part of.  Will be `None` if this
        residue is not part of a sequence. Stored as a weakref to avoid
        circular references.

        :rtype: sequence.AbstractSingleChainSequence
        """
        if self._sequence is None:
            return None
        else:
            return self._sequence()

    @sequence.setter
    def sequence(self, value):
        if value is None:
            self._sequence = None
        else:
            self._sequence = weakref.ref(value)

    def hasStructure(self):
        """
        :return: Whether this element has a structure i.e. whether it has
            corresponding XYZ coordinates in Maestro.
        :rtype: bool
        """
        seq = self.sequence
        seq_has_structure = seq is not None and seq.hasStructure()
        return self.is_res and not self.seqres_only and seq_has_structure

    @property
    def idx_in_seq(self):
        """
        Return the index of this element within its sequence

        :return: Index of this element in its sequence or None if it is not
            part of a sequence.
        :rtype: int or None
        """
        # Dereference the weakref once: a second read could return None if
        # the sequence were collected between the check and the use.
        seq = self.sequence
        if seq is None:
            return None
        return seq.index(self)

    @property
    def gapless_idx_in_seq(self):
        """
        Return the index of this residue within its sequence ignoring gaps

        :return: Index of this residue in its sequence or None if it is not
            part of a sequence.
        :rtype: int or None
        """
        # Dereference the weakref once (see idx_in_seq)
        seq = self.sequence
        if seq is None:
            return None
        return seq.index(self, ignore_gaps=True)
class Gap(AbstractSequenceElement):
    """
    A gap in a sequence. Rendered as "~" and serialized as
    `{'is_gap': True}`.
    """
    # One-element tuple; without the trailing comma this was a bare string
    __slots__ = ("_sequence",)
    is_gap = True

    def __init__(self):
        self.sequence = None

    def __str__(self):
        return "~"

    def toJsonImplementation(self):
        """
        :return: Dictionary holding only the `is_gap` flag
        :rtype: dict
        """
        return {'is_gap': self.is_gap}

    @classmethod
    def fromJsonImplementation(cls, json_obj):
        """
        :param json_obj: Dictionary produced by `toJsonImplementation`
        :type json_obj: dict

        :return: A new gap
        :rtype: Gap

        :raises ValueError: if `json_obj` does not have `is_gap` set to True
        """
        if json_obj.get('is_gap', None) is not True:
            raise ValueError('Attempting to deserialize a non-gap object')
        return cls()
class Residue(AbstractSequenceElement):
    """
    An amino acid residue.

    Per-residue data lives in `__slots__`. Slots that have never been assigned
    are lazily initialized by `__getattr__` to the value listed in
    `_DEFAULT_ATTR_VALS` (or to None), which keeps `__init__` cheap.
    """
    __slots__ = (
        "_sequence",
        "type",
        "_inscode",
        "_resnum",
        "seqres_only",
        "disulfide_bond",
        "pred_disulfide_bond",
        "b_factor",
        "molecule_number",
        "pfam",  # Either None or 1-char string
        "pred_secondary_structure",  # Structure.SS_HELIX/SS_STRAND/etc
        "secondary_structure",  # structure.SS_HELIX/SS_STRAND/etc
        "pred_accessibility",  # predictors.SolventAccessibility
        "pred_disordered",  # predictors.Disordered
        "pred_domain_arr",  # predictors.DomainArrangement
        "area",  # Solvent accessible area
        "composition",  # Amino acid composition in profile residues.
        "domains",  # name of the domain(s) to which the residue belongs
        "kinase_features",  # kinase feature label
        "_kinase_conservation",  # kinase conservation category
        "_descriptors",  # dict mapping descriptor name to val
        "gpcr_segment",
        "gpcr_generic_number",
    )
    # Default values for any attributes that shouldn't default to None;
    # must not be mutable
    _DEFAULT_ATTR_VALS = {"area": 0.0}
    is_gap = False

    @property
    def descriptors(self):
        """
        Dict mapping descriptor name to value; created on first access.

        :rtype: dict
        """
        if self._descriptors is None:
            self._descriptors = dict()
        return self._descriptors

    @property
    def kinase_conservation(self):
        """
        Dict of kinase conservation categories; created on first access.

        :rtype: dict
        """
        if self._kinase_conservation is None:
            self._kinase_conservation = dict()
        return self._kinase_conservation

    def __init__(self,
                 element_type,
                 inscode=None,
                 resnum=None,
                 seqres_only=False):
        """
        :param element_type: The kind of the residue
        :type element_type: ResidueType

        :param inscode: The insertion code
        :type inscode: str

        :param resnum: PDB residue number
        :type resnum: int

        :param seqres_only: Whether this residue only appears in the SEQRES
            record of a structure. Only applies to sequences associated with a
            structure.
        :type seqres_only: bool
        """
        self.type = element_type
        self._inscode = inscode
        self._resnum = resnum
        self.seqres_only = seqres_only
        # Do *not* add new instance attributes here, as it will slow down
        # Residue instantiation. Instead, add the attribute name to __slots__
        # and add the initial value to `_DEFAULT_ATTR_VALS` unless the initial
        # value should be None.

    def __getattr__(self, attr):
        """
        Lazily supply the default value for a slot that was never assigned.

        :raise AttributeError: If `attr` is not one of the class's slots
        """
        if attr not in self.__slots__:
            raise AttributeError(
                f"'{self.__class__.__name__}' has no attribute '{attr}'")
        val = self._DEFAULT_ATTR_VALS.get(attr)
        # set the value of the attribute so that repeatedly accessing it doesn't
        # incur time penalties from repeated __getattr__ calls
        setattr(self, attr, val)
        return val

    def toJsonImplementation(self):
        """
        Serialize the residue. Optional attributes are only written when they
        are not None.

        :rtype: dict
        """
        json_dict = {
            'inscode': self.inscode,
            'resnum': self.resnum,
            'element_type': [
                self.type.short_code, self.type.long_code, self.type.name
            ],
            'seqres_only': self.seqres_only,
        }
        for key, val in (
            ('b_factor', self.b_factor),
            ('secondary_structure', self.secondary_structure),
            ('pred_secondary_structure', self.pred_secondary_structure),
            ('pred_accessibility', self.pred_accessibility),
            ('pred_disordered', self.pred_disordered),
            ('pred_domain_arr', self.pred_domain_arr),
            ('area', self.area),
            ('composition', self.composition),
            ('kinase_features', self.kinase_features),
            ('_descriptors', self._descriptors),
            ('gpcr_segment', self.gpcr_segment),
            ('gpcr_generic_number', self.gpcr_generic_number),
        ):
            if val is not None:
                json_dict[key] = val
        if self._kinase_conservation is not None:
            # JSON requires string keys, so cast to str
            json_dict['_kinase_conservation'] = {
                str(k): v for k, v in self._kinase_conservation.items()
            }
        return json_dict

    @classmethod
    def fromJsonImplementation(cls, json_obj):
        """
        Rebuild a residue from the output of `toJsonImplementation`.

        :param json_obj: The serialized residue. It is only read, never
            modified.
        :type json_obj: dict
        """
        # TODO MSV-1504: separate logic for different sequence types
        # Index rather than pop so that the caller's dict is left untouched
        elem_type = tuple(json_obj['element_type'])
        try:
            res_type = ALL_ELEMENT_TYPES[elem_type]
        except KeyError:
            long_code = elem_type[1]
            res_type = get_protein_alphabet().get(long_code)
            if res_type is None:
                # Not a known type: create a variant of the matching unknown
                # placeholder type
                short_code = elem_type[0]
                if short_code == UNKNOWN_NA.short_code:
                    res_type = UNKNOWN_NA.makeVariant(long_code)
                else:
                    res_type = UNKNOWN.makeVariant(long_code)
        res = cls(element_type=res_type,
                  inscode=json_obj['inscode'],
                  seqres_only=json_obj['seqres_only'],
                  resnum=json_obj['resnum'])
        # Attributes that are stored in the JSON as plain values
        for attr_name in ('area', 'composition', 'b_factor',
                          'secondary_structure', 'pred_secondary_structure'):
            if attr_name in json_obj:
                setattr(res, attr_name, json_obj[attr_name])
        # Imported locally, as in the original code
        from schrodinger.protein import predictors
        if json_obj.get('pred_accessibility'):
            res.pred_accessibility = predictors.SolventAccessibility.fromJson(
                json_obj['pred_accessibility'])
        if json_obj.get('pred_disordered'):
            res.pred_disordered = predictors.Disordered.fromJson(
                json_obj['pred_disordered'])
        if json_obj.get('pred_domain_arr'):
            res.pred_domain_arr = predictors.DomainArrangement.fromJson(
                json_obj['pred_domain_arr'])
        kinase_feature = json_obj.get('kinase_features')
        if kinase_feature is not None:
            res.kinase_features = annotation.KinaseFeatureLabel.fromJson(
                kinase_feature)
        res._descriptors = json_obj.get('_descriptors')
        kinase_conservation = json_obj.get('_kinase_conservation')
        if kinase_conservation is not None:
            # Keys were cast to str for JSON; restore the int keys
            kinase_conservation = {
                int(k): annotation.KinaseConservation.fromJson(v)
                for k, v in kinase_conservation.items()
            }
        res._kinase_conservation = kinase_conservation
        res.gpcr_segment = json_obj.get('gpcr_segment')
        res.gpcr_generic_number = json_obj.get('gpcr_generic_number')
        return res

    def __str__(self):
        """
        Returns the short code for the residue
        """
        return str(self.type)

    def __repr__(self):
        parts = [repr(self.type)]
        for kwarg_name, kwarg_val in self._getReprKwargList():
            parts.append(f"{kwarg_name}={kwarg_val}")
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def _getNewInstance(self):
        return self.__class__(self.type)

    def __copy__(self):
        new_res = super().__copy__()
        blacklist = self._getCopyBlackList()
        for name in type(self).__slots__:
            if name in blacklist:
                continue
            setattr(new_res, name, getattr(self, name))
        return new_res

    def __deepcopy__(self, memo):
        new_res = copy.copy(self)
        blacklist = self._getCopyBlackList()
        for name in type(self).__slots__:
            if name in blacklist:
                continue
            value = getattr(self, name)
            setattr(new_res, name, copy.deepcopy(value, memo))
        return new_res

    def _getCopyBlackList(self):
        """
        A list of attributes to not copy or deepcopy.

        :return: Attribute names to not copy or deepcopy
        :rtype: set(str)
        """
        # Don't copy type because it's intended to be a singleton
        # Don't copy sequence because it's set in the superclass
        # disulfide_bond is not copied due to the invariant that a maximum of 2
        # residues may contain the same disulfide bond
        return {"type", "sequence", "disulfide_bond", "pred_disulfide_bond"}

    def _getReprKwargList(self):
        """
        Return a list of kwargs that should be specified in the repr.

        :return: list of (kwarg_key, kwarg_value) pairs
        :rtype: list[tuple(str, str)]
        """
        resnum = "None" if self.resnum is None else "%s" % self.resnum
        kwarg_list = [('inscode', "'%s'" % self.inscode), ('resnum', resnum)]
        if self.molecule_number is not None:
            kwarg_list.append(('molnum', str(self.molecule_number)))
        if self.seqres_only:
            kwarg_list.append(('seqres_only', 'True'))
        return kwarg_list

    def getChainKey(self):
        """
        :return: Key that partially identifies this residue within its chain
        :rtype: ResidueChainKey
        """
        # TODO MSV-2379 account for all items in WHResidue.d_hash
        return ResidueChainKey(self.resnum, self.inscode)

    def getKey(self):
        """
        Get a key that identifies the residue relative to structures.

        :return: The key built by `get_residue_key` from this residue, the
            entry id of its sequence and the sequence's structure chain (see
            `ResidueKey` for the fields), or None if the residue is
            unparented, has no structure, or its sequence has no entry id.
        :rtype: ResidueKey or NoneType
        """
        seq = self.sequence
        if seq is None or not self.hasStructure() or seq.entry_id is None:
            return None
        return get_residue_key(self, seq.entry_id, seq.structure_chain)

    def hasSetResNum(self) -> bool:
        """
        :return: Whether a specific resnum has been set
        """
        return self._resnum is not None

    @property
    def resnum(self):
        """
        The residue number. If no resnum has been set and the residue is part
        of a sequence, the resnum is auto-generated as the residue's index in
        the sequence plus one.
        """
        if not self.hasSetResNum() and self.sequence is not None:
            return self.idx_in_seq + 1
        return self._resnum

    @resnum.setter
    def resnum(self, value):
        self._resnum = value

    @property
    def inscode(self):
        """
        The insertion code. If no inscode has been set, this is ' ' when a
        resnum has been set and '+' otherwise.
        """
        if self._inscode is None:
            if self.hasSetResNum():
                return ' '
            return '+'
        return self._inscode

    @inscode.setter
    def inscode(self, value):
        self._inscode = value

    @property
    def rescode(self):
        """
        The resnum followed by the inscode, or None if there is no resnum.

        :rtype: str or None
        """
        if self.resnum is not None:
            return str(self.resnum) + self.inscode
        return None

    @property
    def short_code(self):
        return self.type.short_code

    @property
    def long_code(self):
        return self.type.long_code

    @property
    def chain(self):
        """
        The name of the sequence chain that this residue belongs to.

        :rtype: str
        """
        return "" if self.sequence is None else self.sequence.chain

    @property
    def structure_chain(self):
        """
        The name of chain for the structure that this sequence is associated
        with. This is normally the same as `chain`, but it can be different if
        the user manually links this sequence to a structure with differing
        chain names.

        :rtype: str
        """
        return "" if self.sequence is None else self.sequence.structure_chain

    @property
    def hydrophobicity(self):
        """
        :return: Hydrophobicity for the ResidueType on the Kyte-Doolittle
            scale, if available; otherwise None.
        :rtype: float
        """
        return self.type.hydrophobicity

    @property
    def hydrophilicity(self):
        """
        :return: Hydrophilicity for the ResidueType on the Hopp-Woods scale, if
            available; otherwise None
        :rtype: float
        """
        return self.type.hydrophilicity

    @property
    def charge(self):
        """
        :return: charge of the ResidueType of the residue
        :rtype: RESIDUE_CHARGE
        """
        return self.type.charge

    @property
    def helix_propensity(self):
        """
        :return: Helix propensity for the ResidueType of the residue
        :rtype: `HELIX_PROPENSITY`
        """
        return self.type.helix_propensity

    @property
    def beta_strand_propensity(self):
        """
        :return: Beta-strand propensity for the ResidueType of the residue
        :rtype: `BETA_STRAND_PROPENSITY`
        """
        return self.type.beta_strand_propensity

    @property
    def turn_propensity(self):
        """
        :return: Turn propensity for the ResidueType of the residue
        :rtype: `TURN_PROPENSITY`
        """
        return self.type.turn_propensity

    @property
    def helix_termination_tendency(self):
        """
        :return: Helix termination tendency for the ResidueType of the residue
        :rtype: `HELIX_TERMINATION_TENDENCY`
        """
        return self.type.helix_termination_tendency

    @property
    def exposure_tendency(self):
        """
        :return: Solvent exposure tendency for the ResidueType of the residue
        :rtype: `SOLVENT_EXPOSURE_TENDENCY`
        """
        return self.type.exposure_tendency

    @property
    def steric_group(self):
        """
        :return: Steric group for the ResidueType of the residue
        :rtype: `STERIC_GROUP`
        """
        return self.type.steric_group

    @property
    def side_chain_chem(self):
        """
        :return: Side chain chemistry for the ResidueType of the residue
        :rtype: `SIDE_CHAIN_CHEM`
        """
        return self.type.side_chain_chem

    @property
    def ss_prediction_sspro(self):
        """
        Returns a DSSP code matching the secondary structure prediction for the
        residue or None. Value is calculated from the SSpro backend.
        """
        # `_ss_prediction_sspro` is not a slot, so `__getattr__` raises for it
        # until the setter has run; fall back to the documented None.
        return getattr(self, "_ss_prediction_sspro", None)

    @ss_prediction_sspro.setter
    def ss_prediction_sspro(self, value):
        if value not in list(DSSP_CODES):
            raise ValueError("%s is not a valid DSSP code" % value)
        self._ss_prediction_sspro = value

    @property
    def ss_prediction_psipred(self):
        """
        Returns a DSSP code matching the secondary structure prediction for the
        residue or None. Value is calculated from the PsiPred backend.
        """
        # See ss_prediction_sspro for why getattr with a default is needed
        return getattr(self, "_ss_prediction_psipred", None)

    @ss_prediction_psipred.setter
    def ss_prediction_psipred(self, value):
        if value not in list(DSSP_CODES):
            raise ValueError("%s is not a valid DSSP code" % value)
        self._ss_prediction_psipred = value

    @property
    def isoelectric_point(self):
        """
        :return: A float representing the isoelectric point value for the
            ResidueType of the residue
        """
        return self.type.isoelectric_point

    @check_if_comparable(default_return=_DEFAULT_SIMILARITY)
    def getSimilarity(self, ref_res, similarity_matrix=constants.BLOSUM62):
        """
        Returns the similarity between the residue and a reference residue

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :param similarity_matrix: The scoring matrix to use

        :return: similarity score based on the similarity matrix
        :rtype: float
        """
        return get_matrix_value(similarity_matrix, self.short_code,
                                ref_res.short_code)

    @check_if_comparable()
    def getBinarySimilarity(self,
                            ref_res,
                            similarity_matrix=constants.BLOSUM62):
        """
        Returns if the residue and a reference residue are similar

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :param similarity_matrix: The scoring matrix to use

        :return: 1 if the similarity score is positive, otherwise 0.
        :rtype: int
        """
        return 1 if self.getSimilarity(ref_res, similarity_matrix) > 0.0 else 0

    @check_if_comparable()
    def getIdentity(self, ref_res):
        """
        Return the identity between the residue and a reference residue

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :return: 1 if same as the reference residue, 0 otherwise.
        :rtype: int
        """
        res_type = self.type
        ref_res_type = ref_res.type
        if (res_type.short_code == ref_res_type.short_code and
                res_type.name == ref_res_type.name):
            return 1
        return 0

    @check_if_comparable()
    def getIdentityStrict(self, ref_res):
        """
        Return the identity between the residue and a reference residue without
        considering nonstandard amino acids identical to their related standard
        amino acid.

        See getIdentity for additional documentation.
        """
        res_type = self.type
        ref_res_type = ref_res.type
        maybe_identical = self.getIdentity(ref_res)
        if maybe_identical:
            if not res_type.nonstandard and not ref_res_type.nonstandard:
                return maybe_identical
            elif res_type.nonstandard and ref_res_type.nonstandard:
                # Two nonstandard residues must be the very same variant
                return int(res_type.long_code == ref_res_type.long_code)
        return 0

    @check_if_comparable()
    def getConservation(self, ref_res):
        """
        Return whether the residue and a reference residue have similar
        side-chain chemistry. The similarity criterion is based on "side chain
        chemistry" descriptor matching.

        :param ref_res: The reference residue
        :type ref_res: `schrodinger.protein.residue.Residue`

        :return: 1 if the residue and reference residue have similar side
            chain chemistry, 0 otherwise.
        :rtype: int
        """
        res_group = _HOMOLOGY_GROUPS.get(self.short_code)
        ref_group = _HOMOLOGY_GROUPS.get(ref_res.short_code)
        if res_group and ref_group and (res_group == ref_group):
            return 1
        return 0

    def getStructureResProperties(self):
        """
        Return all properties for the corresponding structure residue's alpha
        carbon. Properties that apply to the whole residue are stored as atom
        properties on this atom. An empty dictionary will be returned if this
        residue doesn't have a corresponding alpha carbon.

        :return: A dictionary-like object containing the properties.
        :rtype: structure._StructureAtomProperty or dict
        """
        seq = self.sequence
        if seq is None or self.seqres_only or not seq.hasStructure():
            return {}
        struc_res = seq.getStructureResForRes(self)
        if struc_res is None:
            return {}
        calpha = struc_res.getAlphaCarbon()
        if calpha is None:
            return {}
        return calpha.property

    def updateDescriptors(self, descriptors):
        """
        Updates the descriptor dicts with new descriptor values

        :param descriptors: A dict mapping descriptor names to their values
        :type descriptors: dict[str, float]
        """
        self.descriptors.update(descriptors)

    def getDescriptorValue(self, descriptor_name):
        """
        :return: The value stored for `descriptor_name`, or None if there is
            no such descriptor.
        """
        return self.descriptors.get(descriptor_name)

    def getDescriptorKeys(self):
        """
        :return: A view of the names of all stored descriptors.
        """
        return self.descriptors.keys()

    def getProperty(self, seq_prop):
        """
        Get the residue's value corresponding to the given SequenceProperty
        object

        :param seq_prop: The object describing the residue property
        :type seq_prop: schrodinger.protein.properties.SequenceProperty

        :return: The value of the sequence property
        :rtype: str, int, float or None
        """
        if seq_prop.property_type == properties.PropertyType.StructureProperty:
            struc_props = self.getStructureResProperties()
            prop_val = struc_props.get(seq_prop.property_name)
        else:
            prop_val = self.getDescriptorValue(seq_prop.property_name)
        return prop_val
class CombinedChainResidueWrapper:
    """
    A wrapper for a residue or gap so that res.sequence points to the
    combined-chain sequence and res.idx_in_seq gives the index within the
    combined-chain sequence.

    Note that these wrappers are generated as needed and the combined-chain
    sequence does not store any reference to the generated instances. As such,
    these residues should not be stored using weakrefs and should not be
    compared using identity. Also note that these residues will not compare
    equal to the split-chain residues that they wrap.
    """

    def __init__(self, res, combined_chain_seq):
        """
        :param res: The residue or gap to wrap
        :type res: AbstractSequenceElement

        :param combined_chain_seq: The combined-chain sequence that the residue
            is part of.
        :type combined_chain_seq: sequence.CombinedChainProteinSequence
        """
        self._res = res
        self._seq = combined_chain_seq

    def __getattr__(self, attr_name):
        """
        Delegate any attribute not defined on the wrapper to the wrapped
        residue.
        """
        # If the wrapper's own attributes are missing (i.e. __init__ hasn't
        # run, such as for an instance created via __new__), looking up
        # self._res below would re-enter this method without bound.
        if attr_name in ("_res", "_seq"):
            raise AttributeError(attr_name)
        return getattr(self._res, attr_name)

    def __dir__(self):
        # Names from the wrapped residue plus the wrapper's own
        attr_names = set(dir(self._res))
        attr_names.update(self.__dict__.keys())
        attr_names.update(self.__class__.__dict__.keys())
        return attr_names

    def __eq__(self, other):
        if isinstance(other, CombinedChainResidueWrapper):
            return self._res == other._res
        else:
            return False

    def __hash__(self):
        return hash((self.__class__, self._res))

    def __str__(self):
        return str(self._res)

    def __repr__(self):
        return f"{self.__class__.__name__}({repr(self._res)})"

    def __copy__(self):
        raise RuntimeError(f"Cannot copy {self.__class__.__name__} instances")

    def __deepcopy__(self, memo):
        raise RuntimeError(f"Cannot copy {self.__class__.__name__} instances")

    def _inSeq(self):
        """
        Does this residue still exist in the sequence it was created from? This
        will return False if the residue has been removed from its split-chain
        sequence or if the chain has been removed from the combined-chain
        sequence.

        :rtype: bool
        """
        return self._res.sequence in self._seq.chains

    @property
    def sequence(self):
        """
        The combined-chain sequence that the residue is part of, or None if the
        residue has been removed from the sequence.

        :rtype: sequence.CombinedChainProteinSequence or None
        """
        if self._inSeq():
            return self._seq
        else:
            return None

    @property
    def idx_in_seq(self):
        """
        This residue's index in the combined-chain sequence, or None if the
        residue has been removed from the sequence.

        :rtype: int or None
        """
        if self._inSeq():
            return self._seq.index(self)
        else:
            return None

    @property
    def split_res(self):
        """
        The split-chain residue or gap that this residue is wrapping.

        :rtype: AbstractSequenceElement
        """
        return self._res

    @property
    def split_sequence(self):
        """
        The split-chain sequence that this residue is part of, or None if the
        residue has been removed from the sequence.

        :rtype: sequence.ProteinSequence or None
        """
        return self._res.sequence

    @property
    def disulfide_bond(self):
        """
        The current disulfide bond, if any, that this residue is involved in.

        :rtype: CombinedChainDisulfideBond or None
        """
        if self._inSeq():
            split_bond = self._res.disulfide_bond
            if split_bond is not None and split_bond.isValid():
                combined_bond = CombinedChainDisulfideBond(
                    split_bond, self._seq)
                # Make sure that neither residue is part of a chain that's been
                # removed from the combined sequence
                if all(res.sequence is not None for res in combined_bond):
                    return combined_bond
        return None
class DisulfideBond:
    """
    Representation of a disulfide bond.

    The two residues are held in a `weakref.WeakSet`, so the bond does not
    keep them alive.
    """

    def __init__(self, res1, res2):
        """
        :param res1: A residue in the bond
        :type res1: Residue

        :param res2: The other residue in the bond
        :type res2: Residue
        """
        self._res_pair = weakref.WeakSet((res1, res2))

    def __iter__(self):
        """
        Iterate over the bonded residues ordered by their index in their
        sequence. Residues without a sequence sort as index 0.
        """

        def sort_key(res):
            return res.idx_in_seq if res.sequence is not None else 0

        return iter(sorted(self._res_pair, key=sort_key))

    @property
    def res_pair(self):
        """
        The bonded residues, in iteration order.

        :rtype: tuple
        """
        return tuple(self)

    def isValid(self):
        """
        Check whether the disulfide bond is valid, i.e. whether it still holds
        two residues and both of them are part of a sequence.

        :return: Whether the bond is valid.
        :rtype: bool
        """
        # Materialize once; every access to res_pair re-sorts the residues
        pair = self.res_pair
        if len(pair) < 2:
            return False
        return all(res.sequence is not None for res in pair)

    def _getValidatedSequences(self):
        """
        Return the sequences of the two bonded residues.

        :raise ValueError: If the bond is not valid
        """
        if not self.isValid():
            raise ValueError("Bond is not valid")
        seq1, seq2 = [res.sequence for res in self.res_pair]
        return seq1, seq2

    @property
    def is_intra_sequence(self):
        """
        Check whether both residues of the bond are in the same sequence.

        :return: Whether the bond is an intra-sequence bond.
        :rtype: bool

        :raise ValueError: If the bond is not valid
        """
        seq1, seq2 = self._getValidatedSequences()
        return seq1 is seq2

    @property
    def is_inter_sequence(self):
        """
        Check whether the residues of the bond are in different sequences.

        :return: Whether the bond is an inter-sequence bond.
        :rtype: bool

        :raise ValueError: If the bond is not valid
        """
        seq1, seq2 = self._getValidatedSequences()
        return seq1 is not seq2
class CombinedChainDisulfideBond(DisulfideBond):
    """
    A disulfide bond in a `sequence.CombinedChainProteinSequence`.

    Wraps a split-chain bond; equality and hashing are delegated to that
    wrapped bond.
    """

    def __init__(self, bond, seq):
        """
        :param bond: The split-chain disulfide bond.
        :type bond: DisulfideBond

        :param seq: The combined-chain sequence that this bond is in.
        :type seq: sequence.CombinedChainProteinSequence
        """
        self._split_seq_bond = bond
        # A regular set of freshly created wrappers (one per bonded residue)
        wrapped = set()
        for split_res in bond:
            wrapped.add(CombinedChainResidueWrapper(split_res, seq))
        self._res_pair = wrapped

    def __eq__(self, other):
        if not isinstance(other, DisulfideBond):
            return super().__eq__(other)
        # Compare against the split-chain bond behind a combined-chain bond
        if isinstance(other, CombinedChainDisulfideBond):
            target = other._split_seq_bond
        else:
            target = other
        return self._split_seq_bond == target

    def __hash__(self):
        return hash(self._split_seq_bond)

    @property
    def is_intra_sequence(self):
        """
        Always True for a combined-chain bond.
        """
        return True

    @property
    def is_inter_sequence(self):
        """
        Always False for a combined-chain bond.
        """
        return False
def add_disulfide_bond(res1, res2, known=True):
    """
    Link two residues with a new disulfide bond and clear the disulfide bond
    cache of each residue's sequence.

    :param res1: A residue to link with a disulfide bond
    :type res1: residue.Residue

    :param res2: Another residue to link with a disulfide bond
    :type res2: residue.Residue

    :param known: Whether the bond is a known bond or a predicted bond.
    :type known: bool

    :raise ValueError: If either residue already has a bond of the requested
        kind
    """
    if known:
        attr = 'disulfide_bond'
    else:
        attr = 'pred_disulfide_bond'
    endpoints = (res1, res2)
    # Refuse to overwrite an existing bond on either residue
    for endpoint in endpoints:
        if getattr(endpoint, attr) is not None:
            raise ValueError(f'"{endpoint}" is already a part of a bond')
    new_bond = DisulfideBond(res1, res2)
    setattr(res1, attr, new_bond)
    setattr(res2, attr, new_bond)
    for endpoint in endpoints:
        endpoint.sequence.clearDisulfideBondsCache()
def remove_disulfide_bond(bond):
    """
    Remove a disulfide bond between two residues and clear the disulfide bond
    cache of each residue's sequence.

    :param bond: The bond to disconnect
    :type bond: residue.DisulfideBond

    :raise AssertionError: If `bond` is neither the known nor the predicted
        bond of both of its residues
    """
    res1, res2 = bond.res_pair
    res1.sequence.clearDisulfideBondsCache()
    res2.sequence.clearDisulfideBondsCache()
    if bond == res1.disulfide_bond == res2.disulfide_bond:
        bond_attr_name = 'disulfide_bond'
    elif bond == res1.pred_disulfide_bond == res2.pred_disulfide_bond:
        bond_attr_name = 'pred_disulfide_bond'
    else:
        # Raised explicitly rather than via `assert False` so that the check
        # is not stripped when Python runs with -O
        raise AssertionError('Undefined behavior')
    for res in (res1, res2):
        setattr(res, bond_attr_name, None)
class Nucleotide(Residue):
    """
    Placeholder `Residue` subclass for nucleotides. It currently adds no
    behavior of its own.
    """
    # TODO (MSV-1504): Create proper nucleic acid domain objects
    pass