Source code for schrodinger.protein.buildpeptide

"""

Module for converting protein residue sequences to 3D structures.

Copyright Schrodinger, LLC. All rights reserved.
"""

from enum import Enum

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.structutils import build

SECONDARY_STRUCTURE = Enum('SECONDARY_STRUCTURE', [
    'Extended', "AlphaHelix", "PiHelix", "ThreeTenHelix", "CollagenHelix",
    "LeftHandedAlphaHelix", "ParallelPleatedSheet", "AntiparallelPleatedSheet",
    "Custom", "CustomAngles"
])

PDB_CODES = structure.RESIDUE_MAP_1_TO_3_LETTER


[docs]def get_fragment_structure(fragname): """ Return the fragment structure for the given peptide residue. :param fragname: Fragment name (e.g. "ALA") :type fragname: str :return: Structure :rtype: `structure.Structure` """ frag_handle = mm.mmfrag_new("peptide") first = True st = None while True: if first: which = mm.MM_FIRST first = False else: which = mm.MM_NEXT try: name, ct_handle = mm.mmfrag_get_fragment(frag_handle, which) except mm.MmException as e: if e.rc == mm.MMFRAG_DONE: break if name == fragname: copy_ct = mm.mmct_ct_duplicate(ct_handle) st = structure.Structure(copy_ct, True) mm.mmfrag_delete(frag_handle) if st is None: raise ValueError("Unknown fragment: %s" % fragname) return st
[docs]def grow_fragment(st, fromatom, toatom, fraggroup, fragname, direction=None): """ Grow the given fragment to the given ct, in alpha-helix configuration. :param st: Input structure :type st: `structure.Structure` :param fromatom: The "from" grow atom. :type fromatom: int :param toatom: The "to" grow atom. :type toatom: int :param fraggroup: Fragment group (e.g. "organic") :tupe fraggroup: str :param fragname: Fragment name (e.g. "Hydroxyl") :type fragname: str :param direction: Direction to grow in (e.g. "forward") :type direction: str :return: (new from atom, new to atom) for the next grow operation. :rtype: (int, int) """ mmfrag_handle = mm.mmfrag_new(fraggroup) mm.mmfrag_set_fragment_name(mmfrag_handle, fragname) # Get a list of available directions: # (For some reason needs to be done first) directions = [] d = mm.mmfrag_get_direction(mmfrag_handle, mm.MM_FIRST) directions.append(d) while True: try: d = mm.mmfrag_get_direction(mmfrag_handle, mm.MM_NEXT) except mm.MmException as e: if e.rc == mm.MMFRAG_DONE: break # exit the loop (end of list) else: directions.append(d) if direction: # If user specified direction: if direction not in directions: msg = "Direction '%s' is not available for this fragment" % direction msg += "\nAvailable directions: %s" % directions raise ValueError(msg) mm.mmfrag_set_direction(mmfrag_handle, direction) if fragname != "Hydroxyl": mm.mmfrag_set_conformation(mmfrag_handle, "Secondary_Structure:", "alpha_helix") new_fromatom, new_toatom, renumber_map = mm.mmbuild_grow( st.handle, mmfrag_handle, fromatom, toatom) # NOTE: No need to delete the renumber_map, because it's now a Python dict. mm.mmfrag_delete(mmfrag_handle) return new_fromatom, new_toatom
[docs]def build_peptide(sequence, secondary_structure=None, ss_seq=None, angles=None, cap=False): """ For each amino acid in the sequence, test to make sure that it is a valid code and build the peptide. :param sequence: Amino acid sequence of the peptide to be built. :type sequence: str :param secondary_structure: A description of how the secondary structure can be set. :type secondary_structure: enum SECONDARY_STRUCTURE :param ss_seq: If the secondary structure is Custom then this is sequence of secondary structure codes - one per residue :type ss_seq: str :param angles: If the secondary structure is Custom Angles then this is a list of pairs of Phi/Psi angles - two per residue :type ss_seq: list :param cap: Option to include capping groups. :type cap: bool :return: Initial peptide structure :rtype: `structure.Structure` """ # Translate from secondary structure codes into strings which mmfrag # understands: secondary_structure_translate = { SECONDARY_STRUCTURE.Extended: "extended", SECONDARY_STRUCTURE.AlphaHelix: "alpha_helix", SECONDARY_STRUCTURE.PiHelix: "pi_helix", SECONDARY_STRUCTURE.ThreeTenHelix: "3(10)_helix", SECONDARY_STRUCTURE.CollagenHelix: "collagen_helix", SECONDARY_STRUCTURE.LeftHandedAlphaHelix: "left_handed_alpha_helix", SECONDARY_STRUCTURE.ParallelPleatedSheet: "parallel_pleated_sheet", SECONDARY_STRUCTURE.AntiparallelPleatedSheet: "antiparallel_pleated_sheet", "H": "alpha_helix", "C": "extended", "-": "extended", "~": "extended", "E": "parallel_pleated_sheet" } # Validate the input. if not len(sequence): raise ValueError("No sequence specified to build") # For Custom we should have a ss_seq the same length # as the sequence: if (secondary_structure == SECONDARY_STRUCTURE.Custom) and (len(sequence) != len(ss_seq)): raise ValueError("The length of the secondary structure sequence" " must match the length of the peptide sequence") # For Custom Angles we should have a list of angles twice the length # of the sequence: if (secondary_structure == SECONDARY_STRUCTURE.CustomAngles) and ( 2 * len(sequence) != len(angles)): raise ValueError("Wrong length for secondary structure Phi/Psi angles") st = structure.create_new_structure() st.title = sequence from_atom = 0 to_atom = 0 for i, res in enumerate(sequence): if res in PDB_CODES: if i == 0: st = get_fragment_structure(PDB_CODES[res]) for at in st.atom: at.resnum = 1 else: original_atoms = {at.index for at in st.atom} if (secondary_structure == SECONDARY_STRUCTURE.CustomAngles or secondary_structure is None): sec_struct = "extended" elif secondary_structure == SECONDARY_STRUCTURE.Custom: # We accept both upper and lower case letters as users # copying sequences directly from an online source expect # it to be valid irrespective of the letter case. seq = ss_seq[i].upper() sec_struct = secondary_structure_translate[seq] else: sec_struct = secondary_structure_translate[ secondary_structure] atom_map = build.attach_fragment(st, from_atom, to_atom, "peptide", PDB_CODES[res], "forward(N-to-C)", "Secondary_Structure:", sec_struct) # Create a new set containing the atoms of the new structure. new_atoms = set() for at in st.atom: new_atoms.add(at.index) # The difference of the newAtoms and originalAtoms gives the # added atoms. for old_atom in original_atoms: if atom_map[old_atom] is not None: new_atoms.remove(atom_map[old_atom]) # The new atoms are in the i+1th residue. for atom in new_atoms: st.atom[atom].resnum = i + 1 matches = analyze.evaluate_smarts_canvas(st, "O=CN[C;H3]") matches = matches[0] from_atom = matches[1] to_atom = matches[2] else: raise RuntimeError("Cannot identify all amino acid codes.") if cap: # Renumber as the N-cap and first sequence residue will have the same # residue number. for res_num, res in enumerate(st.residue, start=1): res.resnum = res_num else: # The C-cap is handled by adding a hydroxyl fragment to the current # grow bond. build.attach_fragment(st, from_atom, to_atom, "organic", "Hydroxyl", "forward") # The N-cap is handled by deleting the ACE residue and repairing the # hydrogens. atoms_to_delete_n_cap = analyze.evaluate_asl(st, "res.ptype ACE") st.deleteAtoms(atoms_to_delete_n_cap) build.add_hydrogens(st) if secondary_structure == SECONDARY_STRUCTURE.CustomAngles: _apply_dihedral(st, angles[0::2], angles[1::2]) return st
############################################################################### def _apply_dihedral(st, phi_angles, psi_angles): """ For each residue in the peptide, identify those atoms that define the dihedral and apply the specified phi/psi angles. :param st: Initial peptide structure :type st: `structure.Structure` :param phi_angles: List of phi angles :type phi_angles: list :param psi_angles: List of psi angles :type psi_angles: list :return: Peptide structure with modified dihedral :rtype: `structure.Structure` """ residue_list = [res for res in st.residue] for i in range(1, len(residue_list) - 1): phi_at1 = phi_at2 = phi_at3 = phi_at4 = None psi_at1 = psi_at2 = psi_at3 = psi_at4 = None res0 = residue_list[i - 1] res1 = residue_list[i] res2 = residue_list[i + 1] for atom in res0.atom: if atom.pdbname == " C ": phi_at1 = atom.index for atom in res1.atom: if atom.pdbname == " N ": phi_at2 = atom.index psi_at1 = atom.index if atom.pdbname == " CA ": phi_at3 = atom.index psi_at2 = atom.index if atom.pdbname == " C ": phi_at4 = atom.index psi_at3 = atom.index for atom in res2.atom: if atom.pdbname == " N ": psi_at4 = atom.index atoms = [ phi_at1, phi_at2, phi_at3, phi_at4, psi_at1, psi_at2, psi_at3, psi_at4 ] if any(ai is None for ai in atoms): raise RuntimeError("Cannot identify all dihedral atoms.") phi_angle = float(phi_angles[i - 1]) psi_angle = float(psi_angles[i - 1]) st.adjust(phi_angle, phi_at1, phi_at2, phi_at3, phi_at4) st.adjust(psi_angle, psi_at1, psi_at2, psi_at3, psi_at4) return st