Source code for schrodinger.structutils.build

"""
Functions for modifying chemical structures.

Copyright Schrodinger, LLC. All rights reserved.

"""

import string
import warnings
from functools import partial
import contextlib

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import mminit
from schrodinger.infra import mmlist
from schrodinger.infra.mmbitset import Bitset
from schrodinger.structure import AtomsInRingError

from . import analyze

INCREMENT_BOND = {
    structure.BondType.Single: structure.BondType.Double,
    structure.BondType.Double: structure.BondType.Triple
}

_initializer = mminit.Initializer([mm.mmbuild_initialize, mm.mmfrag_initialize],
                                  [mm.mmbuild_terminate, mm.mmfrag_terminate])


def _find_bonded_atom_with_name(atom, name):
    """
    Find an atom with PDB name <name> attached to the atom <atom>. An atom
    object for the first matching atom is returned. If no such atom is found,
    ValueError is raised.
    """
    for b_atom in atom.bonded_atoms:
        if b_atom.pdbname == name:
            return b_atom
    raise ValueError("No bonded atom found with name: %s" % name)


def _map_to_dict(renumber_map):
    """
    Converts a renumbering map to a dictionary and deletes it;
    returns the newly created dictionary.
    """
    renumber_dict = {}

    size = mm.mmct_id_map_get_size(renumber_map)
    for oldatom in range(1, size + 1):
        newatom = mm.mmct_id_map_get(renumber_map, oldatom)
        if newatom == -1:
            newatom = None
        renumber_dict[oldatom] = newatom

    mm.mmct_id_map_delete(renumber_map)

    return renumber_dict


[docs]def connect(st, atomlist1, atomlist2): """ Perform a connect between the atoms in the CT "st" based on the heavy atoms in atomlist1 and those in atomlist2. Usually each list will contain one atom each, from different molecules. Hydrogens bound to the specified atoms are removed as necessary, and the molecule of atomlist2 will be rotated and translated such that the newly formed bond(s) are proper length, and bond angles are valid. Returns a dictionary of atom renumbering. Keys are old atom numbers, values are new atom numbers (or None if the atom was deleted, i.e. a hydrogen). """ # Ev:55942 list1 = mmlist.mmlist(atomlist1) list2 = mmlist.mmlist(atomlist2) renumber_dict = mm.mmbuild_connect(st, list1, list2) # The "renumber_map" contains the mapping arising from # any atoms which where deleted during the grow. return renumber_dict
def _valid_torsion_group(mmfrag_handle, torsion_group): """ Returns true if torsion group is a valid torsion group for mmfrag_handle """ tg = mm.mmfrag_get_torsion_group(mmfrag_handle, mm.MM_FIRST) torsion_groups = [tg] while True: try: tg = mm.mmfrag_get_torsion_group(mmfrag_handle, mm.MM_NEXT) except mm.MmException as e: if e.rc == mm.MMFRAG_DONE: break # exit the loop (end of list) else: torsion_groups.append(tg) return (torsion_group in torsion_groups) def _set_conformation( mmfrag_handle, torsion_group, conformation, ): """ Set conformation as current with in the torsion_group of fraghandle. Raises an exception if it's not a valid torsion group or conformation """ if not _valid_torsion_group(mmfrag_handle, torsion_group): msg = "Torsion group '%s' is not available for this fragment" % ( torsion_group) raise ValueError(msg) conformations = [] # Get a list of available conformations and validate the input conformations = [] conf = mm.mmfrag_get_conformation(mmfrag_handle, torsion_group, mm.MM_FIRST) conformations.append(conf) while True: try: conf = mm.mmfrag_get_conformation(mmfrag_handle, torsion_group, mm.MM_NEXT) except mm.MmException as e: if e.rc == mm.MMFRAG_DONE: break # exit the loop (end of list) else: conformations.append(conf) if conformation not in conformations: msg = "Conformation '%s' is not available for this fragment" % ( torsion_group) msg += "\nAvailable conformations: %s" % conformations raise ValueError(msg) mm.mmfrag_set_conformation(mmfrag_handle, torsion_group, conformation) return
[docs]def attach_fragment(st, fromatom, toatom, fraggroup, fragname, direction=None, torsion_group=None, conformation=None): """ Attach fragment <fragname> of group <fraggroup> to <fromatom> in place of atom <toatom>. Will use growbond atom labels in the incoming fragment. Optionally specify a <direction> string. Usually one of the following: 'forward', 'backward', 'forward(N-to-C)', 'backward(C-to-N)'. Optionally specify a torsion group and associated conformation. Usually this will be a secondary structure string such as "extended" for torsion group "Secondary_Structure:" Returns a dictionary of atom renumbering. Keys are old atom numbers, values are new atom numbers (or None if the atom was deleted, i.e. a hydrogen). :param st: structure to work on :type st: structure.Structure :param fromatom: atom on which to make the attachment :type fromatom: int :param toatom: atom to be replaced by the attachment :type toatom: int :param fraggroup: mmfrag category of the fragment to attach :type fraggroup: str :param fragname: name of the fragment to attach :type fragname: str :param direction: direction of the attachment :type direction: str :param torsion_group: a torsion group specifier :type torsion_group: str :param conformation: a conformations specifier :type conformation: str """ if not isinstance(fromatom, int) or fromatom <= 0 or fromatom > st.atom_total: raise TypeError( f"fromatom must be a valid atom number instead of {type(fromatom)}") if not isinstance(toatom, int) or toatom <= 0 or toatom > st.atom_total: raise TypeError( f"toatom must be a valid atom number instead of {type(fromatom)}") if not st.areBound(fromatom, toatom): raise ValueError("fromatom (%i) and toatom (%i) are not bonded." % (fromatom, toatom)) mmfrag_handle = mm.mmfrag_new(fraggroup) mm.mmfrag_set_fragment_name(mmfrag_handle, fragname) # Get a list of available directions: # (For some reason needs to be done first) directions = [] d = mm.mmfrag_get_direction(mmfrag_handle, mm.MM_FIRST) directions.append(d) while True: try: d = mm.mmfrag_get_direction(mmfrag_handle, mm.MM_NEXT) except mm.MmException as e: if e.rc == mm.MMFRAG_DONE: break # exit the loop (end of list) else: directions.append(d) if direction: # If user specified direction: if direction not in directions: msg = "Direction '%s' is not available for this fragment" % direction msg += "\nAvailable directions: %s" % directions raise ValueError(msg) mm.mmfrag_set_direction(mmfrag_handle, direction) # Get a list of available torsion groups and validate the input if conformation and torsion_group: _set_conformation(mmfrag_handle, torsion_group, conformation) if conformation is None: renumber_dict = mm.mmbuild_attach(st, mmfrag_handle, fromatom, toatom) else: # If conformation was specified then do a grow: new_grow_atom1, new_grow_atom2, renumber_dict = mm.mmbuild_grow( st, mmfrag_handle, fromatom, toatom) mm.mmfrag_delete(mmfrag_handle) # PYAPP-6144 mmbuild attaches most residues at a 180 degree angle # (trans), but Prolines get built cis (0 degree angle) for some reason. prev_c = st.atom[renumber_dict[int(fromatom)]] if fragname == 'PRO' and prev_c.pdbname == " C ": prev_ca = _find_bonded_atom_with_name(prev_c, " CA ") curr_n = _find_bonded_atom_with_name(prev_c, " N ") curr_ca = _find_bonded_atom_with_name(curr_n, " CA ") st.adjust(180.0, prev_ca, prev_c, curr_n, curr_ca) return renumber_dict
[docs]def grow_mult(st, mmfrag_handle, grow_atoms1, grow_atoms2): """ Grow for fragments with multiple connections, such as two-stranded DNA & RNA. The st is the structure to which to add the fragment. The frag handle is an mmfrag instance of the fragment to grow. Returns a tuple of (new-grow-atom-1, new-grow-atom-2, renumber-map). The new-grow-atom-1 and new-grow-atom-2 are lists that match the renumbered atoms corresponding to grow_atoms1 and grow_atoms2. The reunumber map is a dictionary of atom renumbering. Keys are old atom numbers, values are new atom numbers (or None if the atom was deleted, i.e. a hydrogen). """ if len(grow_atoms1) != len(grow_atoms2): raise ValueError("Number of atoms in the two grow lists must match") num_connections = len(grow_atoms1) new_grow_atom1, new_grow_atom2, renumber_dict = mm.mmbuild_grow_mult( st, mmfrag_handle, num_connections, grow_atoms1, grow_atoms2) return new_grow_atom1, new_grow_atom2, renumber_dict
[docs]def attach_structure(st, st_atom_from, st_atom_to, frag_st, frag_atom_from, frag_atom_to): """ Similar to attach_fragment(), but takes a fragment CT instead of name. From and to atoms in the fragment must also be specified. """ mm.mmbuild_ct_attach(st, st_atom_from, st_atom_to, frag_st, frag_atom_from, frag_atom_to)
[docs]def mutate(st, atom_in_res, res_type): """ Mutate the residue containing atom number 'atom_in_res' in Structure object 'st' to be of residue type 'res_type'. The residue type may be one of the 20 amino acids (including the HIS/HIP/HIE variants), a D-amino acids, or other non-standard residues. For a list of alternate residues, see the $MMSHARE/data/res/nonstandard_peptide.bld. There is no error if the residue is already of the type requested. :type st: Structure object :param st: Structure object which should be mutated. :type atom_in_res: int :param atom_in_res: Mutate the residue that this atom is in. :type res_type: str :param res_type: Desired residue type (e.g. "ALA"). :rtype: dict :return: A dictionary of renumbered atoms. Keys are old atom numbers, values are new atom numbers or None if the atom was deleted. """ # Create a new fragment handle: for libtype in ['peptide', 'nonstandard_peptide']: fh = mm.mmfrag_new(libtype) try: mm.mmfrag_set_fragment_name(fh, res_type) except mm.MmException as e: if e.rc == mm.MMFRAG_ERROR: # Unrecognized fragment; try the next library continue raise else: # Fragment found; perform the the mutation: renumber_dict = mm.mmbuild_mutate(st, fh, atom_in_res) return renumber_dict finally: mm.mmfrag_delete(fh) # If got here, then this fragment was not found in any library raise ValueError("Invalid residue type: %s\n" % res_type)
[docs]def add_hydrogens(st, treatment="All-atom with No-Lp", atom_list=None): """ Adds hydrogens to the Structure 'st'. Default treatment is suitable for nearly all purposes. Hydrogens are added in standard geometry and no attempt is made to optimize the treatment in order to maximize hydrogen bonds, etc. Parameters atom_list A list of atom indices (or _StructureAtom objects) to add hydrogens to (The default, None, means all atoms). """ mm.mmhtreat_initialize(mm.error_handler) try: treatment = mm.mmhtreat_get_treatment_index(treatment) if treatment < 0: raise Exception("%s: is an unknown Hydrogen Treatment") # From 42764: Added call to retype as input may not be correctly # typed mm.mmtype_initialize(mm.error_handler) mm.mmtype_retype(st) if atom_list is None: # list not specified # treat all atoms: mm.mmhtreat_hadd_ct(treatment, st) else: for iatom in atom_list: mm.mmhtreat_hadd(treatment, st, iatom) finally: mm.mmhtreat_terminate() mm.mmtype_terminate()
[docs]def delete_hydrogens(st): """ Deletes hydrogens from the Structure 'st'. """ hatoms = analyze.evaluate_asl(st, "atom.ele H") st.deleteAtoms(hatoms)
[docs]def delete_zobs(st): """ Delete all zero-order bonds from the given structure. """ bonds_to_delete = (b for b in st.bond if b.order == 0) delete_bonds(bonds_to_delete)
[docs]def delete_bonds_to_atoms(atoms): """ Delete all bonds to the requested atoms. Use like: build.delete_bonds_to_atoms(st.atom) build.delete_bonds_to_atoms(residue.atom) build.delete_bonds_to_atoms(map(st.atom.__getitem__, atom_indices)) :param atoms: atoms whose bonds should be deleted :type atoms: iterable of atom objects """ bonds = (b for a in atoms for b in a.bond) delete_bonds(bonds)
[docs]def delete_bonds(bonds): """ Delete the requested bonds. Bonds can be StructureBond or pairs of _StructureAtom. :param bonds: Bonds to delete :type bonds: iterable[StructureBond] or list[(_StructureAtom, _StructureAtom)] """ try: bonds = set((frozenset((b.atom1, b.atom2)) for b in bonds)) except AttributeError: if not isinstance(bonds, (tuple, list)): raise TypeError( 'Pairs of atoms only allowed if <bonds> can be iterated over twice' ) with structure.update_once(): for atom, bonded_atom in bonds: atom.deleteBond(bonded_atom)
[docs]def reorder_atoms(st, new_order): """ Create a new structure from the provided one, with atoms reordered as specified in the new_order list. :type new_order: list of ints :param new_order: The new_order list must be the same size as the number atoms in the provided structure, and atom indices begin at one. """ if (len(new_order)) != len(st.atom): raise Exception("The 'new_order' list must have %d elements." % len(st.atom)) st_copy = st.copy() # The ordering array must have natom + 1 elements for the C code, so add # [0] as a prefix. mm.mmct_ct_reorder(st_copy, st, [0] + new_order) return st_copy
[docs]def reorder_protein_atoms_by_sequence(st): """ Renumbers structure.Structure atoms by Residue order and returns a new structure object. New structure has atom numbering in sequence order via connectivity (not residue numbering) and allows behavior matching a properly ordered pdb structure. :type st: structure.Structure :param st: structure object to reorder :rtype: structure.Structure :returns: copy of input structure with atoms reordered """ mm.mmpdb_initialize(mm.error_handler) new_indices = mm.mmpdb_get_sequence_order(st, mm.MMPDB_SEQUENCE_ORDER) mm.mmpdb_terminate() new_st = st.copy() mm.mmct_ct_reorder(new_st, st, new_indices) return new_st
[docs]def reorder_water(st: structure.Structure) -> structure.Structure: """ Reorder water molecules so that they always stay at the end of the CT. A modified copy of `st` will be returned. """ WATER_SMARTS = ["[H]O[H]"] ZOB_WATER_SMARTS = ["[H]O([H])_[*]", "[H]O[H]_[*]"] flatten = lambda s: set(sum(s, [])) eval_smarts = partial(analyze.evaluate_multiple_smarts, st, unique_sets=True) zob_waters = flatten(eval_smarts(ZOB_WATER_SMARTS)) water = flatten(eval_smarts(WATER_SMARTS)) water = water - zob_waters non_water = set(range(1, st.atom_total + 1)) - water return reorder_atoms(st, sorted(non_water) + sorted(water))
[docs]@contextlib.contextmanager def MonitorAtomNumbering(st: structure.Structure): """ Watch atom renumbering within a context:: with MonitorAtomNumbering(st) as renumber_map: # do # some # horrible manipulations in c, c++, or Python # now the renumber_map will be filled in. """ atoms = list(st.atom) renumber_map = {} yield renumber_map for i, a in enumerate(atoms, start=1): try: renumber_map[i] = a.index except IndexError: # deleted renumber_map[i] = None
[docs]def desalt_structure(st): """ Returns a desalted copy of a specified structure :param st: Structure to be desalted :type st: schrodinger.structure.Structure :return: Desalted copy of the structure :rtype: schrodinger.structure.Structure """ mm.mmneut_initialize_lic(mm.error_handler) desalted_st = structure.Structure(mm.mmneut_desalt_a_ct(st)) mm.mmneut_terminate() return desalted_st
[docs]def neutralize_structure(st): """ Return a copy of the structure with each functional group neutralized. Attempts to make atoms neutral - it does not attempt to neutralize the overall charge. For instance, zwitterions will be neutralized such that all atoms have a 0 formal charge. Some atoms may not be adjusted. :param st: Structure to be neutralized, hydrogens already present :type st: schrodinger.structure.Structure :return: Neutralized copy of the structure :rtype: schrodinger.structure.Structure """ mm.mmneut_initialize_lic(mm.error_handler) neut_st = structure.Structure(mm.mmneut_a_ct(st)) mm.mmneut_terminate() return neut_st
[docs]def merge_subunit_sts(sts): """ Merge the input structure objects into one CT, while giving each chain a unique name. :type sts: Iterable of `structure.Structure` objects. :param sts: Subunit structures :rtype: tuple(structure.Structure, bool) :return: a tuple of the merged structure and a boolean indicating whether the renaming went OK (False if could not re-name chains due to running out of available names). """ available_chains = list(string.ascii_uppercase) renamed_ok = True merged_st = next(sts) for chain in merged_st.chain: available_chains.remove(chain.name) for st in sts: try: for chain in st.chain: chain.name = available_chains.pop(0) except IndexError: renamed_ok = False merged_st.extend(st) if not renamed_ok: warnings.warn( "Failed to give each subunit chain a unique name (more than 26 chains)" ) return merged_st, renamed_ok
[docs]def extract_structure(struct, indices, copy_props=False, renumber_map=False): """ Return a new structure object which contains the atoms of the struct that appear in the indices list and renumber map if requested. :param list[int] indices: Atom indices to extract :param bool copy_props: If True, then the new structure object will get structure-level properties from struct :param bool renumber_map: If True, return a also a renumbering dictionary :rtype: structure.Structure or structure.Structure, dict :return: Extracted structure if renumber_map is False. Extracted structure and renumbering dictionary, if renumber_map is True. In renumber_map, keys are atom numbers before extraction, and value for each is the new atom number """ if not renumber_map: return struct.extract(indices, copy_props=copy_props) indices = set(indices) all_indices = set(range(1, struct.atom_total + 1)) assert indices.issubset(all_indices) delete_indices = all_indices.difference(indices) new_st = struct.copy() rmap = new_st.deleteAtoms(delete_indices, renumber_map=True) if not copy_props: new_st.title = '' new_st.property.clear() return new_st, {idx: rmap[idx] for idx in sorted(indices)}
def _is_alternate_atom_property(name: str) -> bool: """Check if the atom property is of an alternative atom""" for alt_coord_name in ('_pdb_alt_', '_m_alt_', 's_pdb_altloc_chars'): if name.find(alt_coord_name) >= 0: return True return False
[docs]def remove_alternate_positions(st): """ Remove the alternate position of atoms in the structure. :param st: input structure. :type st: structure.Structure object :rtype: structure.Structure object """ for p in st.getAtomPropertyNames(): if _is_alternate_atom_property(p): st.deletePropertyFromAllAtoms(p) return st
[docs]def remove_atom_alternate_positions(atom): """ Remove the alternate position of atom :param atom: input atom :type atom: structure._StructureAtom object """ # Set occupancy to unity as the alternate position is removed. atom.property['r_m_pdb_occupancy'] = 1.0 for name in atom.property: if _is_alternate_atom_property(name): del atom.property[name]
[docs]def adjustImproperDihedral(st, value, atom1, atom2, atom3, atom4): """ Adjust an improper dihedral angle (generated, for example, from analyze.improper_dihedral_iterator) The improper dihedral angle (in degrees) made by atom1, atom2, atom3 and atom4 will be set to value and atom4 and all other atoms attached to that will be moved. NOTE: To adjust proper dihedrals, use Structure.adjust instead :type st: Structure object :param st: the structure being modified :type value: float :param value: value the internal coordinate will be set to :type atom1: int or _StructureAtom :param atom1: first atom in the coordinate :type atom2: int or _StructureAtom :param atom2: second atom in the coordinate :type atom3: int or _StructureAtom or None :param atom3: third atom in the coordinate (if None, the coordinate is a bond) :type atom4: int or _StructureAtom or None :param atom4: fourth atom in the coordinate (if None, the coordinate is an angle) :raises AtomsInRingError: if specified atoms are within a ring system. """ # Create a bitset for the moving atoms fixed_atom = atom3 moving_atom = atom4 bs = Bitset(size=mm.mmct_ct_get_atom_total(st.handle)) mm.mmct_atom_get_moving(st.handle, fixed_atom, st.handle, moving_atom, bs) if bs.get(fixed_atom): raise AtomsInRingError( "Can't do adjustment - atoms are in a ring: %i, %i" % (fixed_atom, moving_atom)) mm.mmct_atom_set_dihedral_angle(value, mm.MM_ANGLE_DEG, st.handle, atom1, st.handle, atom2, st.handle, atom3, st.handle, atom4, bs) return