Source code for schrodinger.application.matsci.amorphous

"""
Builder classes for making amorphous cells

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributor: Dave Geisen, Teng Zhang

import collections
import copy
import itertools
import logging
import math
import random
import string
import sys
from collections import OrderedDict
from collections import defaultdict
from collections import namedtuple
from past.utils import old_div

import networkx
import numpy
import psutil
import scipy.constants as constants
import scipy.spatial as spatial
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

import schrodinger
from schrodinger import structure
from schrodinger.application.desmond.bennett import BOLTZMANN
from schrodinger.application.matsci import buildcomplex
from schrodinger.application.matsci import clusterstruct
from schrodinger.application.matsci import coarsegrain
from schrodinger.application.matsci import desmondutils
from schrodinger.application.matsci import graph
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import msprops
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import textlogger
from schrodinger.application.matsci.nano import xtal
from schrodinger.forcefield import common
from schrodinger.infra import fast3d
from schrodinger.infra import mm
from schrodinger.infra import mmbitset
from schrodinger.infra import mmlist
from schrodinger.infra import structure as infrastructure
from schrodinger.job import jobcontrol
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import ringspear
from schrodinger.structutils import smiles
from schrodinger.structutils import standard_nuclear_orientation
from schrodinger.structutils import transform

# Color constants
COLOR_BY_MOLECULE = 'molecule'
GREEN = 'green'
RED = 'red'
BLUE = 'user27'
YELLOW = 'user16'
CYAN = 'user4'
ORANGE = 'orange'
PINK = 'pink'
DARK_GREEN = 'user3'
MAGENTA = 'purple'
GREY = 'gray'
SEAFOAM = 'userM'
OLIVE = 'olive'
LIME = 'user19'
DEEPRED = 'brown'
DEEPTEAL = 'user59'
LIGHTBLUE = 'blue9'
SLATE = 'blue14'
YELLOWORANGE = 'userT'
HOTPINK = 'userN'
COLORS = [
    GREEN, RED, BLUE, YELLOW, CYAN, ORANGE, PINK, DARK_GREEN, MAGENTA, GREY,
    SEAFOAM, OLIVE, LIME, DEEPRED, DEEPTEAL, LIGHTBLUE, SLATE, YELLOWORANGE,
    HOTPINK
]

TWO_PI = 2.0 * numpy.pi
OBEY_DENSITY = 'density'
OBEY_CLASH = 'clash'
OBEY_BOTH = 'both'

SCAFFOLD_ATOM_PROP = 'b_matsci_scaffold_atom'

OPLS2005 = mm.OPLS_NAME_F14
AMCELL_NO_SYSTEM_OUT = '-amcell.maegz'

INI = 'INI'
TRM = 'TRM'
CAS = 'CAS'
MON = 'A  '
WILDCARD = '*'
DIHEDRAL_NUM = 73
CHAIN_ID_PROP = 'i_matsci_polymer_chain_id'
COUPLER_ATOM_PROP = 'b_matsci_polymer_coupler_atom'
GROWER_ATOM_PROP = 'b_matsci_polymer_grower_atom'
# Groups together information needed when joining two fragments
AttachmentAtoms = namedtuple(
    'AttachmentAtoms', ['coupler', 'coupler_marker', 'grower', 'grow_marker'])

HEADFIRST = 'headfirst'
TAILFIRST = 'tailfirst'

# Tacticity constants
TACTICITY_ISO = 'Isotactic'
TACTICITY_SYN = 'Syndiotactic'
TACTICITY_ATAC = 'Atactic'

# Atom properties
HEAD_ATOM_PROP = msprops.HEAD_ATOM_PROP
TAIL_ATOM_PROP = msprops.TAIL_ATOM_PROP
HT_ATOM_PROPS = [HEAD_ATOM_PROP, TAIL_ATOM_PROP]
CASCADER_ATOM_PROP = 'b_matsci_polymer_cascader_atom'
CASCADER_MARKER_ATOM_PROP = 'b_matsci_polymer_cascader_marker_atom'
BRANCH_ATOM_PROP = 'b_matsci_polymer_branch_atom'
BRANCH_PCT_ATOM_PROP = 'r_matsci_polymer_branch_atom_pct'
BRANCH_GEN_ATOM_PROP = 'i_matsci_polymer_branch_atom_gen'
BRANCH_MAXGEN_ATOM_PROP = 'r_matsci_polymer_branch_atom_maxgen'
CHIRAL_BB_ATOM_PROP = 'b_matsci_polymer_chiral_bb_atom'
CHIRAL_NONE_MONOMER_PROP = 'b_matsci_polymer_monomer_chiral_none'
CARB_ATOM_IDENTIFIER_PROP = 's_matsci_carbohydrate_atom_identifier'
MARKER_START_PROP = 'b_matsci_polymer_marker_start'
MONOSACC_PROP = 's_matsci_polymer_monosaccharide_type'

# Role properties
MOIETY_PROP = 's_matsci_polymer_gui_moiety'
# Properties for initiators/terminators/cascaders/monomers for communication
# between GUI and backend
# Comma-delimited list of marker atom numbers
MARKER_PROP = 's_matsci_polymer_gui_marker_list'
# Set to one of the tacticity constants
TACTICITY_PROP = 's_matsci_polymer_gui_tacticity'
# The % branching probability for branching monomers
BRANCH_PCT_PROP = 'r_matsci_polymer_gui_branch_pct'
# The max number of branching genertions
BRANCH_MAX_PROP = 'i_matsci_polymer_gui_branch_max'
# Whether the backbone should be all trans
BBTRANS_PROP = 'b_matsci_polymer_gui_bb_trans'
# Number of cascade generations
CASCADE_GEN_PROP = 'i_matsci_polymer_gui_cascade_gen'
MONOMER_ORIG_IDX_PROP2 = 'i_matsci_polymer_monomer_orig_atom_idx2'
INITIATOR = 'initiator'
TERMINATOR = 'terminator'
CASCADER = 'cascader'
MONOMERX = 'monomer_x'
RIGID_BODY_PROP = 'b_matsci_rigid_body'
DEFAULT_VDW_SCALE = 0.5

# Chirality
R_S = [msprops.CHIRAL_R, msprops.CHIRAL_S]
PROP_BY_CHIRALITY = OrderedDict()
PROP_BY_CHIRALITY[msprops.CHIRAL_R] = msprops.CHIRAL_R_MONOMER_PROP
PROP_BY_CHIRALITY[msprops.CHIRAL_S] = msprops.CHIRAL_S_MONOMER_PROP
PROP_BY_CHIRALITY[None] = CHIRAL_NONE_MONOMER_PROP
ATOM_PROP_MARKER = [
    HEAD_ATOM_PROP, TAIL_ATOM_PROP, CASCADER_ATOM_PROP, BRANCH_ATOM_PROP
]
UNIQUE_ATOM_PROP = coarsegrain.CG_UNIQUE_PARTICLE_LABEL_KEY
ATOM_PROP_MARKER_IDX = list(zip(ATOM_PROP_MARKER, [0, 1, 2, 2]))
ORIG_ATOM_IDX_PROP = 'i_matsci_orig_atom_idx'
MAX_HIT_NUM = 50

BOLTZMANN_DIS = 'Boltzmann at'
UNIFORM_DIS = 'Uniform'
MAX_VDW_SCALE = 0.5

ConnectionInfo = namedtuple('ConnectionInfo',
                            ['missed_atom_ids', 'connecting_atom_ids'])

IMMERSED = 'immersed'
CONTAINER = 'container'
INTERFACE = 'interface'

# Convert AMU to kg, then kg to g (*1000) and A3 to CM3
AMU_TO_KG = constants.physical_constants['atomic mass constant'][0]
AMU_PER_A3_TO_G_PER_CM3 = AMU_TO_KG * 1000 * (1e8)**3

logger = None


[docs]def log_debug(msg): """ Add a message to debug logger file. :type msg: str :param msg: The message to debug logging """ global logger if logger is None: logger = textlogger.create_debug_logger(__name__) logger.debug(msg)
def _add_head_and_tail_props_to_neighbors(struct, indexes): """ Mark the two given atoms with the polymer role HEAD (first atom) and TAIL (second atom). :param `schrodinger.structure.Structure` struct: The structure to mark :param list indexes: [head atom index, tail atom index] :raises RuntimeError: If len(indexes) != 2 """ if len(indexes) != 2: raise RuntimeError('indexes must have length = 2') for index, role, capper_role in zip( indexes, (msprops.HEAD, msprops.TAIL), (msprops.HEAD_CAPPER, msprops.TAIL_CAPPER)): atom = struct.atom[index] atom.property[msprops.ROLE_PROP] = capper_role for neighbor in atom.bonded_atoms: neighbor.property[msprops.ROLE_PROP] = role
[docs]def has_head_tail_roles_marked(struct): """ Check if a monomer is marked with the polymer head and tail role properties :type struct: `schrodinger.structure.Structure` :param struct: The structure to check :rtype: bool :return: True if head and tail role atoms are marked, False if not """ role_names = [ msprops.HEAD, msprops.TAIL, msprops.HEAD_CAPPER, msprops.TAIL_CAPPER ] asl = ' or '.join([f'(atom.{msprops.ROLE_PROP} {x})' for x in role_names]) role_indexes = analyze.evaluate_asl(struct, asl) if len(role_indexes) != 4: return False roles = [struct.atom[x].property[msprops.ROLE_PROP] for x in role_indexes] return all(roles.count(x) == 1 for x in role_names)
[docs]def mark_monomer_head_tail_ce_th(struct): """ Find monomers marked with the Canvas method of marking head and tail - a Ce atom marks the head and tha Th atom marks the tail. Mark the Ce atom with the HEAD role and the Th atom with the TAIL role. :type struct: `schrodinger.structure.Structure` :param struct: The structure to mark :rtype: bool :return: True if head and tail role atoms were marked, False if not """ ce_atoms = analyze.evaluate_asl(struct, 'atom.ele Ce and atom.att 1') th_atoms = analyze.evaluate_asl(struct, 'atom.ele Th and atom.att 1') if len(ce_atoms) != 1 or len(th_atoms) != 1: return False _add_head_and_tail_props_to_neighbors(struct, (ce_atoms[0], th_atoms[0])) return True
[docs]def mark_monomer_head_tail_dummy(struct): """ Find monomers marked with the dummy atom method of marking head and tail - dummy atoms mark both the head and tail. In this case there is nothing to distinguish the two position, so mark the first dummy atom with the HEAD role and the second dummy atom with the TAIL role. :type struct: `schrodinger.structure.Structure` :param struct: The structure to mark :rtype: bool :return: True if head and tail role atoms were marked, False if not """ # Note - can't use ASL for dummies, see SHARED-6642 dummies = [ x.index for x in struct.atom if x.atomic_number == -2 and x.bond_total == 1 ] if len(dummies) != 2: return False _add_head_and_tail_props_to_neighbors(struct, dummies) return True
[docs]def mark_monomer_head_tail_rx(struct): """ Find monomers marked with the polymer builder method of marking head and tail - the Rx atom property is used to identify atoms. The atom designated as R1 is marked as the HEAD role and the atom designated as R2 with the TAIL role. :type struct: `schrodinger.structure.Structure` :param struct: The structure to mark :rtype: bool :return: True if head and tail role atoms were marked, False if not """ rx_atoms, max_x_val = buildcomplex.get_marker_atom_indexes_from_structure( struct) # There must be exactly two Rx values, 1 and 2 if any((1 not in rx_atoms, 2 not in rx_atoms, len(rx_atoms) != 2)): return False # There must be only one atom of each type for atoms in rx_atoms.values(): if len(atoms) != 1: return False indexes = [rx_atoms[x][0] for x in (1, 2)] _add_head_and_tail_props_to_neighbors(struct, indexes) return True
[docs]def mark_monomer_head_and_tail(struct): """ Find monomers marked with a variety of marking head and tail atoms and mark them with the polymer role property. Note that the atoms so marked will be the atoms to remove when building a polymer - the disposable H, dummy or other atom attached to the heavy atom that remains in the monomer when the polymer is built. :type struct: `schrodinger.structure.Structure` :param struct: The structure to mark :rtype: bool :return: True if head and tail role atoms were marked, False if not """ return (has_head_tail_roles_marked(struct) or mark_monomer_head_tail_ce_th(struct) or mark_monomer_head_tail_rx(struct) or mark_monomer_head_tail_dummy(struct))
[docs]def mark_orig_atom_idx(struct): """ Mark the original atom indexes as atom property. :type struct: 'schrodinger.structure.Structure' :param struct: each atom in this structure will be marked with atom index as original atom index property. """ for atom in struct.atom: atom.property[ORIG_ATOM_IDX_PROP] = atom.index
[docs]def get_orig_atom_idx(struct): """ Return the original atom indexes. :type struct: 'schrodinger.structure.Structure' or any 'schrodinger.structure._AtomCollection' :param struct: the structure to get original atom indexes. :rtype: list of int :return: the original atom indexes of this atom collection. """ return [atom.property[ORIG_ATOM_IDX_PROP] for atom in struct.atom]
[docs]def get_missed_atom_id_groups(struct, atom_ids): """ Return the missed atom indexed grouped by bond connectivity. :type struct: 'schrodinger.structure.Structure' :param struct: the structure to get missed atom indexes groups :type atom_ids: list of int :param atom_ids: each int is an atom index of a missing atom in the polymerfragment. :rtype: list of list :return: such sublist contains atom indexes that are bonded. """ struct = struct.copy() mark_orig_atom_idx(struct) sub_struct = struct.extract(atom_ids) return [get_orig_atom_idx(mol) for mol in sub_struct.molecule]
[docs]def get_density(struct): """ Calculate the density of the struct. :type struct: `schrodinger.structure.Structure` :param struct: A structure with chorus_properties :rtype: float :return: The density of the struct in unit g/cm^3 """ chorus_properties = xtal.get_chorus_properties(struct) params = xtal.get_params_from_chorus(chorus_properties) volume = xtal.get_volume_from_params(params) return AMU_PER_A3_TO_G_PER_CM3 * struct.total_weight / volume
[docs]def get_maximum_vdw_radius(struct): """ Find the maximum VDW radius of any atom in the structure :type struct: `schrodinger.structure.Structure` :param struct: The structure to check :rtype: float :return: The maximum VDW radius of any atom """ maxrad = 0.0 for atom in struct.atom: maxrad = max(maxrad, atom.radius) return maxrad
[docs]def get_color(index): """ Get the next color in our color rotation :type index: int :param index: The index in the color rotation to get. If index is greater than the number of colors, we just start over :rtype: str :return: A color as understood by the `schrodinger.structure._StructureAtom.color` property """ true_index = index % len(COLORS) return COLORS[true_index]
[docs]def random_rotation(struct, centroid=None, max_rotation=TWO_PI, no_rotation=False, rotate_velocities=False): """ Randomly rotate struct in 3 dimensions :type struct: `schrodinger.structure.Structure` :param struct: The structure to rotate :type centroid: 3-element numpy array :param centroid: the rotation center [x, y, z] :type max_rotation: float :param max_rotation: The maximum rotation to perform :param bool no_rotation: If True, do not rotate the structure :param bool rotate_velocities: If True, besides the structure, also rotate FFIO atom velocities (if present) :rtype: list, list :return: Centroid and rotation matrix """ # Make sure the centroid is at the origin if centroid is None: centroid = transform.get_centroid(struct)[:3] struct.setXYZ(struct.getXYZ() - centroid) if no_rotation: matrix = numpy.eye(4).tolist() else: # Obtain a random vector to rotate around and the amount to rotate vec = [random.uniform(-0.5, 0.5) for x in range(3)] nvec = transform.get_normalized_vector(numpy.array(vec)) angle = random.uniform(0.0, max_rotation) # Get the rotation matrix and perform the rotation matrix = transform.get_rotation_matrix(nvec, angle) transform.transform_structure(struct, matrix) if rotate_velocities: for atom in struct.atom: vel = msutils.get_atom_ffio_velocity(atom) vel = transform.transform_atom_coordinates(vel, matrix) msutils.set_atom_ffio_velocity(atom, vel) # Move the structure back to its original centroid struct.setXYZ(struct.getXYZ() + centroid) return centroid, matrix
[docs]def prepare_building_block(struct, index, recolor=True, preserve_res=False): """ Do some prepwork on a component before adding them to the disordered cell :type struct: `schrodinger.structure.Structure` :param struct: The structure to modify :type index: int :param index: The index of this structure in the list of components :type recolor: bool :param recolor: Whether to color the molecule a single color :type preserve_res: bool :param preserve_res: Whether to preserve the current residue information """ # Set properties of each structure to make it easy to distinguish # the components in the full cell if recolor: color = get_color(index - 1) # Ensure we stay within 4 character pdbres values (MATSCI-769) if preserve_res: set_pdbres = False else: new_pdbres = 'M%d' % index new_pdbres = new_pdbres.ljust(4) old_pdbres = struct.atom[1].pdbres.strip() # Define->Molecule->Molecule type needs single residue (MATSCI-6087) set_pdbres = not old_pdbres or old_pdbres == 'UNK' or len( struct.residue) != 1 for atom in struct.atom: if recolor: atom.color = color if set_pdbres: atom.pdbres = new_pdbres atom.resnum = 1
[docs]def detect_custom_mmffld_data(structs): """ Detect if a custom FEP charge block exists in the structure :param list structs: List of `Structure` objects to check :rtype: bool :return: Whether a custom FEP charge block exists in any of the strucures """ has_data = False for struct in structs: # I'm not sure the attempt to use get_or_open_additional_data is needed # in this case - in all tested cases when reading a structure from a # file like we do here the get_unrequested call works fine, but this is # the pattern used in other places so I'm keeping it here for safety # until we better understand this. try: handle = mm.mmct_ct_m2io_get_unrequested_handle(struct) except: try: handle = mm.mmct_ct_get_or_open_additional_data(struct, True) except: handle = None if handle: has_data = bool( mm.m2io_get_number_blocks(handle, 'mmffld_custom_virt')) if has_data: break return has_data
[docs]def get_generic_end_structure(is_coarse_grain=False): """ Get a single atom structure that can be used as a terminal (initiator/terminator) moiety. :type is_coarse_grain: bool :param is_coarse_grain: whether the structure is coarse grain :rtype: `schrodinger.structure.Structure` :return: Structure of one single H or C atom with Br marker. """ struct = structure.create_new_structure() struct.addAtom('C' if is_coarse_grain else 'H', 0, 0, 0) struct.addAtom('Br', 1, 0, 0) struct.addBond(1, 2, 1) struct.property[MARKER_PROP] = '2' # Without this line the H/C gets an "unknown atom type" color buildcomplex.transmute_atom(struct.atom[1], struct.atom[1].element) return struct
[docs]def dispersity(structs): """ Return dispersity of the structures. Notes with Einstein notation (duplicated index means summation): Mn = NiMi/sum(Ni); Mw = NiMiMi/NiMi Mw/Mn = sum(Ni) * NiMiMi / (NiMi)^2 When Ni = 1, Mw/Mn = len(Mi) * MiMi /(sum(Mi))^2 :param structs: the list of structures to calculate dispersity. :type structs: list of structure.Structure :return: dispersity over all structures :rtype: float """ mols = [mol for st in structs for mol in st.molecule] mws = [mol.extractStructure().total_weight for mol in mols] mws_squared = numpy.power(mws, 2) return len(mws) * numpy.sum(mws_squared) / numpy.power(numpy.sum(mws), 2)
[docs]class FlorySchulzDistribution: """ Molecular weight distribution in linear step-growth homo polymers (single monomer type or equimolar quantities for two types of monomers). Collected and published by Polymer Properties Database (polymerdatabase.com) Refs: P. J. Flory, Journal of the American Chemical Society, 58, 1877-1885 (1936) Wallace H. Carothers, Trans. Faraday Soc., Vol. 32, pp 39-49 (1936) Paul L. Flory, Principles of Polymer Chemistry, Ithaca, New york, 1953 Paul J. Flory, Chem. Rev., Vol. 39, No. 1, 137 (1946) Story (paraphrase): Monomer conversion (p) is reactivated monomers over all initial monomers An oligomer containing x repeat units must have undergone x-1 reactions """
[docs] def __init__(self, conversion=0.5, initial_num=1000, xmer=5): """ :param conversion: reactivated monomers over all initial monomers :type conversion: float in [0, 1) :param initial_num: the initial number of monomers :type initial_num: positive int :param xmer: repeat unit number in a chain :type xmer: positive int """ self.conversion = conversion self.xmer = xmer self.initial_num = initial_num
[docs] def polydispersity(self): """ Carothers equation in step-growth polymerization, proposed by Wallace Carothers, who invented nylon in 1935. Conditions: two monomers in equimolar quantities Ref: https://en.wikipedia.org/wiki/Carothers_equation :return: polydispersity according to Carothers equation for two monomers in equimolar quantities :rtype: float """ return 1 + self.conversion
[docs] def xmerProbability(self): """ The number probability of x-mer at the specific conversion. :return: The probability of a chain is x-mer long :rtype: float """ return (1 - self.conversion) * pow(self.conversion, self.xmer - 1)
[docs] def moleculeNum(self): """ The number of molecules (polymer, oligomer, and monomers). :return: the total number of molecules at the specific conversion. :rtype: float """ return self.initial_num * (1 - self.conversion)
[docs] def degreeOfPolymerization(self): """ Degree of polymerization at the specific conversion. :return: Degree of polymerization at the specific conversion. :rtype: float """ return 1 / (1 - self.conversion)
[docs] def setConversion(self, deg_of_polym=2): """ Set the conversion based on degree of polymerization. :param dp: Degree of polymerization :type dp: int """ self.conversion = 1 - 1 / deg_of_polym
[docs] def xmerNum(self): """ The number of molecules of x-mer length at the specific conversion. :return: number of molecules :rtype: float """ return self.moleculeNum() * self.xmerProbability()
[docs] def numberDistribution(self, mer_lim=None): """ The number distribution: monomer length vs number probability. :param mer_lim: the lower and upper limits of x-mers :type mer_lim: None or a tuple of two int :return: monomer length, probability :rtype: 2d numpy array """ if mer_lim is None: mer_lim = (1, 1000000) mer_start, mer_end = list(map(int, mer_lim)) mer_num = mer_end - mer_start + 1 xmers = numpy.linspace(mer_start, mer_end, num=mer_num) probs = (1 - self.conversion) * numpy.power(self.conversion, xmers - 1) return numpy.vstack((xmers, probs)).T
[docs] def weightDistribution(self, mer_lim=None): """ The weight distribution: monomer length vs length-weighted probability. :param mer_lim: the lower and upper limits of x-mers :type mer_lim: None or a tuple of two int :return: monomer length, length-weighted probability :rtype: 2d numpy array """ xmer_prop = self.numberDistribution(mer_lim=mer_lim) xmer_prop[:, 1] *= xmer_prop[:, 0] xmer_prop[:, 1] *= (1 - self.conversion) return xmer_prop
[docs]class Box(object): """ Class that defines the region a new structure may be placed in """ X, Y, Z = list(range(3)) AXES = [X, Y, Z]
[docs] def __init__(self, vertices): """ Create a box object :type vertices: list :param vertices: A six item list that defines the min and max value of X, Y and Z. The six values in order are: Xmin, Xmax, Ymin, Ymax, Zmin, Zmax """ self.vertices = vertices
[docs] def getMinMax(self, axis): """ Get the min and max values for axis :type axis: int :param axis: One of the class constants X, Y or Z :rtype: (float, float) :return: The min and max value for the requested axis """ return self.vertices[axis * 2], self.vertices[axis * 2 + 1]
[docs] def getLargestSpan(self): """ Get the largest span of the box in the X, Y or Z direction :rtype: float :return: The largest span (delta between the min and max value) """ spans = [] for axis in self.AXES: amin, amax = self.getMinMax(axis) spans.append(amax - amin) return max(spans)
[docs] def getCentroid(self): """ Get the centroid of the box :rtype: `numpy.array` :return: The centoid of the box - a numpy array of 3 items (x, y, z) """ centroid = [] for axis in self.AXES: amin, amax = self.getMinMax(axis) centroid.append(old_div((amin + amax), 2.)) return numpy.array(centroid)
[docs] def getPointInBox(self): """ Get a point in a random position in the box :rtype: list :return: [x, y, z] coordinates of a random point in the box """ return [random.uniform(*self.getMinMax(a)) for a in self.AXES]
[docs] def isPointInBox(self, point): """ Is the given point inside the box :type point: iterable :param point: An iterable of length 3 floats that gives the x, y and z values of the point in question :rtype: bool :return: Whether the point falls within the box """ for axis in self.AXES: amin, amax = self.getMinMax(axis) if not amin <= point[axis] < amax: return False return True
[docs] def getTranslationToBox(self, point): """ Return a vector that translates the mirror image of a point inside the box :type point: iterable :param point: An iterable of length 3 floats that gives the x, y and z values of the point in question :rtype: list :return: A list of 3 floats that gives the x, y and z coordinates of the mirror image of point that lies within the box """ newpoint = [] for axis in self.AXES: amin, amax = self.getMinMax(axis) width = amax - amin value = point[axis] while value >= amax: value = value - width while value < amin: value = value + width newpoint.append(value) return [b - a for a, b in zip(point, newpoint)]
[docs] def isValidPoint(self, point): """ Check if point is a valid point - the default implementation just returns True. Subclasses may use this to weed out points that violate custom box conditions """ return True
[docs]class BoxWithInnerHull(Box): """ A cubic Box object that contains an inner non-cubic region that limits the valid volume for the components to be placed in. """
[docs] def __init__(self, hull, *args, **kwargs): """ Create a BoxWithInnerHull object :type hull: `scipy.spatial.Delaunay` :param hull: The convex hull defining the region of space the components are allowed to be placed in See parent class for additional documentation """ Box.__init__(self, *args, **kwargs) self.hull = hull
[docs] def isPointInBox(self, point, *args): """ Overrides the parent method to check to see if the given point is also inside the hull :type point: iterable :param point: An iterable of length 3 floats that gives the x, y and z values of the point in question :rtype: bool :return: Whether the point falls within the box """ raw_check = Box.isPointInBox(self, point, *args) return raw_check and self.isPointInHull(point)
[docs] def isPointInHull(self, point): """ Check to see if the point is inside the hull :type point: iterable :param point: An iterable of length 3 floats that gives the x, y and z values of the point in question :rtype: bool :return: Whether the point falls within the hull """ # This is how you check if a point is inside the complex hull return self.hull.find_simplex(point) >= 0.0
[docs] def getPointInBox(self, max_attempts=1000): """ Overrides the parent class to get a point that is inside the hull rather than just in the box :type max_attempts: int :param max_attempts: Maximum number of attempts to try to find a point that is both inside the box and the hull :rtype: list :return: List of three floats that give the xyz coordinates of a point in the hull :raise ScaffoldError: If a point can't be found inside the hull """ for ptry in range(max_attempts): point = Box.getPointInBox(self) if self.isPointInHull(point): return point raise ScaffoldError('Unable to find points inside the substrate ' 'container')
[docs] def isValidPoint(self, point): """ Overrides the parent method to check to make sure the point is inside the hull :type point: iterable :param point: An iterable of length 3 floats that gives the x, y and z values of the point in question :rtype: bool :return: Whether the point is inside the hull """ return self.isPointInHull(point)
[docs]class ScaffoldError(ValueError): """ Class for errors involving the scaffold input """
[docs]class Scaffold(object): """ A Scaffold is a structure that occupies space in a cluster of molecules but is not considered part of the system itself. Scaffolds could be: 1) A nanoparticle that has an amorphous system surrounding it or a protein surrounded by a box of water molecules, 2) A container such as a zeolyte or nanotube that holds an amorphous system within it, 3) a surface upon which a disordered system is laid down. """
[docs] def __init__(self, struct=None, volume=None): """ Create a Scaffold object :type struct: `schrodinger.structure.Structure` or None :param struct: The scaffold structure, or None if there is no scaffold for the cell. """ self.scaffold = struct if volume is None: self.volume = self.approximateVolume(self.scaffold) else: self.volume = volume if self.scaffold: self.centroid = transform.get_centroid(self.scaffold)[:3] # Mark scaffold atoms for atom in self.scaffold.atom: atom.property[SCAFFOLD_ATOM_PROP] = True self.has_rings = bool(self.scaffold.find_rings()) else: self.centroid = [0, 0, 0] self.has_rings = False self.box = None
def __bool__(self): """ Causes scaffold objects to evaluate to False if the scaffold structure evaluates to False :rtype: bool :return: A boolean cast of the scaffold property """ return bool(self.scaffold)
[docs] @staticmethod def approximateVolume(input_struct, vdw_scale=1.0, seed=None, sphere_radius=None, sphere_origin=None, buffer_len=2, return_ratio=False): """ Computes the approximate volume of the scaffold molecule using a Monte Carlo sampling algorithm. The alogorithm: 1) define a box or sphere that fully encloses the structure 2) randomly pick a point inside the shape 3) check if the point lies inside the VDW radius of an atom 4) iterate over steps 2 & 3 a bunch 5) The volume is shape_volume * fraction of points inside VDW radii :type vdw_scale: float :param vdw_scale: The VdW scale factor to apply to VdW radii when checking to see if a point is "inside" an atom :type seed: int or None :param seed: the seed for random, if None then random is not re-seeded :type sphere_radius: float :param sphere_radius: specifies to sample points in a sphere of this radius in Angstrom rather than the default which uses the smallest enclosing box :type sphere_origin: numpy.array :param sphere_origin: the origin in Angstrom of the sphere used if sampling points in a sphere :type buffer_len: float :param buffer_len: a shape buffer lengths in Angstrom :type return_ratio: bool :param return_ratio: whether to return the hit ratio as a percent :rtype: float or float, float :return: The approximate volume of a molecule in Angstrom^3 followed by the hit ratio if return_ratio is used :note: This is expensive, so is only done at class initialization. After that, call getExcludedVolume to obtain the cached volume. """ if not input_struct: return 0.0 struct = input_struct.copy() standard_nuclear_orientation.standard_nuclear_orientation(struct) # see MATSCI-7250 GO custom classes, where during STU tests # it was found that different results where obtained for different # numbers of processors, having to do with how the processes # inherit random, best to use a custom random if seed is not None: my_random = random.Random() my_random.seed(seed) else: my_random = random if sphere_radius is None: # Determine the box size mins = [min(struct.getXYZ()[:, x]) - buffer_len for x in range(3)] maxes = [max(struct.getXYZ()[:, x]) + buffer_len for x in range(3)] spans = [maxes[x] - mins[x] for x in range(3)] vol = spans[0] * spans[1] * spans[2] get_coords = lambda: numpy.array( [mins[x] + my_random.random() * spans[x] for x in range(3)]) else: # Determine the sphere size sphere_radius += buffer_len vol = 4 * numpy.pi * sphere_radius**3 / 3 mean = 0. std_dev = 1. if sphere_origin is None: sphere_origin = transform.get_centroid(struct)[:3] def get_coords(): # taken from functionalize_nanoparticle_driver.py vec = numpy.array( [my_random.gauss(mean, std_dev) for i in range(3)]) vec = (sphere_radius * my_random.uniform(0, 1)** (1. / 3.)) * transform.get_normalized_vector(vec) return vec + sphere_origin # Create a distance cell to find any atom within the max VDW radius of # a point maxrad = get_maximum_vdw_radius(struct) cell = mm.mmct_create_distance_cell(struct.handle, maxrad * vdw_scale) hits = 0 numtries = max(300 * struct.atom_total, 20000) for tries in range(numtries): coords = get_coords().tolist() neighbors_mm = mm.mmct_query_distance_cell(cell, *coords) neighbors = mmlist._mmlist_to_pylist(neighbors_mm) mm.mmlist_delete(neighbors_mm) for neighbor in neighbors: atom = struct.atom[neighbor] dist2 = sum([(a - b)**2 for a, b in zip(coords, atom.xyz)]) radius = atom.radius * vdw_scale if dist2 <= radius * radius: hits += 1 break mm.mmct_delete_distance_cell(cell) ratio = hits / numtries if return_ratio: return vol * ratio, 100 * ratio else: return vol * ratio
[docs] def getNewCell(self): """ Return a new structure that contains the scaffold molecule. If there is scaffold structure, the returned structure is empty :rtype: `schrodinger.structure.Structure` :return: A structure object that is either empty or contains the scaffold structure, depending on whether a scaffold structure exists or not. """ cell = structure.create_new_structure() if self.scaffold: cell.extend(self.scaffold) return cell
[docs] def getExcludedVolume(self): """ Get the amount of volume the scaffold occupies :rtype: bool :return: The volume computed by approximateVolume when this object was created. """ return self.volume
[docs] def defineBox(self, volume): """ The default implementation is to define a cube with all sides equal that is centered on the scaffold centroid. :type volume: float :param volume: The volume of the desired cube :rtype: `Box` :return: The Box object for this scaffold """ side = volume**(old_div(1, 3.0)) if self.scaffold: halfside = old_div(side, 2.0) vertices = [] for coord in self.centroid: vertices += [coord - halfside, coord + halfside] else: vertices = [0, side] * 3 self.box = Box(vertices) return self.box
[docs] def addPBCProperties(self, struct, volume): """ Add periodic boundary condition properties to the structure. This method overwrites any existing periodic boundary condition properties, including Desmond chorus and PDB space group properties. :type struct: `schrodinger.structure.Structure` :param struct: The structure to add the properties to :type volume: float :param volume: The volume of the cubic periodic boundary condition """ side = volume**(old_div(1, 3.0)) aval = bval = cval = side self.addDesmondPBC(struct, side) self.addSpaceGroupPBC(struct, aval, bval, cval)
[docs] def addDesmondPBC(self, struct, ax, ay=0.0, az=0.0, bx=0.0, by=None, bz=0.0, cx=0.0, cy=0.0, cz=None): """ Add properties to the structure that define the periodic boundary condition in the way Desmond wants it defined. :type struct: `schrodinger.structure.Structure` :param struct: The structure to add the properties to :type ax: float :param ax: The value of the ax box property :type ay: float :param ay: The value of the ay box property. Defaults to 0. :type az: float :param az: The value of the az box property. Defaults to 0. :type bx: float :param bx: The value of the bx box property. Defaults to 0. :type by: float :param by: The value of the by box property. If not given, this value is set the same as ax. :type bz: float :param bz: The value of the bz box property. Defaults to 0. :type cx: float :param cx: The value of the cx box property. Defaults to 0. :type cy: float :param cy: The value of the cy box property. Defaults to 0. :type cz: float :param cz: The value of the cz box property. If not given, this value is set the same as ax. """ desmondutils.store_chorus_box_props(struct, ax, ay=ay, az=az, bx=bx, by=by, bz=bz, cx=cx, cy=cy, cz=cz)
[docs] def addSpaceGroupPBC(self, struct, aval, bval, cval, alpha=90., beta=90., gamma=90., space_group='P 1'): """ Add properties to the structure that define the periodic boundary condition in the way Crystal wants it defined. :type struct: `schrodinger.structure.Structure` :param struct: The structure to add the properties to :type side: float :param side: The length of one side of the cubic periodic boundary condition """ xcl = xtal.Crystal struct.property[xcl.SPACE_GROUP_KEY] = space_group struct.property[xcl.A_KEY] = aval struct.property[xcl.B_KEY] = bval struct.property[xcl.C_KEY] = cval struct.property[xcl.ALPHA_KEY] = alpha struct.property[xcl.BETA_KEY] = beta struct.property[xcl.GAMMA_KEY] = gamma
[docs] def getPBCMinMaxes(self): """ Get the length of one side of the periodic boundary condition :type cell: `schrodinger.structure.Structure` :param cell: The current cell with structures that have already been placed :rtype: list :return: A six item list that defines the min and max value of X, Y and Z. The six values in order are: Xmin, Ymin, Zmin, Xmax, Ymax, Zmax. If not PBC box has been created, a list of all 0.0 is returned """ if self.box: mins = [] maxes = [] for axis in self.box.AXES: amin, amax = self.box.getMinMax(axis) mins.append(amin) maxes.append(amax) return mins + maxes else: return [0.0] * 6
[docs] def getPointInBox(self): """ Get a point in a random position in the box :rtype: list :return: [x, y, z] coordinates of a random point in the box """ return self.box.getPointInBox()
[docs]class BuilderWithClashDetection(object): """ The base class for the amorphous builder classes """
[docs] def __init__(self, basename='cell', backend=None, logger=None, color=None, vdw_scale=1.0): """ Create a Builder object :type basename: str :param basename: The base name for structure files created by this builder :type backend: `schrodinger.job.jobcontrol._Backend` :param backend: The job control backend we are running under, or None if not running under a backend :type logger: `logging.Logger` :param logger: The logger for this builder :type color: str or None :param color: Set to module constant COLOR_BY_MOLECULE to color the structures in the cell by molecule :type vdw_scale: float :param vdw_scale: Scale factor for VdW radii during clash detection """ self.basename = basename self.backend = backend if self.backend is None: self.backend = jobcontrol.get_backend() self.logger = logger self.vdw_scale = vdw_scale self.color = color self.has_rings = True
[docs] def log(self, msg, level=logging.INFO): """ Log a message :type msg: str :param msg: The message to log :type level: `logging` constant :param level: A log level constant from the `logging` module such as INFO, WARNING, ERROR... """ textlogger.log(self.logger, msg, level=level)
[docs] def buildingBlocksHaveRings(self): """ Override in subclasses to check if any of the building blocks have rings. If none do, it will be a waste of time to look for them in the larger structure. :rtype: bool :return: If any of the building blocks have rings """ return self.has_rings
[docs] def findRings(self, struct): """ Find all the rings in the provided structure. Since ring-finding is expensive, its best to pre-calculate them once. Since the coordinates of the ring don't matter, we can use these rings over and over as long as the atom numbering doesn't change. :type struct: `schrodinger.structure.Structure` :param struct: The structure to find the rings for :rtype: list :return: List of `schrodinger.structure._Ring` objects in the given structure. """ if self.buildingBlocksHaveRings(): return list(struct.ring) else: return []
[docs] def findRingSpears(self, ring_struct, spear_struct=None, rings=None, ring_based=True, pbc=None): """ Find all cases where a bond spears a ring :type ring_struct: `schrodinger.structure.Structure` :param ring_struct: The structure containing the rings :type spear_struct: `schrodinger.structure.Structure` :param spear_struct: The structure containing the atoms that might spear the rings. If not provided, ring_struct will be used. :type rings: list :param rings: Each item of the list is a `schrodinger.structure._Ring` object. This is the list returned by the findRings() method. If not provided, they will be calculated on the fly - which takes considerable time. If findRingSpears will be run more than once on the same structure (even if the geometry changes), the rings should be precalculated via findRings and passed in via this parameter. :type ring_based: bool :param ring_based: Whether the returned dictionary should contain keys that are atom indexes of the speared ring (True), or of the bond spearing the ring (False) :type pbc: None, infrastructure.PBC, or list :param pbc: If periodic boundary conditions should be used, provide either an infrastructure.PBC object or the parameters to construct one. Allowed constructors: * a, b, c : box lengths * a, b, c, alpha, beta, gamma box : box lengths and angles * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors :rtype: dict :return: If ring_based=True, keys are an atom index of one of the atoms in the speared ring, and values are the atom index of one of the atoms in the spearing bond. If ring_based=False, the keys/values are flipped. """ if not rings: rings = self.findRings(ring_struct) if not rings: # No rings to spear return {} spears = ringspear.check_for_spears(ring_struct, spear_struct=spear_struct, rings=rings, first_only=False, pbc=pbc) # Convert the spears list to a list of clashes clashes = {} for spear in spears: spear_at = spear.spear_indexes[0] ring_at = spear.ring_indexes[0] if ring_based: clashes.setdefault(ring_at, []).append(spear_at) else: clashes.setdefault(spear_at, []).append(ring_at) return clashes
[docs] def removeIgnoredClashes(self, all_clashes, ignored_clashes): """ Get only those clashes that are not ignored. :type all_clashes: dict :param all_clashes: All found clashes :type ignored_clashes: dict :param ignored_clashes: Clashes that should be ignored :rtype: dict :return: Those clashes in all_clashes that are not in ignored_clashes. Keys are atom indexes, values are all the atom indexes that clash with that atom """ used_clashes = {} for clashee, clashers in all_clashes.items(): if clashee not in ignored_clashes: # All of this atom's clashes are not ignored used_clashes[clashee] = clashers else: # Find if any of this atom's clashes are not ignored raw_clashers = set(ignored_clashes[clashee]) for clasher in clashers: if clasher not in raw_clashers: used_clashes.setdefault(clashee, []).append(clasher) return used_clashes
[docs] def getInfraStructure(self, struct): """ Get an infrastructure Structure object and an associated bitset struct.atom_total long :type struct: `schrodinger.structure.Structure` :param struct: The python module Structure object :rtype: tuple :return: First item of the tuple is a `schrodinger.infra.structure.Structure` object. Second item is a bitset that is struct.atom_total long """ infra_struct = infrastructure.Structure_get_structure(struct) bitset = mmbitset.Bitset(size=struct.atom_total) bitset.fill() return infra_struct, bitset
[docs] def getClashes(self, struct1, cutoff, struct2=None, pbc=None, check_14=False): """ Find clashes - either intrastructure if struct2 is None, or interstructure if struct2 is not None. :type struct: `schrodinger.structure.Structure` :param struct: The structure for intrastructure clashes or the first structure for interstructure clashes :type cutoff: float :param cutoff: The cutoff for finding clashes. This value is multipied times the sum of the VDW radii of the two atoms. If the distance is less than this scaled VDW radii sum, a clash exists :type struct2: `schrodinger.structure.Structure` :param struct2: The second structure for interstructure clashes :type pbc: None, infrastructure.PBC, or list :param pbc: If periodic boundary conditions should be used, provide either an infrastructure.PBC object or the parameters to construct one. Allowed constructors: * a, b, c : box lengths * a, b, c, alpha, beta, gamma box : box lengths and angles * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors :type check_14: bool :param check_14: If False, the atom pairs separated by 3 covalent bonds are excluded for clash check. :rtype: dict :return: keys are atom indexes in struct1 (or struct2 if defined), and values are all the atom indexes in struct1 that clash with that atom """ # Create any necessary periodic boundary condition if pbc and not isinstance(pbc, infrastructure.PBC): pbc = infrastructure.PBC(*pbc) # Set up the parameters for finding clashes hbonds = None salt_bridges = None cparams = infrastructure.ContactParams() cparams.setCutoff(cutoff) cparams.setShow1_4(check_14) # Convert to infrastructure Structures and find clashes infra_struct1, bitset1 = self.getInfraStructure(struct1) if struct2: infra_struct2, bitset2 = self.getInfraStructure(struct2) contacts = infrastructure.get_contacts(infra_struct1, bitset1, infra_struct2, bitset2, cparams, hbonds, salt_bridges, pbc) else: contacts = infrastructure.get_contacts(infra_struct1, bitset1, cparams, hbonds, salt_bridges, pbc) # Convert the returned clashes to a dict of clashes all_clashes = {} for contact in contacts: atom1 = contact.getAtom1().getIndex() atom2 = contact.getAtom2().getIndex() all_clashes.setdefault(atom1, []).append(atom2) return all_clashes
[docs] def checkForIntraStructureClashes(self, struct, scale=None, pbc=None, rings=None): """ Check for any intrastructure clashes :type struct: `schrodinger.structure.Structure` :param struct: The structure for intrastructure clashes :type scale: float :param scale: The cutoff for finding clashes. This value is multipied times the sum of the VDW radii of the two atoms. If the distance is less than this scaled VDW radii sum, a clash exists :type pbc: None, infrastructure.PBC, or list :param pbc: If periodic boundary conditions should be used, provide either an infrastructure.PBC object or the parameters to construct one. Allowed constructors: * a, b, c : box lengths * a, b, c, alpha, beta, gamma box : box lengths and angles * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors :type rings: list :param rings: The precalculated rings returned by findRings() on the structure. If not supplied, they will be calculated on the fly. :rtype: dict :return: keys are atom indexes, and values are all the atom indexes that clash with that atom. A clash may come from two atoms too close together, or a ring that is speared by a bond. """ if not scale: scale = self.vdw_scale # Get the close atom clashes clashes = self.getClashes(struct, scale, pbc=pbc) # Add in the ring spears ring_clashes = self.findRingSpears(struct, rings=rings, pbc=pbc) for at1, at2 in ring_clashes.items(): clashes.setdefault(at1, []).extend(at2) return clashes
[docs] @staticmethod def countClashes(clashes): """ Count the total number of clashes :type clashes: dict :param clashes: keys are atom indexes, values are all the atom indexes that clash with that atom """ return sum([len(x) for x in clashes.values()])
[docs] def colorByMolecule(self, struct): """ Color each molecule in struct a different color :type struct: `schrodinger.structure.Structure` :param struct: The structure to color """ for mol in struct.molecule: color = get_color(mol.number - 1) for atom in mol.atom: atom.color = color
[docs]def nearest_rotatable_bonds(struct, rings, backbone_atom_id, side_atom_ids, pre_atom_prop=BRANCH_ATOM_PROP, atom_prop=HEAD_ATOM_PROP): """ Search the side groups of one atom and find the 'first neighbor' rotatable bonds. :type struct: `schrodinger.structure.Structure` :param struct: structure to search bonds :type rings: list :param rings: List of ring atom index lists :type backbone_atom_id: int :param backbone_atom_id: the atom id of one backbone atom :type side_atom_ids: list :param side_atom_ids: list of atom ids of the side groups for the backbone_atom :type pre_atom_prop: str :param pre_atom_prop: the property to be set True for the atom in the rotatable bonds that is closer to the backbone_atom_id :type atom_prop: str :param atom_prop: the property to be set True for the atom in the rotatable bonds that is deeper in side group :rtype: list of tuple :return: each tuple is two atom indexes, indicating 'first neighbor' rotatable bonds in side groups for one atoms """ bonds_to_break = [] atom_ids_to_walk = [backbone_atom_id] side_atom_id_set = set(side_atom_ids) while atom_ids_to_walk: pre_atom = struct.atom[atom_ids_to_walk.pop(0)] for atom in pre_atom.bonded_atoms: if atom.index not in side_atom_id_set: continue if analyze.is_bond_rotatable(struct.getBond(pre_atom.index, atom.index), rings=rings, allow_methyl=True): pre_atom.property[pre_atom_prop] = True atom.property[atom_prop] = True bonds_to_break.append(tuple([pre_atom.index, atom.index])) else: atom_ids_to_walk.append(atom.index) side_atom_id_set.remove(atom.index) return bonds_to_break
[docs]class BuilderGrowInCellMixin(object): """ A mixin for classes that uses in-cell polymer grow method. """
[docs] def setRingsForOneMol(self, new_init_frag, ringnum): """ Create `_Ring` objects for each fragment in one molecule. :type new_init_frag: `PolymerFragment` :param new_init_frag: the first fragment containing initiator :type ringnum: int :param ringnum: the total number of rings of all molecules in cell """ for frag in new_init_frag.fragments(): if frag.rings_atom_ids: for ring_ids in frag.rings_atom_ids: one_ring_struct = structure._Ring(self.cell, ringnum, ring_ids, self.all_rings) self.all_rings.append(one_ring_struct) frag.rings.append(one_ring_struct)
[docs] def placeOneIniInCell(self, frag, new_chain=True): """ Place a new or dead chain fragment containing initiator into the cell. If new_chain=True, randomly pick a position in cell; make sure the position is away from pre-existing initiators; check contact of the newly added fragments against all pre-existing atoms. If contact exists, randomly pick anothor position. If new_chain=False, find a random position in the largest void in cell; check contact; if contact exists, loop over all the gridded positions in the largest void. :type frag: `PolymerFragment` :param frag: the first fragment containing initiator :type new_chain: bool :param new_chain: if True, the fragment is from a new chain; else, the fragment is from a dead chain. :rtype: bool :return: True, if the initiator fragment is successfully placed in cell """ all_xyz = self.cell.getXYZ() self.cur_frag = frag self.cur_bitset = mmbitset.Bitset.from_list(self.cell.atom_total, self.cur_frag.atom_ids) self.setDistanceCell() for new_xyz in self.getPosition(new_chain): copy_struct = frag.template_polymer.copy() random_rotation(copy_struct, centroid=numpy.array([0, 0, 0])) mol_xyz = copy_struct.getXYZ() + new_xyz all_xyz[frag.atom_gids_in_mol, :] = mol_xyz self.cell.setXYZ(all_xyz) if not self.hasClashes(): # succeed to place the INI frag into cell return True # Centroids of INI frags are aways, but newly added INI # frag has clashes with pre-existing atoms # Shouldn't hit this at the beginning of grow_together, # unless very short polymer with very large INI else: # reach the max trials to place INI frag into cell return False
[docs] def initiateMultiMols(self, frags, grid_mids, margins, indexes): """ Place multiple initiators of the fragments into the cell. :type frags: list of `PolymerFragment` :param frags: the first fragments containing initiators :param grid_mids: middle points on a grid layout :type grid_mids: list :param margins: the margin between two middle points :type margins: list :param indexes: the available grid indexes :type indexes: list :return: the fragment failed to be placed :rtype: list of `PolymerFragment` """ self.distance_cell = infrastructure.DistanceCell( self.cell.handle, self.max_distance, self.pbc) if any(x.rings for x in frags): self.ringspear_distance_cell = infrastructure.DistanceCell( self.cell.handle, ringspear.ROUGH_CUT_DISTANCE, self.pbc) # Rotate and move the molecules so that the initiators fit into the grids, # and then filter out the ones clashing with the previous structure. all_xyz = self.cell.getXYZ() xyzs = self.getPositions(len(frags), grid_mids, margins, indexes) failed_frags = [] for frag, xyz in zip(frags, xyzs): copy_struct = frag.template_polymer.copy() random_rotation(copy_struct, centroid=numpy.array([0, 0, 0])) mol_xyz = copy_struct.getXYZ() + xyz all_xyz[frag.atom_gids_in_mol, :] = mol_xyz self.cell.setXYZ(all_xyz) self.cur_bitset = mmbitset.Bitset.from_list(self.cell.atom_total, frag.atom_ids) self.cur_frag = frag if self.hasClashes(): failed_frags.append(frag) continue self.bitSetOn(frag) for nfrag in frag.next_frags: nfrag.continue_grow = True if frag.next_frags: self.frags_by_mol.append([frag.next_frags[:], 0]) continue self.cur_struct_in_cell += 1 return failed_frags + frags[len(xyzs)::]
[docs] def getGrids(self, diameter): """ Get the grid layout related things. :param diameter: the diameter of the largest initiator :type diameter: float :return: grid points, margins in three dims, available grid indexes :rtype: dict, list, list """ if self.scaffold.scaffold_mode == IMMERSED: # Without substrate, the scaffold box defines the space vecs = self.pbc.getBoxVectors() vrt = self.scaffold.box.vertices origin = numpy.array([vrt[0], vrt[2], vrt[4]]) atoms_on = list(self.existing_atom_bitset) if atoms_on: # The align the grid centroid with the existing atom centroid # bitset starts from index 1, and getXYZ() needs indexes from 0 gids = [i - 1 for i in atoms_on] origin += self.cell.getXYZ()[gids, :].mean(axis=0) elif self.scaffold.scaffold_mode == INTERFACE: # The edges of the hull parallel to the pbc.getBoxVectors() are # used to grid the void. The hull has vectors of the proper length but we # don't know which one corresponds to which PBC vector. So we find the # three hull vectors that are most closely parallel to the PBC vectors. pbc_vecs = [ x / numpy.linalg.norm(x) for x in self.pbc.getBoxVectors() ] points = self.scaffold.box.hull.points hull_vecs = numpy.array( [points[i] - points[0] for i in range(1, len(points))]) # Find the angles between each PBC vector and all hull vectors angles = [[ abs(transform.get_angle_between_vectors(y, x)) for y in hull_vecs ] for x in pbc_vecs] # Find the hull vector that is most nearly parallel to each PBC vector mangles = [min(x) for x in angles] # Indexes of the hull vectors that are parallel to the PBC vectors mangle_idxs = [x.index(y) for x, y in zip(angles, mangles)] # Each vector has the length of the corresponding Hull vector and # the direction of the PBC vector vecs = [ x * numpy.linalg.norm(hull_vecs[y]) for x, y in zip(pbc_vecs, mangle_idxs) ] origin = points[0] lens = [numpy.linalg.norm(vec) for vec in vecs] grid_num = [math.floor(x / diameter) for x in lens] num = [i if i <= 2 else i - 1 for i in grid_num] indexes = tuple( itertools.product(*[[y + 0.5 for y in range(x)] for x in num])) pts = { x: origin + numpy.dot(numpy.array(x) / num, vecs) for x in indexes } margins = [(x - y * diameter) / y for x, y in zip(lens, num)] return pts, margins, indexes
[docs] def getPositions(self, num, grid_mids, margins, indexes): """ Get positions based on grid with randomness :param num: the number of position requested :type num: int :param grid_mids: the grid middle points :type grid_mids: dict :param margins: the margins for grid points in each dimension. Intentionally design this room so that initiators can translationally move away from the grid mid points. :type margins: list of float :param indexes: the available grid indexes :type indexes: list :return: list of random points :rtype: list """ try: sel_indexes = random.sample(indexes, num) except ValueError: # The requested sample number exceeds the pool sel_indexes = indexes xyzs = [] for index in sel_indexes: xyz = grid_mids[index] xyz += numpy.array([x * random.random() for x in margins]) xyzs.append(xyz) return xyzs
[docs] def growFragmentCell(self, density, avdw_scale): """ Make a single attempt at building the cell by growing polymer fragments. :type density: float :param density: The density of the cell :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs :rtype: `schrodinger.structure.Structure` or None :return: The built cell, or None if an error occurred """ self.initTangledMethod(density) self.setCellsForTangledChain() self.copyAllStructsIntoCell() self.preparePbcAndParams(avdw_scale) self.setSubstrateSpearRings() self.setVolGraphAndBitset() components = self.getAllComponents() multi_comp = len(components) > 1 for ini_frags in components: if multi_comp and self.scaffold.scaffold_mode != CONTAINER: try: self.setIniFrags(ini_frags) except InitiationError as err: self.log(str(err)) return None else: try: self.setIniFragsOnebyOne(ini_frags) except InitiationError as err: self.log(str(err)) return None if self.grow_together and ini_frags != components[-1]: # Grow all molecules simultaneously: # continue to place INI frags into the cell until None left continue # Adjust structure of rest fragments in a molecule while self.frags_by_mol: # FIX ME: multi threads should help here due to # 1. Before chains meet each other, grow each chain in one thread, # and then check clashes only between newly added atoms on sync # 2. When only a few chains cannot are left, grow the same single # chain with different random numbers in each thread. If succeed # by one thread, abandon other attempts in different threads for frags_and_trial_num in self.frags_by_mol: (frags_in_one_mol, tried_per_mol) = frags_and_trial_num self.cur_frag = frags_in_one_mol.pop(0) self.cur_bitset = mmbitset.Bitset.from_list( self.cell.atom_total, self.cur_frag.atom_ids) self.cur_frag.hitted_num += 1 dihe_values = self.cur_frag.getDiheValues() if len(dihe_values): self.setDistanceCell() for dihe_value in dihe_values: self.cell.adjust(dihe_value, *self.cur_frag.dihedral) if self.hasClashes(): # Continue to try another dihedral value continue self.cur_frag.dihe_value = dihe_value break else: # Back move, as all dihedrals in frag lead to clashes pre_frag = self.cur_frag.pre_frag if pre_frag.is_branching: # Track other branches and delete from pre-existing self.deleteOtherBranches(pre_frag, frags_in_one_mol) if not pre_frag.dihe_value: # Dead chain: pre_frag is INI frag due to back move tried_per_mol += 1 if self.relocateFailedMol(pre_frag, frags_in_one_mol, tried_per_mol): frags_and_trial_num[1] = tried_per_mol # Continue to try fragment in next molecule continue else: self.log( f'Only {self.cur_struct_in_cell} molecules ' f'succeeded. ({self.num_structs} requested)' ) return None # The previous dihedral of pre_frag won't be considered again # why need if statement: # 1. one dihe deleted from pre_frag pool due to first time back move # 2. starting from the parent of pre_frag, pre_frag.dihe_value is set randomly # 3. the second time performing back move on pre_frag, the randomly set dihe # may not be in the pool if pre_frag.dihe_value in pre_frag.dihe_pool: pre_frag.dihe_pool.remove(pre_frag.dihe_value) pre_frag.continue_grow = False self.bitSetOff(self.existing_atom_bitset, pre_frag) frags_in_one_mol.insert(0, pre_frag) continue self.bitSetOn(self.cur_frag) for frag_of_same_generation in self.cur_frag.next_frags: frag_of_same_generation.continue_grow = True if frag_of_same_generation.in_sidegroup: # insert as the first frag to be grown next time frags_in_one_mol.insert(0, frag_of_same_generation) else: frags_in_one_mol.append(frag_of_same_generation) self.removeFinishedPolymer() self.log('Successfully grew %d structures' % self.num_structs) return self.cell
[docs] def initTangledMethod(self, density): """ Initialize some attributes for the tangled method. :param density: the density of the cell :type density: float """ self.total_tried = 0 self.cur_struct_in_cell = 0 self.existing_rings = [] self.existing_spear_rings = [] self.pre_atom_ids = set() self.frags_by_mol = [] num_per_chain = [x.num_struct for x in self.all_type_polymer_and_frags] self.num_structs = sum(num_per_chain) polymers = [x.template_polymer for x in self.all_type_polymer_and_frags] self.volume = self.getDesiredVolume(polymers, density, num_per_chain=num_per_chain) # Estimate the distance between initiators # Note: container may have inaccurate volume and self.r_per_ini_frag, # but self.r_per_ini_frag decreases automatically to solve the issue self.r_per_ini_frag = (self.volume / self.num_structs / 4 * 3 / math.pi)**(1. / 3.)
[docs] def setCellsForTangledChain(self): """ Set cells for tangle chain method. """ self.scaffold.defineBox(self.volume) self.cell = self.getNewCellStructure(self.volume) self.cell_template = self.cell.copy() self.ini_cell = self.cell_template.copy()
[docs] def copyAllStructsIntoCell(self): """ Copy all structures into the cell, but the bitsets are off. Deepcopy fragments. """ self.all_mol_fragments = collections.defaultdict(list) self.existing_rings += [x for x in self.cell.ring] self.all_rings = self.existing_rings[:] total_ringnum = sum([ one_type_polymer_and_frag.ringnum for one_type_polymer_and_frag in self.all_type_polymer_and_frags ]) molnum = 0 for atype_polym_frag in self.all_type_polymer_and_frags: for index in range(atype_polym_frag.num_struct): molnum += 1 new_init_frag = atype_polym_frag.first_frag.deepCopy(molnum) new_init_frag.adjustAtomIds(self.cell.atom_total) self.all_mol_fragments[atype_polym_frag].append(new_init_frag) struct = atype_polym_frag.template_polymer.copy() self.cell.extend(struct) self.setRingsForOneMol(new_init_frag, total_ringnum)
[docs] def preparePbcAndParams(self, avdw_scale): """ Prepare pbc, contact parameters, max distance for distance cell. :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs """ self.pbc = clusterstruct.create_pbc(self.cell) log_debug(f"PBC: {','.join(map(str, self.pbc.getBoxLengths()))}") self.cparams = infrastructure.ContactParams() self.cparams.setShow1_4(False) self.cparams.setCutoff(avdw_scale) max_radius = max(x.radius for x in self.cell.atom) self.max_distance = avdw_scale * max_radius * 2 log_debug(f"Minimum Clash Distance: {self.max_distance}")
[docs] def setSubstrateSpearRings(self): """ Set Spear Rings for the substrate. """ self.existing_spear_rings += [ ringspear.SpearRing(self.cell, ring.getAtomIndices(), pbc=self.pbc) for ring in self.existing_rings ] self.is_cg = msutils.is_coarse_grain(self.cell, by_atom=True)
[docs] def setVolGraphAndBitset(self): """ Set volume graph and bitset. """ self.vol_graph = XYZVolumeGraph(self.cell, spacing=2.0, scaffold=self.scaffold) self.existing_atom_bitset = mmbitset.Bitset(size=self.cell.atom_total) if not self.scaffold.scaffold: return for atom_id in range(1, self.scaffold.scaffold.atom_total + 1): self.existing_atom_bitset.set(atom_id)
[docs] def setIniFragsOnebyOne(self, ini_frags): """ Set the initiator fragments one by one. :param ini_frags: the initiators of some components to be placed into the cell and grow later. :type ini_frags: list of 'PolymerFragments' """ if not ini_frags: return self.log(f"Placing {len(ini_frags)} initiators...") for ini_frag in ini_frags: if not self.placeOneIniInCell(ini_frag): raise InitiationError( f'Only {self.cur_struct_in_cell} molecules ' 'are successfully placed in cell, but ' f'{self.num_structs} are requested.') # Turn on the initiator bitset, set fragment state, and register to # the fragment pool to grow self.bitSetOn(ini_frag) for frag in ini_frag.next_frags: frag.continue_grow = True if ini_frag.next_frags: self.frags_by_mol.append([ini_frag.next_frags[:], 0]) continue # Rigid body components finish growth after initiation self.cur_struct_in_cell += 1 self.log('failed molecule attempt: %i finished molecules: %i' % (self.total_tried, self.cur_struct_in_cell))
[docs] def setIniFrags(self, ini_frags): """ Set the initiator fragments in batch mode. :param ini_frags: the initiators of some components to be placed into the cell and grow later. :type ini_frags: list of 'PolymerFragments' """ if not ini_frags: return self.log(f"Placing {len(ini_frags)} initiators...") diameter = self.getIniDiameter(ini_frags) grid_mids, margins, indexes = self.getGrids(diameter) tries_per_mol = 0 while ini_frags: failed_frags = self.initiateMultiMols(ini_frags, grid_mids, margins, indexes) success_num = len(ini_frags) - len(failed_frags) if not success_num: tries_per_mol += 1 if tries_per_mol > self.tries_per_mol: raise InitiationError( f'Only {self.cur_struct_in_cell} molecules ' 'are successfully placed in cell, but ' f'{self.num_structs} are requested.') # Initiators placed into the cells, but some are not finished if # they need to grow but haven't self.log('failed molecule attempt: %i finished molecules: %i' % (tries_per_mol, self.cur_struct_in_cell)) ini_frags = failed_frags
[docs] def getIniDiameter(self, frags): """ Get the largest diameters of the initiators by computing the longest span between any pairs of atoms. :param frags: list for fragments to be placed in :type frags: list of 'PolymerFragments' :return: the largest diameters of the initiators :rtype: float """ dists = [] for frag in frags: atoms = [self.cell.atom[x] for x in frag.atom_ids] atom_xyz = [x.xyz for x in atoms] dist = pdist(atom_xyz) sdist = squareform(dist) atom_id_pair = numpy.unravel_index(numpy.argmax(sdist), sdist.shape) radius_sum = sum([atoms[i].radius for i in atom_id_pair]) max_dist = numpy.nanmax(dist) if len(dist) else 0.0 mdist = self.cparams.getCutoff() * radius_sum + max_dist dists.append(mdist) return max(dists)
[docs] def getAllComponents(self): """ Get all components to be placed in the cell. Volume, fragment number, and replica number for each component are used to divide all component into several groups. :return: Large, soft, and fewer fragments first :rtype: list of 'PolymerFragment' lists """ fragments = self.all_mol_fragments.values() if self.grow_together: return [list(itertools.chain.from_iterable(fragments)), []] counts = [len(x) for x in fragments] by_count = self.indexChanges(counts, reverse=True) mols = [x[0].template_polymer for x in fragments] radius_cubic = [sum([x.radius**3 for x in x.atom]) for x in mols] by_size = self.indexChanges(radius_cubic) frag_nums = [len(list(x[0].fragments())) for x in fragments] by_flex = self.indexChanges(frag_nums, threshold=20) by_all = [min(list(x)) for x in zip(by_count, by_size, by_flex)] components = [[] for x in range(max(by_all) + 1)] for index, fragments in zip(by_all, fragments): components[index].extend(fragments) return components
[docs] @staticmethod def indexChanges(values, threshold=10., reverse=False): """ Use the threshold as the ratio criterion to index the sudden change in the sorted sequence. :param values: input values to divide :type values: list of floats :param threshold: the ratio criterion to identify the sudden change :type threshold: float :param reverse: If True, smaller values are grouped with smaller indexes. Otherwise larger values are grouped with smaller indexes. :type reverse: bool :return: indexes are the group tokens :rtype: list of int """ sorted_vals = sorted(values, reverse=True) ratios = [1.] ratios += [x / y for x, y in zip(sorted_vals[:-1], sorted_vals[1:])] index, ratio_indexes = 0, [] for ratio in ratios: if ratio >= threshold: index += 1 ratio_indexes.append(index) indexes = [ratio_indexes[sorted_vals.index(x)] for x in values] if not reverse: return indexes max_index = max(indexes) return [max_index - x for x in indexes]
[docs] def setDistanceCell(self): """ Set DistanceCell for the clash check of current fragment. """ # Pre-existing atoms in distance_cell don't change position during # clash check for one certain fragment. self.distance_cell = infrastructure.DistanceCell( self.cell.handle, self.max_distance, self.pbc) if self.cur_frag.rings: # Ring spear uses a different cut distance self.ringspear_distance_cell = infrastructure.DistanceCell( self.cell.handle, ringspear.ROUGH_CUT_DISTANCE, self.pbc)
[docs] def writeDihedralDis(self, file_name='tmp_dis.txt'): """ Write the dihedral angle distribution of the structures in cell. """ # TEST ONLY interval = old_div(360., (DIHEDRAL_NUM - 1)) my_array = numpy.zeros((DIHEDRAL_NUM, 2)) for idx in range(DIHEDRAL_NUM): my_array[idx, 0] = idx * interval for ini_frags in self.all_mol_fragments.values(): for ini_frag in ini_frags: for frag in ini_frag.fragments(include_self=False): dihe = self.cell.measure(*frag.dihedral) % 360 my_array[int(round(dihe, interval)), 1] += 1 numpy.savetxt(file_name, my_array)
[docs] def relocateFailedMol(self, pre_frag, frags_in_one_mol, tried_per_mol, undo_crystal_links=False): """ Place one failed molecule back into the cell. :type pre_frag: {PolymerFragment} :param pre_frag: the ini fragment of one polymer :type frags_in_one_mol: list of {PolymerFragment} :param frags_in_one_mol: polymer fragment of this polymer within one polymer :type tried_per_mol: int :param tried_per_mol: failed trial number :type undo_crystal_links: bool :param undo_crystal_links: whether to unlink the chain from the crystals :rtype: bool :return: True, if successfully placed the ini fragment back """ if tried_per_mol == self.tries_per_mol: # Reach the max trials for replacing one INT. Abort this cell self.log( 'The growth of one molecule has failed %s times, reaching the ' 'the maximum attempts per molecule set by -tries_per_mol.' % self.tries_per_mol) return False self.total_tried += 1 self.log('failed molecule attempt: %i finished molecules: %i' % (self.total_tried, self.cur_struct_in_cell)) # temporarily turn off INT frag and prepare voids search self.bitSetOff(self.existing_atom_bitset, pre_frag) try: self.vol_graph.locateVoids(set(self.existing_atom_bitset)) except VolumeMemoryError as msg: self.log(str(msg)) sys.exit() if undo_crystal_links: self.undoCrystalLinks(pre_frag) self.resetOrigDihed(pre_frag) # Try to add the dead chain back to cell by rotation and relocate if self.placeOneIniInCell(pre_frag, new_chain=False): pre_frag.resetFragDihedralPool() for frag_of_same_generation in pre_frag.next_frags: frag_of_same_generation.continue_grow = True frags_in_one_mol.append(frag_of_same_generation) # Successfully relocate one dead molecule and continue self.bitSetOn(pre_frag) return True else: self.log('Failed to relocate dead chains.') return False
[docs] def hasClashes(self): """ Check whether current fragment has clashes and speared rings with pre-existing structure. :rtype: bool :return: True, if has clashes """ if infrastructure.get_contacts(self.cell, self.cur_bitset, self.distance_cell, self.existing_atom_bitset, self.cparams): # Atoms in the fragment clash with pre-exsiting atoms in cell return True if self.cur_frag.rings: if ringspear.check_for_spears(self.cell, atoms=self.pre_atom_ids, rings=self.cur_frag.rings, pbc=self.pbc, cell=self.ringspear_distance_cell, return_bool=True, distorted=self.is_cg): # Pre-exsiting atoms spear rings in the fragment return True if self.existing_rings: # FIXME: shouldn't rebuild distance cell from scratch # Temporary fix: extract substructure # Add the bonds between fragments for ring spear check if self.cur_frag.dihedral: spear_struct = self.cell.extract(self.cur_frag.atom_ids + [self.cur_frag.dihedral[2]]) else: spear_struct = structure.Structure( mm.mmct_ct_extract_atoms(self.cell.handle, self.cur_bitset), True) if ringspear.check_for_spears(self.cell, spear_struct=spear_struct, rings=self.existing_rings, pbc=self.pbc, spear_rings=self.existing_spear_rings, return_bool=True, distorted=self.is_cg): # Atoms in the fragment spear rings in pre-exsiting atoms return True return False
[docs] def removeFinishedPolymer(self): """ Remove frags_by_mols that have no polymer fragments and update finished polymer number. """ for frags_and_trial_num in reversed(self.frags_by_mol): if not frags_and_trial_num[0]: self.cur_struct_in_cell += 1 self.log( 'failed molecule attempt: %i finished molecules: %i' % (self.total_tried, self.cur_struct_in_cell)) self.frags_by_mol.remove(frags_and_trial_num)
[docs] def bitSetOn(self, frag): """ Set on the pre-existing atom bitset according to atom ids in the fragment; record rings and spear_rings according to rings in the fragment. :type frag: `PolymerFragment` :param frag: the polymer fragment providing the atom ids """ for atom_id in frag.atom_ids: self.existing_atom_bitset.set(atom_id) if self.has_rings: self.pre_atom_ids.add(atom_id) self.existing_rings += frag.rings[:] frag.spear_rings = [ ringspear.SpearRing(self.cell, ring.getAtomIndices(), pbc=self.pbc) for ring in frag.rings ] self.existing_spear_rings += frag.spear_rings[:]
[docs] def bitSetOff(self, bitset, frag): """ Set the bitset off according to the atom ids in the fragment; remove rings and spear_rings according to rings in the fragment. :type bitset: `Bitset` :param bitset: the bitset to set :type frag: `PolymerFragment` :param frag: the polymer fragment providing the atom ids """ for atom_id in frag.atom_ids: bitset.unset(atom_id) if self.has_rings: self.pre_atom_ids.remove(atom_id) for ring in frag.rings: self.existing_rings.remove(ring) for spear_ring in frag.spear_rings: self.existing_spear_rings.remove(spear_ring)
[docs] def getPosition(self, new_chain): """ Find random positions in the cell according to the following rules. If new_chain=True, randomly pick a position in cell that is away from pre-existing initiators. If new_chain=False, randomly loop over all the gridded positions in the largest void in cell. :type new_chain: bool :param new_chain: if True, the fragment is from a new chain; else, the fragment is from a dead chain. :rtype: list :return: [x, y, z], a random position in cell """ if new_chain: return self.getRandXYZ() else: return self.getRandGridXYZ()
[docs] def getRandGridXYZ(self): """ Get xyz iterator of random grid points in the largest void. :rtype: list :return: [x, y, z], a random position in cell """ for xyz in self.vol_graph.getBuriedXYZ(self.vol_graph.getLargestVoid()): # Volume graph and the scaffold box define different origin positions # Shift xyz from volume graph into the scaffold box shift_vector = self.scaffold.box.getTranslationToBox(xyz) yield [b + a for a, b in zip(xyz, shift_vector)]
[docs] def getRandXYZ(self, max_num=2000, max_failed_num=2000): """ Return a random point in the space and no other initiators and scaffold within a pre-defined raduis. :type max_num: int :param max_num: max number of xyz position returned :type max_failed_num: int :param max_failed_num: max number of attemps when finding points self.r_per_ini_frag away from scaffold and other initiator atoms. :rtype: iterator of list :return: iterator of [x, y, z], a random position in cell """ self.ini_cell.addAtom('Br', 0, 0, 0) added_atom_id = self.ini_cell.atom_total point_num = 0 while point_num < max_num: near_point = True failed_num = 0 while near_point: random_point = self.scaffold.getPointInBox() self.ini_cell.atom[added_atom_id].xyz = random_point near_point = False for atom_id in range(1, self.ini_cell.atom_total): distance = self.ini_cell.measure(added_atom_id, atom_id) if distance < self.r_per_ini_frag: near_point = True failed_num += 1 if failed_num > max_failed_num: failed_num = 0 self.r_per_ini_frag *= 0.9 break point_num += 1 yield random_point
[docs] def deleteOtherBranches(self, pre_frag, frags_in_one_mol): """ Remove the fragments from the next_frags pool and set off the bitset of moved fragments. :type pre_frag: `PolymerFragment` :param pre_frag: the branching parent polymer fragment :type frags_in_one_mol: list of `PolymerFragment` :param frags_in_one_mol: the pool of PolymerFragment to be grown """ frags_to_remove = pre_frag.next_frags[:] frags_to_remove.remove(self.cur_frag) while frags_to_remove: frag_to_remove = frags_to_remove.pop(0) if self.existing_atom_bitset.get(frag_to_remove.atom_ids[0]): self.bitSetOff(self.existing_atom_bitset, frag_to_remove) frags_to_remove += frag_to_remove.next_frags[:] else: frags_in_one_mol.remove(frag_to_remove)
[docs] def setPolymerFragments(self, vdw_scale): """ Set polymer fragments for tangled chain method. :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs """ # PolymerFragments order in this list is the order, according to which # the initiators of the structures are placed into the cell. # Rigid bodies are treated as huge initiators, and are first placed into # the cell as they can be large and won't grow bit by bit. self.all_type_polymer_and_frags = [] self.all_type_polymer_and_frags += self.getPolymerFragments( vdw_scale, get_rigid=True) self.all_type_polymer_and_frags += self.getPolymerFragments( vdw_scale, get_rigid=False)
[docs] def getPolymerFragments(self, vdw_scale, get_rigid=False, restore_template=None): """ Break polymers (if needed), prepare them, and return the corresponding PolymerFragments objects. :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs :type get_rigid: bool :param get_rigid: If True, pick those marked as rigid bodies. If False, skip those marked as rigid bodies. :type restore_template: bool or None :param restore_template: restore the template polymer to the original one. If None, debug mode determines :rtype: list of 'PolymerFragments' :return: Each item contains fragment information for one molecule. """ polymer_and_frags = [] for index, (struct, num_struct) in enumerate(zip(self.structs, self.composition)): is_rigid = bool(struct.property.get(RIGID_BODY_PROP)) if get_rigid != is_rigid: # Request rigid body, but the struct is not marked as rigid, or # Doesn't Request rigid body, but the struct is marked as rigid continue # multiple molecules may present in one ws or pt entry for mol_idx, molecule in enumerate(struct.molecule): mol_struct = molecule.extractStructure(copy_props=True) indexing = '_'.join(map(str, [index, mol_idx])) polymFrags = PolymerFragments(mol_struct, num_struct, forcefield=self.forcefield, vdw_scale=vdw_scale, temperature=self.dihe_temperature, logger=self.logger, indexing=indexing) restore_template = not schrodinger.in_dev_env( ) if restore_template is None else restore_template polymFrags.breakIntoFragments(is_rigid=is_rigid, restore_template=restore_template) prepare_building_block(polymFrags.template_polymer, index, recolor=self.recolor, preserve_res=self.preserve_res) polymer_and_frags.append(polymFrags) return polymer_and_frags
[docs]class AmorphousCellBuilder(BuilderWithClashDetection, BuilderGrowInCellMixin): """ Builder for an amorphous cell """
[docs] def __init__(self, scaffold=None, system=True, population=1, composition=None, density=0.5, amorphous_vdw_scale=1.0, obey=OBEY_DENSITY, tries_per_dc=5, tries_per_mol=5, title="", allow_increased_density=True, forcefield=OPLS2005, grow=False, dihe_temperature=None, grow_together=False, recolor=False, preserve_res=True, split_components=False, wam_type=None, **kwargs): """ Create an AmorphousCellBuilder object :type Scaffold: `Scaffold` :param Scaffold: The Scaffold object for the cell to use that defines the molecule the cell should be built around (or inside or on top of) :type system: bool :param system: Whether a Desmond system should be built with the final amorphous cell :type population: int :param population: The number of molecules to include in the amorphous cell :type composition: list :param composition: A list of the same length and order as structs which gives the number of structures of each component :type density: float :param density: The initial target density for building the cell. Does not include the scaffold volume or mass :type amorphous_vdw_scale: float :param amorphous_vdw_scale: The initial VdW scale factor to use when looking for clashes in the amorphous cell :type obey: str :param obey: A module constant indicating which initial target to keep constant when trying to build an amorphous cell - either OBEY_DENSITY to keep the density constant, OBEY_CLASH to keep the VdW scale factor constant, or OBEY_BOTH to keep both constant. The latter will prevent the builder from relaxing one of these constraints to find a cell without clashes. :type tries_per_dc: int :param tries_per_dc: The number of tries to make a cell at a given density and VdW clash scale factor before relaxing the parameter that is not specified by obey :type tries_per_mol: int :param tries_per_mol: The number of times to try placing a molecule in the cell without clases before scrapping the cell and starting over :type title: str :param title: The title to give the final cell structure :type allow_increased_density: bool :param allow_increased_density: If True and obey=OBEY_CLASH, allow attempts to build cells at increasing density as long as a cell was built successfully :type forcefield: str :param forcefield: The name of the force field to use. The default is 'OPLS_2005'. :type grow: bool :param grow: whether grow polymers via self-avoiding random walk :type dihe_temperature: float, or None :param dihe_temperature: the temperature to calculate dihedral angle distribution :type grow_together: bool :param grow_together: Grow all the molecules together if True :type recolor: bool :param recolor: Whether to recolor the components so that each molecule of the same component is the same color :type preserve_res: bool :param preserve_res: Whether to preserve the current residue information :type split_components: bool :param split_components: Whether to split system in components in the system build :type wam_type: int or None :param wam_type: One of the enums defined in workflow_action_menu.h if the results should have a Workflow Action Menu in Maestro See the parent class for additional keyword documentation """ BuilderWithClashDetection.__init__(self, **kwargs) if scaffold is None: self.scaffold = Scaffold() else: self.scaffold = scaffold self.system = system self.population = population self.composition = composition self.density = density self.avdw_scale = amorphous_vdw_scale self.obey = obey self.tries_per_dc = tries_per_dc self.tries_per_mol = tries_per_mol self.title = title self.allow_increased_density = allow_increased_density self.forcefield = forcefield self.grow = grow self.dihe_temperature = dihe_temperature self.grow_together = grow_together self.recolor = recolor self.preserve_res = preserve_res self.split_components = split_components self.wam_type = wam_type self.debug = schrodinger.in_dev_env()
[docs] def setDensityIncreaseAllowed(self, is_allowed): """ Set whether increased cell densities can be attempted after a successful cell is built using OBEY_CLASH :type is_allowed: bool :param is_allowed: Whether increased density attempts can be tried """ self.allow_increased_density = is_allowed
[docs] def build(self, generate_only=False, center=True): """ Build the cell and do all the post-processing :type generate_only: bool :param generate_only: Only generate the cell, do not write it to a file, write final density, or attempt to create a Desmond system with it :type center: bool :param center: Center the structure in the PBC unit cell, assuming the cell has its origin at 0, 0, 0 :rtype: `schrodinger.structure.Structure` or None :return: The created structure or None if no structure was found """ self.log('Creating amorphous cell') # Actually build the cell cell = self.buildCellDriver() if not cell: return if not generate_only: self.log('Final cell density = %.3f' % get_density(cell)) # Allow custom post-treatment of the raw structure before the system is # built cell = self.postTreatBuiltCell(cell) # Color if necessary if self.color == COLOR_BY_MOLECULE: self.log('Coloring the molecules...') self.colorByMolecule(cell) if center: self.log('Translating the cell in space...') # Center the structures in the unit cell (unit cell has one corner # at the origin and all other vertices in the +xyz directions) centroid = transform.get_centroid(cell) half = old_div(self.getPBCSide(cell), 2.0) center = [half, half, half] move = [x - y for x, y in zip(center, centroid)] transform.translate_structure(cell, *move) if msutils.is_coarse_grain(cell, by_atom=True): coarsegrain.set_as_coarse_grain(cell) if generate_only: return cell # Create a Desmond system or just write out this structure if self.system: self.log('Creating the Desmond system...') cms_name = desmondutils.run_system_builder( cell, self.basename, rezero_system=True, forcefield=self.forcefield, split_components=self.split_components, wam_type=self.wam_type) if cms_name: if self.backend: self.backend.addOutputFile(cms_name) self.backend.setStructureOutputFile(cms_name) else: self.log('Unable to create the Desmond system') else: amcell_file = self.basename + AMCELL_NO_SYSTEM_OUT self.log('Writing final cell to %s' % amcell_file) cell.write(amcell_file) if self.backend: self.backend.addOutputFile(amcell_file) self.backend.setStructureOutputFile(amcell_file) return cell
[docs] def postTreatBuiltCell(self, cell): """ The default implementation of this does nothing, but subclasses can use this method to modify the raw cell immediately after the last structure is added to it but before any other processing (such as centering, coloring, writing out or building a Desmond system) :type cell: `schrodinger.structure.Structure` :param cell: The built cell :rtype: `schrodinger.structure.Structure` :return: The modified cell """ return cell
[docs] def buildCell(self, density, avdw_scale): """ Make a single attempt at building the cell. :type density: float :param density: The density of the cell :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs :rtype: `schrodinger.structure.Structure` or None :return: The built cell, or None if an error occurred """ try: if self.grow: cell = self.growFragmentCell(density, avdw_scale) else: cell = self.rotateRigidBodyCell(density, avdw_scale) except ScaffoldError as msg: self.log(str(msg)) return None return cell
[docs] def buildCellDriver(self): """ Driver function for cell building. Tries multiple cells until one succeeds. Adjust initial parameters to try to create more dense or tighter class cells as needed. :rtype: `schrodinger.structure.Structure` or None :return: The built cell, or None if an error occurred """ num_structs = self.population density = self.density avdw_scale = self.avdw_scale to_density = self.obey == OBEY_DENSITY to_clash = self.obey == OBEY_CLASH to_both = not to_density and not to_clash if to_density: # Adjust the clash vdw scale to build to the specific density value = avdw_scale parameter_adjuster = self.adjustACellClashVDWScale parameter_string = 'clash vdw scale' minimum_value = 0.03 elif to_clash: # Adjust the density to build to the specific clash vdw scale value = density parameter_adjuster = self.adjustACellDensity parameter_string = 'density' minimum_value = 0.001 else: # Only attempt the given density and clash value value = density parameter_adjuster = parameter_string = minimum_value = None original_value = value if self.structs is None: # PolymerAmorphousCellBuilder doesn't have self.structs as input self.structs, self.composition = self.getBuildingBlocks(num_structs) if not self.structs: # Failed to build single polymers return None if self.grow: # Prepare building blocks for tangled chain method vdw_min = minimum_value if to_density else None vdw_scale = self.prepareTangledChainBlocks(avdw_scale, vdw_min) if vdw_scale is None: return None if to_density: value = vdw_scale else: avdw_scale = vdw_scale def attempt_cell(): """ Attempt to create the cell with the current parameters :rtype: `schrodinger.structure.Structure` or None :return: The built cell, or None if an error occurred """ cell = None msg = ('Attempt %d at a cell with density = %.3f and clash ' 'scale = %.3f') for atry in range(self.tries_per_dc): if to_density: self.log(msg % (atry + 1, density, value)) cell = self.buildCell(density, value) else: self.log(msg % (atry + 1, value, avdw_scale)) cell = self.buildCell(value, avdw_scale) if cell: self.log('Amorphous cell populated') self.log('') return cell return cell # Try at ever decreasing parameter value until we succeed in packing all # molecules self.log('Creating the amorphous cell...') cell = None while not cell: cell = attempt_cell() if not cell: # No successful cell was found - try making the value smaller # (either less dense cell, or smaller clash vdw scale) if to_both: self.log('Unable to find cell with the given density and ' 'VDW clash values') return value = parameter_adjuster(value, down=True) if value < minimum_value: self.log('Unable to find cell with %s > %.3f' % (parameter_string, minimum_value)) return successful_value = value best_cell = cell # If we succeeded at the initial values and density is the adjustable # parameter, try to increase the density value as much as possible up to # a point. if (to_clash and successful_value == original_value and self.allow_increased_density): while cell: value = parameter_adjuster(value, down=False) self.log('Increasing %s to %.2f' % (parameter_string, value)) cell = attempt_cell() if cell: best_cell = cell successful_value = value if value > 2: break cell = best_cell if to_density: # Monte Carlo and steric pack steps may change density self.log('Final %s = %.3f' % (parameter_string, successful_value)) return cell
[docs] def prepareTangledChainBlocks(self, vdw_scale, vdw_min): """ Break structures into polymer fragments and handle vdw scale adjustment. :type vdw_scale: float :param vdw_scale: the vdw scale to check clashes. :type vdw_min: float or None :param vdw_min: If float, this is the minimum allowable vdw scale after adjustment. If None, don't allow vdw adjustment. :rtype: float or None :return: If float, this is the vdw scale. """ while True: try: self.setPolymerFragments(vdw_scale) except RotationError as msg: if vdw_min is None: self.log(str(msg)) return None elif vdw_scale < vdw_min: self.log('Unable to find cell with clash vdw scale > %.3f' % vdw_scale) return None vdw_scale = self.adjustACellClashVDWScale(vdw_scale, down=True) self.log( 'Decreasing vdw scale to %.2f due to unvoidable clashes' % vdw_scale) else: return vdw_scale
[docs] def getBuildingBlocks(self, number): """ Get the structures to place into the amorphous cell. Must be overridden in the subclass to produce structures :type number: int :param number: The number of structures to return :rtype: list :return: list of `schrodinger.structure.Structure` objects to place into the cell """ return []
[docs] def rotateRigidBodyCell(self, density, avdw_scale): """ Make a single attempt at building the cell by random placing and rotating rigid structures. :type density: float :param density: The density of the cell :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs :rtype: `schrodinger.structure.Structure` or None :return: The built cell, or None if an error occurred """ volume = self.getDesiredVolume(self.structs, density, num_per_chain=self.composition) self.scaffold.defineBox(volume) cell = self.getNewCellStructure(volume) cell_rand = cell.copy() scaffold_mol_num = len(cell_rand.molecule) has_custom_mmffld = detect_custom_mmffld_data(self.structs) # A list of structures in the order the population is randomly sampled all_structs = [] for struct, num_struct in zip(self.structs, self.composition): all_structs += [struct.copy() for num in range(num_struct)] struct_order = [x for x, y in enumerate(all_structs)] # Randomize the structure order when placing them into the cell random.shuffle(struct_order) # Place the structures in the cell one by one for index, pick_idx in enumerate(struct_order): # Let the user know that provided (monomer) structure has # intra-structure clashes raw_clashes = self.checkForIntraStructureClashes( all_structs[pick_idx], scale=avdw_scale) if raw_clashes: msg = 'WARNING: molecule "%s" has intra-structure clashes.' self.log(msg % struct.title) success = self.placeStructureInCell(cell_rand, all_structs[pick_idx], avdw_scale) if not success: break if not success: self.log('Successfully placed only %d structures' % index) return None # To be consistent with the log info: "Chain 1: AAB; Chain 2: BCA", # Restore the structure order to the polymer chain build order for orig_idx, struct in enumerate(all_structs): mol_id = struct_order.index(orig_idx) + scaffold_mol_num + 1 new_xyz = cell_rand.molecule[mol_id].extractStructure().getXYZ() struct.setXYZ(new_xyz) if has_custom_mmffld: # This call is expensive, so we only do it if we have to # preserve mmffld custom charge blocks (MATSCI-8373) cell = structure.Structure( mm.mmffld_extendCTwithCustomCharges(cell, struct)) else: cell.extend(struct) return cell
[docs] def placeStructureInCell(self, cell, struct, avdw_scale, no_rotation=False, rotate_velocities=False): """ Place a structure in the cell. Make multiple rotations and translations to try to find a spot that the structure fits in with no clashes :type cell: `schrodinger.structure.Structure` :param cell: The current cell with structures that have already been placed :type struct: `schrodinger.structure.Structure` :param struct: The structure to attempt to place into the cell :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs :param bool no_rotation: If True, do not rotate the structure :param bool rotate_velocities: If True, besides the structure, also rotate FFIO atom velocities (if present) :rtype: bool :return: Whether the structure was placed in the cell successfully """ transform.translate_centroid_to_origin(struct) clashes = True attempts = 0 cell_rings = self.findRings(cell) struct_rings = self.findRings(struct) # Find all existing clashes in the structure before the PBC and before # adding to the cell - we'll ignore these as they are the result of # building the structure to the user's specifications and might have # been unavoidable raw_clashes = self.checkForIntraStructureClashes(struct, scale=avdw_scale) # Keep trying different rotations and translations until one works with # no clashes while clashes and attempts < self.tries_per_mol: attempts += 1 candidate = self.locateAndRotate( struct, no_rotation=no_rotation, rotate_velocities=rotate_velocities) candidate_rings = self.duplicateRings(struct_rings, candidate) try: clashes = self.checkForClashes(cell, candidate, avdw_scale, raw_clashes, cell_rings, candidate_rings) # Can be raised by self.getClashes (MATSCI-2620, MATSCI-4296) except ValueError as msg: self.log('Cannot detect clashes: ' + str(msg)) continue if not clashes: # Success! Add the structure to the cell cell.extend(candidate) return not clashes
[docs] def duplicateRings(self, original_rings, new_struct): """ Take a list of _Ring objects referenced to one structure and convert to a list of _Ring objects referenced to a new structure. This is much faster than finding rings in the new structure. It is implied that the new and old structure have identical bonding. :type original_rings: iterable of _Ring objects :param original_rings: The _Ring objects to duplicate :param new_struct: `schrodinger.structure.Structure` :param new_struct: The new structure the ring objects should reference :rtype: list :return: List of `schrodinger.structure._Ring` objects that reference atoms in new_structure """ new_rings = [] for ring in original_rings: atoms = ring.getAtomIndices()[:] new_ring = structure._Ring(new_struct, ring._ringnum, atoms, None) new_rings.append(new_ring) return new_rings
[docs] def locateAndRotate(self, struct, no_rotation=False, rotate_velocities=False): """ Randomly rotate a copy of struct and translate it to a position in the cell :type struct: `schrodinger.structure.Structure` :param struct: The structure to attempt to place into the cell :param bool no_rotation: If True, do not rotate the structure :param bool rotate_velocities: If True, besides the structure, also rotate FFIO atom velocities (if present) :rtype: `schrodinger.structure.Structure` :return: A copy of struct that has been randomly rotated and translated """ scopy = struct.copy() # Randomly rotate the structure centroid, rmatrix = random_rotation(scopy, no_rotation=no_rotation, rotate_velocities=rotate_velocities) # Only dump the rotation matrix in DEBUG textlogger.log(self.logger, 'Rotation matrix:', level=textlogger.logging.DEBUG) textlogger.log(self.logger, str(rmatrix), level=textlogger.logging.DEBUG) # Move to random location xyz = self.scaffold.getPointInBox() new_xyz = scopy.getXYZ() + xyz scopy.setXYZ(new_xyz) return scopy
[docs] def adjustACellDensity(self, density, down=True): """ Adjust the cell density up or down - the amount it is adjusted depends on its current value and the direction it is being adjusted :type density: float :param density: The current cell density :type down: bool :param down: Whether to adjust the density down (or up if False) :rtype: float :return: The adjusted density """ if down: if density > 0.15: density = density - 0.1 else: density = density * 0.5 else: if density < 0.1: density = density * 1.5 else: density = density + 0.1 return density
[docs] def adjustACellClashVDWScale(self, avdw_scale, down=True): """ Adjust the cell clash scaling up or down - the amount it is adjusted depends on its current value and the direction it is being adjusted :type avdw_scale: float :param avdw_scale: The current clash vdw scale factor :type down: bool :param down: Whether to adjust the scale factor down (or up if False) :rtype: float :return: The adjusted scale factor """ if down: if avdw_scale > 1: avdw_scale = avdw_scale * 0.9 elif avdw_scale <= 0.11: avdw_scale = avdw_scale - 0.03 else: avdw_scale = avdw_scale - 0.1 else: if avdw_scale < 1: avdw_scale = avdw_scale + 0.1 else: avdw_scale = avdw_scale * 1.1 return avdw_scale
[docs] def checkForClashes(self, cell, candidate, avdw_scale, raw_clashes, cell_rings, candidate_rings): """ Check for clashes between the candidate structure and structures already within the cell, and also intrastructure clashes within the candiate that are the result of the periodic boundary condition. If intrastructure clashes are found, interstructure clashes are not checked for. :type cell: `schrodinger.structure.Structure` :param cell: The current cell with structures that have already been placed :type candidate: `schrodinger.structure.Structure` :param candidate: The structure to attempt to place into the cell :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs :type raw_clashes: dict :param raw_clashes: Those clashes that exist in the candidate without accounting for the PBC :type cell_rings: list :param cell_rings: Each item of the list is a `schrodinger.structure._Ring` object returned by the findRings() method. :type candidate_rings: list :param candidate_rings: Each item of the list is a `schrodinger.structure._Ring` object returned by the findRings() method. :rtype: dict :return: Any clashes found. Keys are atom indexes values are all the atom indexes that clash with that atom """ pbc = [] prop = desmondutils.CHORUS_PROP_PREFIX + '%s%s' for letter in 'abc': for m in 'xyz': pbc.append(cell.property[prop % (letter, m)]) # Intrastructure clashes clashes = self.checkForIntraStructureClashes(candidate, scale=avdw_scale, pbc=pbc) if clashes: # We only care about those caused by the PBC pbc_only_clashes = self.removeIgnoredClashes(clashes, raw_clashes) if pbc_only_clashes: return pbc_only_clashes # Interstructure clashes clashes = self.checkForInterStructureClashes(cell, candidate, avdw_scale, pbc, cell_rings, candidate_rings) return clashes
[docs] def checkForInterStructureClashes(self, cell, candidate, avdw_scale, pbc, cell_rings, candidate_rings): """ Check for clashes of the structure with other structures in the cell, including the periodic boundary condition. Also check for ring spears. The first check that finds clashes will return those clashes and no more checks will be made. :type cell: `schrodinger.structure.Structure` :param cell: The current cell with structures that have already been placed :type candidate: `schrodinger.structure.Structure` :param candidate: The structure to attempt to place into the cell :type avdw_scale: float :param avdw_scale: The VDW scale factor for clash cutoffs :type pbc: None, infrastructure.PBC, or list :param pbc: If periodic boundary conditions should be used, provide either an infrastructure.PBC object or the parameters to construct one. Allowed constructors: * a, b, c : box lengths * a, b, c, alpha, beta, gamma box : box lengths and angles * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors :type cell_rings: list :param cell_rings: Each item of the list is a `schrodinger.structure._Ring` object returned by the findRings() method. :type candidate_rings: list :param candidate_rings: Each item of the list is a `schrodinger.structure._Ring` object returned by the findRings() method. :rtype: dict :return: Any clashes found. Keys are atom indexes values are all the atom indexes that clash with that atom """ if cell.atom_total == 0: # No interstructure clashes possible return {} # Note: Timings indicate that for systems on the order of 5000 atoms, # this get_contacts approach is competitive with using # DistanceCellIterator and # creating the distance cell once and then using that cell for each # candidate. For systems of 100,000 atoms, the DistanceCellIterator # appears to be about 25X faster (depending on how many candidates are # checked for each unchanged cell). But DistanceCellIterator doesn't use # PBC currently, so we are sticking with get_contacts for now. clashes = self.getClashes(cell, avdw_scale, struct2=candidate, pbc=pbc) if clashes: return clashes # Find ring spears where the candidate spears a cell ring clashes = self.findRingSpears(cell, spear_struct=candidate, rings=cell_rings, ring_based=False, pbc=pbc) if clashes: return clashes # Find ring spears where the cell spears a candidate ring clashes = self.findRingSpears(candidate, spear_struct=cell, rings=candidate_rings, pbc=pbc) return clashes
[docs] def getPBCSide(self, cell): """ Get the length of one side of the periodic boundary condition :type cell: `schrodinger.structure.Structure` :param cell: The current cell with structures that have already been placed :rtype: float :return: The length of one side of the periodic boundary condition. 0 is returned if no PBC has been set on the cell """ return cell.property.get('r_pdb_PDB_CRYST1_a', 0.0)
[docs] def getNewCellStructure(self, volume): """ Get a new, empty structure with the periodic boundary condition set :type volume: float :param volume: The desired volume of the periodic boundary condition :rtype: `schrodinger.structure.Structure` :return: The structure to add the properties to """ cell = self.scaffold.getNewCell() self.scaffold.addPBCProperties(cell, volume) cell.title = 'amorphous %s' % self.title return cell
[docs] def getDesiredVolume(self, structures, density, num_per_chain=None): """ Get the volume of a cube that will have the given density when the given structures are placed entirely inside it :type structures: list :param structures: The list of structures to be placed into the box :type density: float :param density: The desired density (g/cm3) :type num_per_chain: list of int :param num_per_chain: The number of each structure. If provided, this list must be of the same length as the above structure list :rtype: float :return: The desired box side """ if num_per_chain: mass = sum([ struct.total_weight * num for struct, num in zip(structures, num_per_chain) ]) else: # It's possible that not all structures have the same mass mass = sum([x.total_weight for x in structures]) volume = AMU_PER_A3_TO_G_PER_CM3 * mass / density volume = volume + self.scaffold.getExcludedVolume() return volume
[docs]class EnergySurface(object): """ Calculate energy surface, and dihedral angle probability distrubtion. """
[docs] def __init__(self, orig_dihe_values): """ Save original dihedral values and initialize energy. :type orig_dihe_values: list of float :param orig_dihe_values: The original dihedral values """ self.dihe_num = len(orig_dihe_values) self.dihe_values = numpy.array(orig_dihe_values) self.energy = numpy.zeros(self.dihe_num)
[docs] def setUniformDistribution(self): """ Set uniform distribution. """ self.torsion_probability = numpy.full(self.dihe_num, old_div(1., self.dihe_num))
[docs] def setBoltzmannDistribution(self, struct, dihedral, forcefield_num, force_constant, beta): """ Calculate torsion energy surface. If any energies on the surface are invalid, set uniform distribution. If all are valid, set Boltzmann distribution :type struct: {schrodinger.Structure.structure} :param struct: the local structure for dihedral angle energy surface :type dihedral: list of int :param dihedral: the 4 ints are the atom ids of target dihedral angle :type forcefield_num: int :param forcefield_num: the mmffld integer for the given forcefield :type force_constant: float :param force_constant: the energy constant for torsion restraints :type beta: float :param beta: 1/kT to get Boltzmann factor (E / kT) """ # minimize, rotate, restrained minimize... as L{BondRotator} # Canonicalize positions so that Minimizer works on reasonable initial # struct try: canonicalizer = fast3d.Canonicalizer() canonicalizer.canonicalizePositions(struct) except RuntimeError: # 'fragmentation failed' may happen as in canonicalizer_test.py self.setUniformDistribution() return with minimize.minimizer_context( struct=struct, no_zob_restrain=True, ffld_version=forcefield_num) as minimizr: for idx, dihe_value in enumerate(self.dihe_values): struct.adjust(dihe_value, *dihedral) minimizr.addTorsionRestraint(*(dihedral + [force_constant, dihe_value])) min_res = minimizr.minimize() self.dihe_values[idx] = minimizr.getStructure().measure( *dihedral) self.energy[idx] = min_res.potential_energy minimizr.deleteAllRestraints() if any(numpy.isinf(self.energy)): # After minimization, E = inf self.setUniformDistribution() return # Calculate Boltzmann distribution # Shift the energy minimum to zero so that all energy values are positive # and the max probability value is 1 self.energy -= min(self.energy) self.torsion_probability = numpy.exp(-self.energy * beta) # No zero probability values, and thus numpy.random.choice always generate # a full list of dihedral angle values self.torsion_probability[self.torsion_probability < 10**-10] = 10**-10 p_sum = numpy.sum(self.torsion_probability) self.torsion_probability /= p_sum
[docs]class RotationError(Exception): pass
[docs]class PolymerFragments(BuilderWithClashDetection): """ Save the struct and break it into fragments. """ LETTER_NUM = 26
[docs] def __init__(self, struct, num_struct, dihedral_min=0., dihedral_max=360., dihedral_num=DIHEDRAL_NUM, dihedral_exclude=False, only_backbone=False, vdw_scale=1., forcefield=OPLS2005, temperature=None, pop_local_clash=True, logger=None, indexing='0'): """ Prepare infomation for in-cell grow of one single polymer chain. :type struct: `schrodinger.structure.Structure` :param struct: the structure to be breaked into fragments :type num_struct: int :param num_struct: copy the struct by num_struct times when placing them into the cell :type dihedral_min: float :param dihedral_min: lower limit of dihedral angle :type dihedral_max: float :param dihedral_max: upper limit of dihedral angle :type dihedral_num: int :param dihedral_num: the number of dihedral values :type dihedral_exclude: bool :param dihedral_exclude: (0, dihedral_min) and (dihedral_max, 360) are used as dihedral angle range, if True. :type only_backbone: bool :param only_backbone: only the rotatable bonds in backbone is considered :type vdw_scale: float :param vdw_scale: VDW scale factor to use :type forcefield: str :param forcefield: The name of the force field to use. The default is 'OPLS_2005'. :type temperature: float, or None :param temperature: the temperature to calculate dihedral angle distribution using boltzmann factor. If None, a uniform distribution is used. :type pop_local_clash: bool :param pop_local_clash: whether remove dihedral angle values that lead to local clashes :type logger: `logging.Logger` :param logger: logger for output info :type indexing: `str` :param indexing: the indexing based on compositions and molecules """ self.original_polymer = struct self.num_struct = num_struct self.dihedral_min = dihedral_min self.dihedral_max = dihedral_max self.dihedral_num = dihedral_num self.dihedral_exclude = dihedral_exclude self.only_backbone = only_backbone self.vdw_scale = vdw_scale self.forcefield = forcefield self.temperature = temperature self.logger = logger self.debug = schrodinger.in_dev_env() self.backend = jobcontrol.get_backend() super(PolymerFragments, self).__init__(vdw_scale=vdw_scale, logger=self.logger) self.template_polymer = struct.copy() self.moieties = None # template_polymer will be broken into fragments, and some of them are # further instantiated as Monomer objects. setupTacticity method in # Monomer constructor throws warnings when get_chiral_atoms method # analyzes struct with 's_st_Chirality_' property msutils.remove_properties(self.template_polymer, matches=['s_st_Chirality_']) self.template_polymer_rings = self.template_polymer.find_rings( sort=True) self.ringnum = len(self.template_polymer_rings) self.has_rings = bool(self.ringnum) self.torsion_energy_surf = None # Only enable pop_local_clash for long chains with large vdw scale self.pop_local_clash = (pop_local_clash and self.template_polymer.atom_total > 1000 and self.vdw_scale > 0.6) self.setDiheInit() self.debug_file = indexing + '.mae'
[docs] def setDiheInit(self): """ Set descrete dihedral values. """ if self.dihedral_exclude: self.dihe_init = [] for dihe in numpy.linspace(self.dihedral_max - 360, self.dihedral_min, self.dihedral_num): if dihe <= 0: dihe += 360.0 self.dihe_init.append(dihe) else: self.dihe_init = list( numpy.linspace(self.dihedral_min, self.dihedral_max, self.dihedral_num)) if self.dihedral_min == 0.0: # Since dihedral range is (0, 360], convert 0 to 360 self.dihe_init.remove(self.dihedral_min) if self.dihedral_max != 360.0: self.dihe_init.append(360.0)
[docs] def breakIntoFragments(self, is_rigid=False, restore_template=True): """ Break polymer into fragments. :type is_rigid: bool :param is_rigid: set the molecule as one fragment. :type restore_template: bool :param restore_template: restore the template polymer to the original one. """ if not self.template_polymer: return if not is_rigid and self.checkAndmarkPolymerProp(): self.setAllfragments() else: # Rigid body found self.setAsOneFragment() return if self.pop_local_clash or self.temperature: self.setLocalStructs() if self.pop_local_clash: self.popLocalClashes() if self.temperature: self.torsionEnergySurf(temperature=self.temperature) # Overwrite any changes to template_polymer beyond xyz self.original_polymer.setXYZ(self.template_polymer.getXYZ()) if restore_template: self.template_polymer = self.original_polymer self.first_frag.template_polymer = self.template_polymer for atom in self.template_polymer.atom: # MATSCI-11822: remove alt_xyz so that no bonds look broken atom.alt_xyz = None atom.property.pop('r_m_pdb_occupancy', None)
[docs] def checkAndmarkPolymerProp(self): """ Check the struct and mark with necessary polymer properties. :rtype: bool :return: True, if the structure is successfully marked with polymer properties. """ # Coarsegrain structures check s_m_atom_name instead if msutils.is_coarse_grain(self.template_polymer): is_polymer = self.isCgPolymer() else: is_polymer = self.isAllAtomPolymer() if not is_polymer: # Polymer residue information is assigned to non-polymer struct, # if the non-polymer struct has rotatable bonds. is_polymer = self.setAndMarkResidues() if is_polymer: self.extractMoieties() self.updateOrigAtomIdx() self.markMoietiesProps() try: self.moieties = Moieties('', structs=self.moiety_structs.values(), one_letter=False) except MoietyError: return False return is_polymer
[docs] def setLocalStructs(self): """ Set the local structures. """ self.findNeighborFrags() self.setLocalStructAtomIds() self.saveLocalStructs()
[docs] def setAsOneFragment(self): """ Set the whole molecule as one fragment. """ self.first_frag = PolymerFragment( 0, 0, template_polymer=self.template_polymer) self.first_frag.atom_ids = list( range(1, self.template_polymer.atom_total + 1)) # MATSCI-5503: rigid molecules should be centered self.structToOriginByFrag()
[docs] def setAllfragments(self): """ Set all fragments. """ if not self.only_backbone: # Break the monomer moieties into more small moieties # Update monomer, all_moieties and residue (template polymer) for moiety in list(self.moieties.monomers.values()): moiety.setAsFragment() moiety.template_polymer = self.template_polymer for sub_moiety in moiety.breakTemplatePolymer(): moiety_pdbre = sub_moiety.pdbres.rstrip() self.moieties.monomers[moiety_pdbre] = sub_moiety self.moieties.all_moieties.append(sub_moiety) for moiety in self.moieties.all_moieties: moiety.setAsFragment() # Renumber resnum so that no two residues use the same resnum self.renumberResGetM2PMap() self.updateAllAtomIndexMaps() try: self.createFragForInitiator() except KeyError: # MATSCI-8369: renumberResGetM2PMap() modified the self.atom_idx_map_m2p # in a way that createFragForInitiator() gives a traceback when residue_num = 3. # The issue is narrowed down to the conflict between MONOMER_ORIG_ATOM_IDX_PROP # modified by breakTemplatePolymer() and empty return from getDelNextResnumByResnum() # However, I am not able to identify the exact bug and this is only a workaround # to ignore some rotatable bonds and make the builder job successful self.renumberResGetM2PMap( monomer_orig_idx_prop=MONOMER_ORIG_IDX_PROP2) self.updateAllAtomIndexMaps() self.createFragForInitiator() self.finishPolymerFrags() self.appendMissedFrags() self.assignRingAtomIds() self.first_frag.markBranchingFrag() self.first_frag.setDihedrals(self.dihe_init) self.first_frag.markSidegroupFrag() self.structToOriginByFrag()
[docs] def saveLocalStructs(self): """ Save local structs and update dihedrals according to the atom ids in newly saved local structs. """ self.local_structs = {} is_coarse_grain = msutils.is_coarse_grain(self.template_polymer) capper_terminator = self.getCapperTerminator(is_coarse_grain) smiles_generator = smiles.SmilesGenerator(wantAllH=not is_coarse_grain) for frag in self.first_frag.fragments(include_self=False): # atom ids change due to struct extraction struct, atom_id_map = self.extractWithMap( self.template_polymer, frag.local_struct_atom_ids) grow_markers = [] for atom in struct.atom: atom.property[ORIG_ATOM_IDX_PROP] = atom.index # cap atom in broken bonds with H or C atoms for end_atom_id in frag.end_atom_ids: grow_marker = atom_id_map[end_atom_id] grow_markers.append(grow_marker) grower = next(struct.atom[grow_marker].bonded_atoms).index capper_terminator.addToChain(struct, grower, grow_marker, orientation=HEADFIRST, remove_marker=False) # copy marker ORIG_ATOM_IDX_PROP to H/C capper struct.atom[struct.atom_total].property[ORIG_ATOM_IDX_PROP] = \ struct.atom[grow_marker].property[ORIG_ATOM_IDX_PROP] struct.deleteAtoms(grow_markers) # atom ids change due to adding capper atoms atom_id_map2 = {} for atom in struct.atom: orig_atom_id = atom.property.get(ORIG_ATOM_IDX_PROP) atom_id_map2[orig_atom_id] = atom.index # atom ids change due to reorder_atoms smile_str, smile_map = smiles_generator.getSmilesAndMap(struct) if smile_str not in self.local_structs: try: struct = build.reorder_atoms(struct, smile_map) self.local_structs[smile_str] = struct except: # Some invalid structs (e.g. delete some H atoms from a valid struct) # cause Exception: The 'new_order' list must have xx elements. self.local_structs[smile_str] = None frag.local_struct_smiles = smile_str continue # three-step map: struct extraction, capping with H/C; smile_map reorder_atoms dihe_atom_ids = [ smile_map.index(atom_id_map2[atom_id_map[x]]) + 1 for x in frag.dihedral ] mapped_local_struct_side_dihedrals = [] for local_struct_side_dihedral in frag.local_struct_side_dihedrals: mapped_local_struct_side_dihedrals.append([ smile_map.index(atom_id_map2[atom_id_map[x]]) + 1 for x in local_struct_side_dihedral ]) frag.local_struct_dihedral = dihe_atom_ids frag.local_struct_side_dihedrals = mapped_local_struct_side_dihedrals frag.local_struct_smiles = smile_str frag.torsion_pattern = ' '.join( [smile_str, ','.join(map(str, dihe_atom_ids))])
[docs] def popLocalClashes(self): """ Try dihedral value one by one for the target torsion in local structs, and eliminate those leading to inevitable local clashes by incomplete conformer search of local structs. """ no_clash_dihe = {} for frag in self.first_frag.fragments(include_self=False): if frag.torsion_pattern in no_clash_dihe: # This dihedral type has been calculated previously frag.dihe_rand = no_clash_dihe[frag.torsion_pattern][:] frag.pool = frag.dihe_rand[:] continue # Calculate new no-clash dihedral candidates for new type of dihedral local_struct = self.local_structs[frag.local_struct_smiles] if not local_struct: no_clash_dihe[frag.torsion_pattern] = frag.dihe_rand[:] continue dihedral = frag.local_struct_dihedral dihe_range = frag.dihe_rand[:] side_dihe_num = len(frag.local_struct_side_dihedrals) rings = local_struct.ring self.has_rings = bool(rings) # Remove dihedral values leading to unavoidable local clashes # 1. Copy the local structure around one center dihedral # 2. Set the center dihedral to one possible candidate value # 3. Randomly rotate side dihedrals in the local structure to # generate MAX_HIT_NUM conformers # 4. If all of the conformers have intra clashes, remove that # dihedral value from the center dihedral candinates for dihe_value in dihe_range: struct = local_struct.copy() struct.adjust(dihe_value, *dihedral) local_clash = self.checkForIntraStructureClashes(struct, rings=rings) if not local_clash: continue # FIXME: this functions is to slow to do full conformational search # If local struct has 5 side rotatable bonds, 73^5 generates too many # conformers, and the checkForIntraStructureClashes is too slow. # Currently randomly pick some out of all conformers for conformer_idx in range(MAX_HIT_NUM * side_dihe_num): for side_dihe_idx, side_dihe_value in enumerate( numpy.random.choice(dihe_range, side_dihe_num)): struct.adjust( side_dihe_value, *frag.local_struct_side_dihedrals[side_dihe_idx]) if not self.checkForIntraStructureClashes(struct, rings=rings): local_clash = False break if local_clash: # If MAX_HIT_NUM * side_dihe_num conformers all have clashes, # remove this value from dihedral angle candidates frag.dihe_rand.remove(dihe_value) no_clash_dihe[frag.torsion_pattern] = frag.dihe_rand[:] frag.dihe_pool = frag.dihe_rand[:] if not frag.dihe_rand: msg = ("No clash-free rotation can be found for atoms %s, " "please use a smaller VDW scale factor." % '-'.join(map(str, frag.dihedral))) raise RotationError(msg)
[docs] @common.mmffld_environment() def torsionEnergySurf(self, force_constant=10., temperature=300.): """ Sample torsion energy surface for local structs, and initialize the dihedral angle distribution using Boltzmann distribution. :type force_constant: float :param force_constant: the energy constant for torsion restraints :type temperature: float, or None :param temperature: the temperature to calculate dihedral angle distribution """ self.torsion_energy_surf = {} beta = old_div(1.0, (BOLTZMANN * temperature)) forcefield_num = mm.opls_name_to_version(self.forcefield) for frag in self.first_frag.fragments(include_self=False): if frag.torsion_pattern in self.torsion_energy_surf: # This torsion energy surf has been calculated previously frag.torsion_probability = self.torsion_energy_surf[ frag.torsion_pattern].torsion_probability.copy() # randomly sample according to the Boltzmann distribution frag.dihe_pool = list( numpy.random.choice(frag.dihe_rand, len(frag.dihe_rand), replace=False, p=frag.torsion_probability)) continue # generate energy surface for this new torsion_pattern local_struct = self.local_structs[frag.local_struct_smiles] energy_surf = EnergySurface(frag.dihe_rand) # if invalid struct, assign uniform distribution if not local_struct or desmondutils.find_forcefield_invalid_structures( [local_struct], forcefield_num): energy_surf.setUniformDistribution() else: struct = local_struct.copy() dihedral = frag.local_struct_dihedral energy_surf.setBoltzmannDistribution(struct, dihedral, forcefield_num, force_constant, beta) self.torsion_energy_surf[frag.torsion_pattern] = energy_surf frag.torsion_probability = energy_surf.torsion_probability.copy( ) # randomly sample according to the Boltzmann distribution frag.dihe_pool = list( numpy.random.choice(frag.dihe_rand, len(frag.dihe_rand), replace=False, p=frag.torsion_probability))
[docs] def extractWithMap(self, struct, indices, copy_props=False): """ Return a new structure object which contains the atoms of the current structure that appear in the specified list and a dict which maps atom ids in current structure to those in newly fextracted substructure. :type struct: `structure.Structure` :param struct: the structure to extract substructs from :type indices: list of int :param indices: the ids of atoms to be extracted :type copy_props: bool :param copy_props: whether to copy the structure level property :rtype: `structure.Structure`, dict :return: substructure, dict mapping to atom ids in substructure """ for atom_id in indices: struct.atom[atom_id].property[ORIG_ATOM_IDX_PROP] = atom_id sub_struct = struct.extract(indices, copy_props=copy_props) map_atom_id = { atom.property[ORIG_ATOM_IDX_PROP]: atom.index for atom in sub_struct.atom } return sub_struct, map_atom_id
[docs] def getCapperTerminator(self, is_coarse_grain): """ Get a terminator moiety to cap the atom in a broken bond. :type is_coarse_grain: bool :param is_coarse_grain: whether the structure is coarse grain :rtype: `Terminator` :return: terminator moiety of one single H or C atom with Br marker. """ struct = get_generic_end_structure(is_coarse_grain=is_coarse_grain) return Terminator(struct)
[docs] def findNeighborFrags(self, separated_bond_num=2): """ Search for and save neighboring fragments for each fragment. For any fragments, the bond between dihe 2nd and 3rd atoms is rotatable. If any other fragments has any atoms that are within separated_bond_num bonds away from the dihe 2nd and 3rd atoms of one certain fragment, they are considered neighbor fragments of that fragment. :type separated_bond_num: int :param separated_bond_num: criteria for defining neighbor fragments """ graph = analyze.create_nx_graph(self.template_polymer) for cur_frag in self.first_frag.fragments(include_self=False): pre_frag = cur_frag.pre_frag # Search for the most faraway parent fragment while pre_frag.pre_frag: if networkx.shortest_path_length( graph, source=cur_frag.dihedral[2], target=pre_frag.dihedral[2]) <= separated_bond_num: pre_frag = pre_frag.pre_frag else: break # Add the the most faraway parent fragment whose dihe 3rd is outside the # searching range, but dihe 4th is inside the searching range cur_frag.neighbor_frags.append(pre_frag) next_frags = pre_frag.next_frags[:] # search and save other parent fragments, itself, and child fragments while next_frags: frag = next_frags.pop() bond_num_to_dihe_2nd = networkx.shortest_path_length( graph, source=cur_frag.dihedral[1], target=frag.dihedral[2]) bond_num_to_dihe_3rd = networkx.shortest_path_length( graph, source=cur_frag.dihedral[2], target=frag.dihedral[2]) if min([bond_num_to_dihe_3rd, bond_num_to_dihe_2nd ]) <= separated_bond_num: cur_frag.neighbor_frags.append(frag) next_frags += frag.next_frags[:]
[docs] def setLocalStructAtomIds(self): """ Set the local struct atom ids and end atom ids defined by neighbor fragments. """ for cur_frag in self.first_frag.fragments(include_self=False): ini_as_neighbor = False # add atom ids in neighbor_frags to local_struct_atom_ids for frag in cur_frag.neighbor_frags: cur_frag.local_struct_atom_ids.update(frag.atom_ids) if not frag.pre_frag: ini_as_neighbor = True skip_ini_end_atom_id = set() # fine tune local_struct_atom_ids for those atoms in rotatable bonds excluded_side_frags = {cur_frag.fragnum} neighbor_frags = set(cur_frag.neighbor_frags) for frag in cur_frag.neighbor_frags: if not frag.pre_frag: for next_frag in frag.next_frags: if next_frag in neighbor_frags and \ next_frag.fragnum not in excluded_side_frags: # Bond between INI frag and next_frag is not broken # save side rotatable bonds for conformer search cur_frag.local_struct_side_dihedrals.append( next_frag.dihedral[:]) excluded_side_frags.add(next_frag.fragnum) continue # this is not INI frag if frag.dihedral[2] not in cur_frag.local_struct_atom_ids: # cur_frag is the most faraway parent fragment other than INI cur_frag.local_struct_atom_ids.update(frag.dihedral[1:3]) cur_frag.end_atom_ids.append(frag.dihedral[1]) # useful when INI has multiple R groups if not frag.pre_frag.pre_frag: # This is the dihe 3rd atom saved in a frag connected to INI skip_ini_end_atom_id.add(frag.dihedral[2]) for next_frag in frag.next_frags: if next_frag.dihedral[ 3] not in cur_frag.local_struct_atom_ids: # frag is one of the most faraway child fragments cur_frag.end_atom_ids.append(next_frag.dihedral[2]) elif next_frag.fragnum not in excluded_side_frags: # Bond between frag and next_frag is not broken # save side rotatable bonds for conformer search cur_frag.local_struct_side_dihedrals.append( next_frag.dihedral[:]) excluded_side_frags.add(next_frag.fragnum) if ini_as_neighbor: for atom_id in self.first_frag.bonded_atom_ids: if atom_id not in skip_ini_end_atom_id: cur_frag.end_atom_ids.append(atom_id)
[docs] def isCgPolymer(self): """ Coarse grain particles of one type are considerred as one type of polymer residue. If one cg structure has INI, TRM, and others as residues, consider it built from polymer builder, assuming all bonds between particles are rotatable. :rtype: bool :return: True, if the cg structure is originally from polymer builder and prepared properly. """ int_trm_set = {INI, TRM} residue_name = {} residue_number = 0 monomer_idx = 0 for atom in self.template_polymer.atom: residue_number += 1 if atom.name not in residue_name: if atom.name in int_trm_set: # INT and TRM do not change pdbres residue_name[atom.name] = atom.name else: # Monomers have A-Z as pdbres, which means 0-25 for # monomer_idx. if monomer_idx == 26: # This is not a Cg polymer as the number of monomer # types exceeds 26 return False atom_name = string.ascii_uppercase[monomer_idx].ljust(4) residue_name[atom.name] = atom_name monomer_idx += 1 atom.pdbres = residue_name[atom.name] atom.resnum = residue_number atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = 1 atom.property[HEAD_ATOM_PROP] = True # Original Monomers have TAIL_ATOM_PROP # For newly broken ones from side groups, extra markers cause no trouble atom.property[TAIL_ATOM_PROP] = True if atom.bond_total > 2: atom.property[BRANCH_ATOM_PROP] = True return len(residue_name) > 2 and (INI in residue_name) and (TRM in residue_name)
[docs] def renumberResGetM2PMap( self, monomer_orig_idx_prop=msprops.MONOMER_ORIG_ATOM_IDX_PROP): """ Loop over all the residues and create mapping from atom id in residue to that in polymer. :param monomer_orig_idx_prop: The atom property to define the original atom index :type monomer_orig_idx_prop: str """ self.connected_resnums = {} self.atom_idx_map_m2p = {} for residue_num, residue in enumerate(self.template_polymer.residue, 1): residue.resnum = residue_num self.atom_idx_map_m2p[residue_num] = {} for atom in residue.atom: # Create mapping between atom index to connected resnum self.connected_resnums.setdefault(atom.index, []) # Create map from atom id in monomer to that in polymer # xx_atom_id_monomer refers to the atom id in monomer self.atom_idx_map_m2p[residue_num][ atom.property[monomer_orig_idx_prop]] = atom.index
[docs] def updateAllAtomIndexMaps(self): """ Get information from previous polymer build for polymer grow in cell. """ self.residue_connectivity = {} self.backbone_side_atoms = {} self.backbone = {} self.backbone_to_marked_end = {} self.rotatable_bonds = {} self.setInitiatorResidue() for residue in self.template_polymer.residue: residue_type = residue.pdbres.rstrip() moiety = self.moieties.getMoiety(residue_type) residue_num = residue.resnum self.setResidueConnectivity(moiety, residue_num) self.setBackboneAndSideGroup(moiety, residue_num) if residue_type == INI or residue_type == TRM: continue self.setPathtoBranchingAtom(moiety.backbone_to_marked_end, residue_num) self.setRotatableBond(moiety.rotatable_bonds, residue_num)
[docs] def setInitiatorResidue(self): """ Save the residue containing the polymer initiator. """ for residue in self.template_polymer.residue: if residue.pdbres.rstrip() == INI: self.initiator_res = residue return
[docs] def setResidueConnectivity(self, moiety, residue_num): """ Create mapping from connected resnum pair to connected atom pair. :type moiety: `schrodinger.structure.Structure` :param moiety: one moiety used to construct part of the polymer :type residue_num: int :param residue_num: residue num """ # residue_connectivity[residue num][connected resnum] = # [atom id in residue, connected atom in connected resnum] self.residue_connectivity[residue_num] = {} for marker_id in moiety.markers: # atom in current residue bonded to another residue atom_id = self.atom_idx_map_m2p[residue_num][next( moiety.raw_struct.atom[marker_id].bonded_atoms).index] for neighbor in self.template_polymer.atom[atom_id].bonded_atoms: if neighbor.resnum != residue_num: self.residue_connectivity[residue_num][neighbor.resnum] = [ atom_id, neighbor.index ] if neighbor.resnum not in self.connected_resnums[atom_id]: self.connected_resnums[atom_id].append(neighbor.resnum)
[docs] def setBackboneAndSideGroup(self, moiety, residue_num): """ Update backbone and side group maps. :type moiety: `schrodinger.structure.Structure` :param moiety: one moiety used to construct part of the polymer :type residue_num: int :param residue_num: residue num """ self.backbone_side_atoms[residue_num] = OrderedDict() # Backbone atom id in monomer (bb_atm_id_m) for bb_atm_id_m in moiety.backbone_path_indexes: # Backbone atom id in polymer (bb_atm_id_p) bb_atm_id_p = self.atom_idx_map_m2p[residue_num][bb_atm_id_m] self.backbone_side_atoms[residue_num][bb_atm_id_p] = [] # Side atom id in monomer (side_atm_id_m) for side_atm_id_m in moiety.backbone_side_atoms[bb_atm_id_m]: side_atm_id_p = self.atom_idx_map_m2p[residue_num][ side_atm_id_m] # [residue_num][backbone_atom_id] = [side_group_atom_ids] self.backbone_side_atoms[residue_num][bb_atm_id_p].append( side_atm_id_p)
[docs] def setPathtoBranchingAtom(self, moiety_bb_to_marked_end, residue_num): """ Update maps for the path between backbone atom to atom branching point. :type moiety_bb_to_marked_end: dict :param moiety_bb_to_marked_end: Keys are backbone atom indexes, values are dictionaries with keys being the branching atom and values being the path between backbone atom and branching atom :type residue_num: int :param residue_num: residue num """ for backbone_atom_id, branching_iterm in moiety_bb_to_marked_end.items( ): polymer_backbone_atom_id = self.atom_idx_map_m2p[residue_num][ backbone_atom_id] self.backbone_to_marked_end[polymer_backbone_atom_id] = {} for branching_atom_id, path_with_ends in branching_iterm.items(): new_path_with_ends = [] for atom_id in path_with_ends: new_path_with_ends.append( self.atom_idx_map_m2p[residue_num][atom_id]) bonded_to_marker = self.atom_idx_map_m2p[residue_num][ branching_atom_id] self.backbone_to_marked_end[polymer_backbone_atom_id][ bonded_to_marker] = new_path_with_ends
[docs] def setRotatableBond(self, moiety_rotatable_bonds, residue_num): """ Update maps for the path between backbone atom to atom branching point. :type moiety_rotatable_bonds: list of tuple :param moiety_rotatable_bonds: (atom 1, atom 2) between which bond is rotatable :type residue_num: int :param residue_num: residue num """ rotatable_bonds = [] for atom0, atom1 in moiety_rotatable_bonds: rotatable_bonds.append({ self.atom_idx_map_m2p[residue_num][atom0], self.atom_idx_map_m2p[residue_num][atom1] }) # Rotatable_bonds[residue_type] = (set(atom1, atom2)...) # and the bond between atom1 and atom2 is rotatable self.rotatable_bonds[residue_num] = tuple(rotatable_bonds)
[docs] def createFragForInitiator(self): """ Create the the first polymer fragment from the initiator and append the following children of it. """ cur_resnum = self.initiator_res.resnum self.first_frag = PolymerFragment( cur_resnum, 0, template_polymer=self.template_polymer) for atom in self.initiator_res.atom: self.first_frag.atom_ids.append(atom.index) # Multiple Rx groups in INI next_resnum = list(self.getDelNextResnumByResnum(cur_resnum)) for next_resnum, [atom_id, next_atom_id] in next_resnum: sub_backbone = [] # Try to grab another atom in INI as dihedral[0] atom_id_ini0 = self.bondedAtomIdInSameRes(atom_id) if atom_id_ini0: # Add the atom connected to the terminating atom in INI sub_backbone.append(atom_id_ini0) self.first_frag.pre_backbone.append(atom_id_ini0) self.backbone_to_marked_end[atom_id_ini0] = {} # Add terminating atom in INI to sub_backbone and pre_backbone sub_backbone.append(atom_id) self.first_frag.pre_backbone.append(atom_id) backbone_atom_ids = self.resBackbone(next_resnum, next_atom_id) for idx, backbone_atom_id in enumerate(backbone_atom_ids): # Add first atom in first monomer sub_backbone.append(backbone_atom_id) self.first_frag.atom_ids.append(backbone_atom_id) self.first_frag.pre_backbone.append(backbone_atom_id) if len(sub_backbone) > 2 and (set( sub_backbone[-2::]) in self.rotatable_bonds[next_resnum] or idx == 0): # dihe 2nd-3rd is found rotatable try: dihe_4th_atom_id = backbone_atom_ids[idx + 1] except IndexError: # dihe 3rd is the last atom in monomer dihe_4th_atom_id = None self.first_frag.resnum = next_resnum # Rotatable bond found before M-M bond self.createEndSubFragment(self.first_frag, next_resnum, sub_backbone, next_resnum, dihe_4th_atom_id) break else: # dihe 1st-2rd or non-rotatable dihe 2nd-3rd # Add side atoms and continue to add more backbone atom self.first_frag.atom_ids += self.backbone_side_atoms[ next_resnum][backbone_atom_id] continue else: # When no rotatable bonds in M or struct H-C (H-M as polymer) cur_resnum = next_resnum # This for loop is to tackle branching at dihe 2nd for next_resnum, [atom_id, next_atom_id ] in self.getDelNextResnumByResnum(cur_resnum): self.first_frag.atom_ids.append(next_atom_id) backbone_atom_ids = self.resBackbone( next_resnum, next_atom_id) dihe_4th_atom_id = None if len( backbone_atom_ids) == 1 else backbone_atom_ids[1] self.first_frag.resnum = next_resnum # Rotatable M-M bond ordered_backbone = self.getInitiatorSubBackbone( sub_backbone, next_atom_id) self.createEndSubFragment(self.first_frag, next_resnum, ordered_backbone + [next_atom_id], next_resnum, dihe_4th_atom_id) for frag in self.first_frag.next_frags: self.addBranchingAtoms(frag) # save the end atom ids for ini fragment self.first_frag.bonded_atom_ids = [] for frag in self.first_frag.next_frags: self.first_frag.bonded_atom_ids.append(frag.dihedral[2])
[docs] def getInitiatorSubBackbone(self, sub_backbone, dihe_3rd): """ Return a list of atom ids: the last two atoms are bonded and the last bonds with the dihe 3rd atom. :type sub_backbone: list :param sub_backbone: the last two atoms are bound and the last one binds with the dihe 3rd atom. :type dihe_3rd: int :param dihe_3rd: the 3rd atom in a dihedral """ # Dihedral angle atoms are bonded: dihe_1st - dihe_2nd - dihe_3rd # By default sub_backbone[-2:] = [dihe_1st, dihe_2nd] if self.template_polymer.areBound(sub_backbone[-1], dihe_3rd): # No modification needed, since dihe_2nd and dihe_3rd are bonded return sub_backbone # Redefine dihe_2nd by searching the atoms in first_frag for dihe_2nd in self.first_frag.atom_ids: if self.template_polymer.areBound(dihe_2nd, dihe_3rd): # The dihe_2nd atom is in first_frag and bonds with dihe_3rd break for dihe_1st_atom in self.template_polymer.atom[dihe_2nd].bonded_atoms: if dihe_1st_atom.index != dihe_3rd: # The dihe_1st_atom binds with dihe_2nd and is not dihe_3rd break return [dihe_1st_atom.index, dihe_2nd]
[docs] def finishPolymerFrags(self): """ Generate all polymer fragments from the first generation fragment containing INT and the following generation fragments. """ fragments = [self.first_frag] + self.first_frag.next_frags[:] while fragments: frag = fragments.pop(0) # dihe_4th_atom_ids saves extra branching sites for dihe_4th_atom_id, dihe_4th_resnum, sub_backbone in frag.dihe_4th_atom_ids: backbone_atom_ids = self.continueResBackbone(dihe_4th_atom_id) new_frag = None # Along backbone_atom_ids to find rotatable a bond in each branch for idx, backbone_atom_id in enumerate(backbone_atom_ids): sub_backbone.append(backbone_atom_id) if backbone_atom_id not in frag.atom_ids: frag.atom_ids.append(backbone_atom_id) rotatable = set( sub_backbone[-2::]) in self.rotatable_bonds[frag.resnum] if rotatable or dihe_4th_resnum != frag.resnum: dihe_4th_atom_id = None if backbone_atom_id == backbone_atom_ids[ -1] else backbone_atom_ids[idx + 1] new_frag = self.createEndSubFragment( frag, dihe_4th_resnum, sub_backbone, dihe_4th_resnum, dihe_4th_atom_id) if new_frag: self.addBranchingAtoms(new_frag) fragments.append(new_frag) break else: # The dihe 2-3rd bond is not a rotatable bond within # residue and is not a cross-residue bond either frag.atom_ids += self.backbone_side_atoms[ dihe_4th_resnum][backbone_atom_id] frag.pre_backbone.append(backbone_atom_id) continue else: # No 2-3 rotatable bonds found, but reach the end of the residue for next_resnum, [ atom_id, next_atom_id ] in self.getDelNextResnumByAtomID(backbone_atom_id): connected_backbone_atom_ids = self.resBackbone( next_resnum, next_atom_id) if len(connected_backbone_atom_ids) > 1: dihe_4th_atom_id = connected_backbone_atom_ids[1] else: dihe_4th_atom_id = None frag.atom_ids.append(next_atom_id) new_frag = self.createEndSubFragment( frag, next_resnum, sub_backbone + [next_atom_id], next_resnum, dihe_4th_atom_id) if new_frag: self.addBranchingAtoms(new_frag) fragments.append(new_frag) resnum = self.template_polymer.atom[ backbone_atom_id].resnum if not self.residue_connectivity.get(resnum): # MATSCI-10158: the in-place modification on self.residue_connectivity # empties the content, which causes tracebacks in the next loop. break else: self.addBranchingAtoms(frag)
[docs] def appendMissedFrags(self): """ When polymer fragments don't find all the atoms in the original structure, this methods first groups them by bond connections and appends each group to a found fragment. """ # This is a temporary workaround that misses the rotatable bonds between # the missed atom groups and the found fragments. # This also ignores all rotatable bond within the missed atom group. # Collect the edge cases for a general fix all-togather: MATSCI-7009 all_atom_ids = set(self.template_polymer.getAtomIndices()) self.first_frag.adjustAtomIds(0) all_missed_atom_ids = all_atom_ids.difference( set(self.first_frag.atom_ids_in_mol)) if not all_missed_atom_ids: return missed_atom_id_groups = get_missed_atom_id_groups( self.template_polymer, all_missed_atom_ids) connection_infos = self.getConnectionInfos(missed_atom_id_groups, all_missed_atom_ids) msg = ( f"{connection_infos[0].connecting_atom_ids} in found fragments connect " f"missed_atom_id_groups {connection_infos[0].missed_atom_ids}") if self.logger: self.logger.debug(msg) for connection_info in connection_infos: for frag in self.first_frag.fragments(): intersection_ids = connection_info.connecting_atom_ids.intersection( frag.atom_ids) if intersection_ids: # This assumes that the linking between the missed frag # and the found one is not rotatable and all bonds within # the missed atoms are not rotatable frag.atom_ids += connection_info.missed_atom_ids
[docs] def getConnectionInfos(self, missed_atom_id_groups, all_missed_atom_ids): """ Return the connecting atoms, which belong to the atoms in the found fragment and connect to the missed atoms. :type missed_atom_id_groups: list of list :param missed_atom_id_groups: each sublist has missed atoms connected by covalent bonds. :type all_missed_atom_ids: list of int :param all_missed_atom_ids: a flat list of all missed atom indexes :rtype: list of set :return: each set has the connecting atoms, which belong to the found atoms in the fragment and are connected to the missed atoms. """ connection_infos = [] for missed_atom_id_group in missed_atom_id_groups: connecting_atom_group = [] for atom_id in missed_atom_id_group: connecting_atom_group += [ at.index for at in self.template_polymer.atom[atom_id].bonded_atoms ] connecting_atom_group = set(connecting_atom_group) connecting_atom_group = connecting_atom_group.difference( all_missed_atom_ids) connection_info = ConnectionInfo( missed_atom_ids=missed_atom_id_group, connecting_atom_ids=connecting_atom_group) connection_infos.append(connection_info) return connection_infos
[docs] def assignRingAtomIds(self): """ Assign ring atom indexes to each fragment. """ # FIXME: rings_atom_ids shoud be searched in monomers and mapped to polymer for frag in self.first_frag.fragments(): for ring in self.template_polymer.ring: rings_atom_ids = ring.getAtomIndices() shared_atom_ids = set( frag.atom_ids).intersection(rings_atom_ids) if len(shared_atom_ids) > 1: frag.rings_atom_ids.append(rings_atom_ids[:])
[docs] def structToOriginByFrag(self): """ Move the structure so the centroid of the fragment is at the origin. """ center = self.getFragCentroid(self.template_polymer, self.first_frag) self.template_polymer.setXYZ(self.template_polymer.getXYZ() - center)
[docs] def getFragCentroid(self, struct, frag): """ Calculate the centroid of polymer fragment. :type frag: `schrodinger.structure.Structure` :param frag: polymer structure to get atom positions from :type frag: `Polymerfragment` :param frag: The polymer fragment to get atom ids from :rtype: list :return: the centroid of polymer fragment """ return transform.get_centroid(struct, atom_list=frag.atom_ids)[:3]
[docs] def getDelNextResnumByResnum(self, cur_resnum): """ Given resnum, find all the connected residues and atoms. Delete the connectivity from current residue to next residue and the connectivity from next residue to current residue. :type cur_resnum: int :param cur_resnum: the residue number which the atom id belongs to :type del_connection: bool :param del_connection: del the connection from current atom and residue to the connected atom and residue :rtype: iterator to generate int, [int, int] :return: the connected residue number, [atom id, connected atom id] """ for next_resnum, [atom_id, next_atom_id] in list( self.residue_connectivity[cur_resnum].items()): self.delConnectivity(cur_resnum, next_resnum, atom_id, next_atom_id) yield next_resnum, [atom_id, next_atom_id]
[docs] def delConnectivity(self, cur_resnum, next_resnum, atom_id, next_atom_id): """ Delete the connectivity from current residue to next residue and the connectivity from next residue to current residue. :type cur_resnum: int :param cur_resnum: the residue number which the current atom id belongs to :type next_resnum: int :param next_resnum: the residue number which the connected atom id belongs to :type atom_id: int :param atom_id: the atom id from which other connections are searched :type next_atom_id: int :param next_atom_id: one connected atom id """ self.residue_connectivity[next_resnum].pop(cur_resnum) self.connected_resnums[next_atom_id].remove(cur_resnum) self.residue_connectivity[cur_resnum].pop(next_resnum) self.connected_resnums[atom_id].remove(next_resnum)
[docs] def createEndSubFragment(self, cur_frag, dihe_3rd_resnum, sub_backbone, dihe_4th_resnum, dihe_4th_atom_id, append_to_frag=None, last_created_frag=False): """ Create one child PolymerFragment or append atoms to append_to_frag. :type cur_frag: `PolymerFragment` :param cur_frag: current polymer fragment :type dihe_3rd_resnum: int :param dihe_3rd_resnum: the resnum of the 3rd atom in dihedral :type sub_backbone: list of int :param sub_backbone: the backbone whose last atom (dihedral 3rd) will be connected to new fragment :type dihe_4th_resnum: int :param dihe_4th_resnum: a guess of the resnum of the 4th atom in res :type dihe_4th_atom_id: int or None :param dihe_4th_atom_id: the 4th atom of dihedral, if int; None, if the 4th atom is in the adjacent res :type append_to_frag: PolymerFragment or None :param append_to_frag: if None, the found 'branching' is new polymer fragment, and new PolymerFragment is created with dihedral_4th_atom. if not None, the found 'branching' is part of the old polymer fragment and no new PolymerFragment is created. Add dihedral_4th_atom to old polymer fragment. :type last_created_frag: bool :param last_created_frag: if True, will not return fragment :rtype: `PolymerFragment` or None :return: the newly created `PolymerFragment`, or None if atoms are appended to append_to_frag fragment """ dihe_3rd_atom_id = sub_backbone[-1] if not dihe_4th_atom_id: # Dihedral 3rd and 4th atoms in the different residues # in other words, bond connects 2 residues # dihe_4th_resnum and dihe_4th_atom_id are assigned by for loop for dihe_4th_resnum, [ atom_id, dihe_4th_atom_id ] in self.getDelNextResnumByAtomID(dihe_3rd_atom_id): # At the end of the residue, dihedral 3rd may connect to # multiple residues, get one as the connecting residue break else: # dihedral 3rd is the termination atom of current moiety dihe_4th_atom_id = self.bondedAtomIdInSameRes(dihe_3rd_atom_id) # FIXME: moiety broken from side group chain has no attached TRM if dihe_4th_atom_id and self.template_polymer.atom[ dihe_4th_atom_id].pdbres.rstrip() == TRM: # The bond between TRM and the rest of a polymer is considered # rotatable, as long as TRM has more than one atom. last_created_frag = True else: cur_frag.atom_ids += self.backbone_side_atoms[ dihe_3rd_resnum][dihe_3rd_atom_id] return None if not append_to_frag: new_frag = PolymerFragment(dihe_3rd_resnum, cur_frag.generation + 1, pre_frag=cur_frag, pre_backbone=sub_backbone) new_frag.dihedral = sub_backbone[-3::] + [dihe_4th_atom_id] new_frag.dihe_4th_resnum = dihe_4th_resnum new_frag.atom_ids += self.backbone_side_atoms[dihe_3rd_resnum][ dihe_3rd_atom_id] cur_frag.next_frags.append(new_frag) else: new_frag = append_to_frag # Save branching dihe 4th atoms and related pre_backbone new_frag.dihe_4th_atom_ids.append( tuple([dihe_4th_atom_id, dihe_4th_resnum, sub_backbone[-2::]])) return None if (append_to_frag or last_created_frag) else new_frag
[docs] def addBranchingAtoms(self, cur_frag): """ Append extra dihedral 4th atoms for branching. Add to the parent of cur_frag instead of cur_frag, if the current fragment and new branch share the 1st, 2nd, and 3rd atoms in a dihedral angle. :type cur_frag: `PolymerFragment` :param cur_frag: branching points will be added to the this fragment """ # cur_frag.dihedral[2] is the dihe 3rd atom in cur_frag dihe_3rd_and_all = [cur_frag.dihedral[2]] + cur_frag.atom_ids for idx, atom_id in enumerate(cur_frag.pre_backbone): if atom_id not in self.backbone_to_marked_end: # 1 atom in INI could hit this continue for bonded_to_marker, path_with_ends in self.backbone_to_marked_end[ atom_id].items(): branching_atom_id = path_with_ends[-1] sub_backbone = cur_frag.pre_backbone[:idx] + path_with_ends for dihe_4th_resnum, [ cur_atom_id, dihe_4th_atom_id ] in self.getDelNextResnumByAtomID(branching_atom_id): if branching_atom_id not in dihe_3rd_and_all: frag = cur_frag.pre_frag else: frag = cur_frag frag.dihe_4th_atom_ids.append( tuple([ dihe_4th_atom_id, dihe_4th_resnum, sub_backbone[-2::] ]))
[docs] def resBackbone(self, resnum, atom_id): """ Given residue number and starting atom id, return the short backbone path (from the starting atom id to the end) and set head/tail using dictionary. :type resnum: int :param resnum: the residue number :type atom_id: int :param atom_id: backbone path starts from this atom id :rtype: list of int :return: the atom ids of backbone from atom_id to the other end """ backbone = list(self.backbone_side_atoms[resnum]) # Monomer may be connected as headfirst or tailfirst is_headfirst = backbone[0] == atom_id if is_headfirst: self.backbone[resnum] = backbone else: self.backbone[resnum] = backbone[::-1] return self.backbone[resnum]
[docs] def continueResBackbone(self, atom_id): """ Return rest of the backbone path continuing from the atom_id. :type atom_id: int :param atom_id: atom id of the starting point :rtype: list :return: the atom ids of backbone from atom_id to the other end """ resnum = self.template_polymer.atom[atom_id].resnum try: backbone = self.backbone[resnum] except KeyError: # set backbone by head-tail and return backbone return self.resBackbone(resnum, atom_id) index = backbone.index(atom_id) return backbone[index:]
[docs] def bondedAtomIdInSameRes(self, atom_id): """ Find the atom id of a neighbor atom within the same residue. :type atom_id: int :param atom_id: atom id :rtype: int or None :return: the atom id of a neighbor atom in the same residue """ atom = self.template_polymer.atom[atom_id] atom_resnum = atom.resnum for neighbor in atom.bonded_atoms: if neighbor.resnum == atom_resnum: return neighbor.index return None
[docs] def getDelNextResnumByAtomID(self, atom_id): """ Given atom id, find all the connected residues and atoms. Delete the connectivity from current residue to next residue and the connectivity from next residue to current residue. :type atom_id: int :param atom_id: the atom id from which all connections are returned :type del_connection: bool :param del_connection: del the connection from current atom and residue to the connected atom and residue :rtype: iterator to generate int, [int, int] :return: the connected residue number, [atom id, connected atom id] """ cur_resnum = self.template_polymer.atom[atom_id].resnum for next_resnum in self.connected_resnums[atom_id][:]: next_atom_id = self.residue_connectivity[cur_resnum][next_resnum][1] self.delConnectivity(cur_resnum, next_resnum, atom_id, next_atom_id) yield next_resnum, [atom_id, next_atom_id]
[docs] def isAllAtomPolymer(self): """ Each polymer built by polymer has one INI, TRM, and at least one MOMONOMER; each atom in polymer has MONOMER_ORIG_ATOM_IDX_PROP property; residues from the same moiety should have same number of atoms. :rtype: bool :return: True, if the structure is built using polymer builder. """ residue_names = {} for residue in self.template_polymer.residue: for atom in residue.atom: if not atom.property.get(msprops.MONOMER_ORIG_ATOM_IDX_PROP): # manually added atoms do not have MONOMER_ORIG_ATOM_IDX_PROP # when users add some atoms to one polymer return False residue_type = residue.pdbres.rstrip() res_appeared_before = residue_type in residue_names if residue_type == INI: if res_appeared_before: # one single INI for each polymer return False elif res_appeared_before: if len(residue.getAtomIndices()) != residue_names[residue_type]: # residues from the same moiety should have same number of atoms # when users del some atoms from one polymer return False continue residue_names[residue_type] = len(residue.getAtomIndices()) return (INI in residue_names) and (TRM in residue_names) and ( len(residue_names) > 2)
[docs] def breakIntoMols(self): """ Make a copy of the template polymer and break the structure into molecules serving as INI, TRM, and Monomer. :rtype: {schrodinger.Structure.structure}, int :return: the structure copy broken into molecules serving as a moiety, atom id of one atom in INI """ head_atom_id, _ = self.findHeadTail(self.template_polymer) side_atom_ids = self.template_polymer.getAtomIndices() side_atom_ids.remove(head_atom_id) nr_bonds = nearest_rotatable_bonds(self.template_polymer, self.template_polymer_rings, head_atom_id, side_atom_ids, pre_atom_prop=HEAD_ATOM_PROP, atom_prop=HEAD_ATOM_PROP) log_debug(f"breakIntoMols: head atom id: {head_atom_id}") log_debug(f"breakIntoMols: nearest rotatable bonds: {nr_bonds}") if self.debug: # Mark the bond to be break for debugging mode for atom1, atom2 in nr_bonds: bond = self.template_polymer.getBond(atom2, atom1) bond.setStyle(structure.ATOM_BALLNSTICK) struct_copy = self.template_polymer.copy() for atom_ids in nr_bonds: struct_copy.deleteBond(*atom_ids) head_atom_ids = [atom_ids[1] for atom_ids in nr_bonds] for mol in struct_copy.molecule: mol_atom_ids = set(mol.getAtomIndices()) if head_atom_id in mol_atom_ids: # this mol is INI continue # Longest simple path with end marked as tail atom mono_head_atom_id = mol_atom_ids.intersection(head_atom_ids).pop() # Mark the backbone for debugging mode _, mono_tail_atom_id = self.findHeadTail(self.template_polymer, atom_ids=mol_atom_ids, source=mono_head_atom_id, set_style=self.debug) mono_tail_atom = self.template_polymer.atom[mono_tail_atom_id] mono_tail_atom.property[TAIL_ATOM_PROP] = True if self.debug: # Mark the rings and write out intermediate st for debugging mode for idx, rings in enumerate(self.template_polymer_rings): color = get_color(idx) for atom_id in rings: self.template_polymer.atom[atom_id].color = color filename = 'break_into_mols_' + self.debug_file self.template_polymer.write(filename) jobutils.add_outfile_to_backend(filename, backend=self.backend) log_debug(f"breakIntoMols: {filename} has nearest rotatable bonds" " and atoms in rings marked.") return struct_copy, head_atom_id
[docs] @staticmethod def findHeadTail(struct, atom_ids=None, source=None, set_style=False): """ Find the head and tail atoms of the longest path. If multiple longest paths are found, rank those longest paths by the head atom ids and then tail atom ids. Choose the longest path with the smallest head/tail atom ids. :param struct: the structure to find head and tail :type struct: "schrodinger.structure.Structure" :param atom_ids: list of int or None :type atom_ids: atom ids of the target molecule. If None, all the atoms in the struct are used. :param source: node (head atom index), Starting node for path. If not specified, compute shortest path lengths using all nodes as source nodes. :type source: int :param set_style: Mark the head, tail, and backbone atoms :type source: bool :rtype: int, int :return: atom ids for head and tail atoms """ if not atom_ids: atom_ids = struct.getAtomIndices() graph = analyze.create_nx_graph(struct, atoms=atom_ids) smallest_atom_id = min(atom_ids) head_tail_pairs = collections.defaultdict(list) # In case there is only one atom head_tail_pairs[smallest_atom_id].append(smallest_atom_id) max_path_length = 0 shortest_path_length = networkx.shortest_path_length(graph, source=source) if source: shortest_path_length = [(source, shortest_path_length)] for tmp_head_atom_id, tail_and_path_length in shortest_path_length: for tmp_tail_atom_id, path_length in tail_and_path_length.items(): if max_path_length < path_length: # A longer path found: clear the stored head / tail pairs max_path_length = path_length head_tail_pairs = collections.defaultdict(list) if max_path_length == path_length: # Multiple longest paths found: append new head / tail pair head_tail_pairs[tmp_head_atom_id].append(tmp_tail_atom_id) # Get the minimum of the head and tail atom id sets so that the pair is # unique for one structure head_atom_id = min(head_tail_pairs.keys()) tail_atom_id = min(head_tail_pairs[head_atom_id]) log_debug( f"shortest_path: {networkx.shortest_path(graph, source=head_atom_id, target=tail_atom_id)}" ) if set_style: spath = networkx.shortest_path(graph, source=head_atom_id, target=tail_atom_id) for atom1, atom2 in zip(spath[:-1], spath[1:]): bond12 = struct.getBond(atom1, atom2) bond12.setStyle(structure.ATOM_CPK) bond21 = struct.getBond(atom2, atom1) bond21.setStyle(structure.ATOM_CPK) h_atom = struct.atom[spath[0]] h_atom.style = structure.ATOM_BALLNSTICK t_atom = struct.atom[spath[-1]] t_atom.style = structure.ATOM_BALLNSTICK return head_atom_id, tail_atom_id
[docs] def setAndMarkResidues(self): """ Break the molecule into fragments by rotatable bonds and set each fragment as one residue. :rtype: bool :return: True, if struct_copy has more than one molecule, serving as INI, TRM, and Monomers """ if self.template_polymer.atom_total < 4: # Structures with less than 4 atoms don't have rotatable bonds return False # non-polymer struct may be obtained by modifying polymer struct for atom in self.template_polymer.atom: for prop in ATOM_PROP_MARKER: atom.property[prop] = False struct_copy, head_atom_id = self.breakIntoMols() if len(struct_copy.molecule) == 1: return False if len(struct_copy.molecule) > self.LETTER_NUM**2: return False for residue_number, mol in enumerate(struct_copy.molecule, 1): atom_indexes = set(mol.getAtomIndices()) if head_atom_id in atom_indexes: residue_name = INI else: floor_div = residue_number // self.LETTER_NUM residue_name = chr(65 + floor_div) if floor_div else '' residue_name += chr(65 + residue_number % 26) residue_name = residue_name.ljust(4) self.setResProperties(residue_name, atom_indexes, residue_number) if self.debug: for residue_number, mol in enumerate(struct_copy.molecule, 1): color = get_color(residue_number) atom_indexes = set(mol.getAtomIndices()) for atom_index in atom_indexes: self.template_polymer.atom[atom_index].color = color # Residues of different colors and additional validation for debug mode filename = 'set_and_mark_residues_' + self.debug_file self.template_polymer.write(filename) jobutils.add_outfile_to_backend(filename, backend=self.backend) log_debug( f"setAndMarkResidues: {filename} has residues in different colors." ) assert len(struct_copy.molecule) == len( self.template_polymer.residue) return True
[docs] def setResProperties(self, residue_name, atom_indexes, residue_number): """ Reassign atom properties so that atoms of atom_indexes are treated as one residue with proper MONOMER_ORIG_ATOM_IDX_PROP. NOTE: MONOMER_ORIG_ATOM_IDX_PROP MODIFIED BASED ON ATOMS IN RESIDUE :type residue_name: str :param residue_name: the residue name for atoms in this residue :type atom_indexes: list of int :param atom_indexes: the atom ids for atoms in one residue :type residue_number: int :param residue_number: the residue number for atom_indexes """ for orig_atom_id, atom_id in enumerate(atom_indexes, 1): atom = self.template_polymer.atom[atom_id] # All atoms in the same residue must have the same residue number, # residue name, chain and insertion code atom.resnum = residue_number atom.pdbres = residue_name atom.chain = " " atom.inscode = " " atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = orig_atom_id atom.property[MONOMER_ORIG_IDX_PROP2] = orig_atom_id atom.property.pop(msprops.BACKBONE_ADJOINING_ATOM_PROP, None) atom.property.pop(msprops.BACKBONE_ATOM_PROP, None) atom.property.pop(msprops.SIDE_GROUP_ATOM_PROP, None)
[docs] def extractMoieties(self): """ Extract residue structures in a polymer and save one for each residue type. """ self.moiety_structs = {} for residue in self.template_polymer.residue: residue_type = residue.pdbres.rstrip() if residue_type in self.moiety_structs: continue residue_struct = residue.extractStructure(copy_props=True) if residue_type == INI: moiety_prop = INITIATOR.capitalize() elif residue_type == TRM: moiety_prop = TERMINATOR.capitalize() elif residue_type == CAS: moiety_prop = CASCADER.capitalize() else: moiety_prop = 'Monomer_%s' % residue_type residue_struct.property[MOIETY_PROP] = moiety_prop polym = self.template_polymer if self.debug else None assign_markers(residue_struct, polym=polym) self.moiety_structs[residue_type] = residue_struct
[docs] def updateOrigAtomIdx(self): """ Update the MONOMER_ORIG_ATOM_IDX_PROP property in each polymer atom with the atom index of atoms in moiety_structs set by setResProperties(). """ orig_prop = msprops.MONOMER_ORIG_ATOM_IDX_PROP if self.debug: ids_moiety = [] for pdbres, struct in self.moiety_structs.items(): orig_ids = [x.property.get(orig_prop) for x in struct.atom] ids_moiety += [x for x in orig_ids if x is not None] # moiety_structs has one INI and multiple monomers (CAS and TRM # are optional) if pdbres == TRM: # TRM is directly connected to one monomer assert 1 == orig_ids.count(None) elif pdbres == INI: # INI is not directly connected to TRM and CAS snum = 1 if TRM in self.moiety_structs: snum += 1 if CAS in self.moiety_structs: snum += 1 none_num = len(self.moiety_structs) - snum # '<' for Multi-site, '=' for single-site INI assert none_num <= orig_ids.count(None) elif pdbres == CAS: # CAS is directly connected to multiple moieties assert 1 < orig_ids.count(None) else: marker_num = [[x.property.get(y) for y in ATOM_PROP_MARKER] for x in struct.atom] none_num = sum([x.count(True) for x in marker_num]) # Monomer may be connected to one type of moiety multiple times assert none_num <= orig_ids.count(None) ids = [ x.property.get(orig_prop) for x in self.template_polymer.atom ] # polymer may have multiple residues with the same pdbres name assert set(ids) == set(ids_moiety) for residue in self.template_polymer.residue: residue_type = residue.pdbres.rstrip() for res_atom in residue.atom: atoms = [ x for x in self.moiety_structs[residue_type].atom if x.property.get(orig_prop) == res_atom.property[orig_prop] ] res_atom.property[orig_prop] = atoms[0].index
[docs] def markMoietiesProps(self): """ Copy and add additional properties to moiety_structs to pass the validations in `Moieties`. However, these properties are not used, since polymer is already built. :type polymer: `schrodinger.structure.Structure` :param polymer: structure of a polymer """ for residue_struct in self.moiety_structs.values(): for key, value in self.template_polymer.property.items(): if not residue_struct.property.get(MOIETY_PROP): residue_struct.property[key] = value if residue_struct.property[MOIETY_PROP].startswith('Monomer_'): residue_struct.property[TACTICITY_PROP] = TACTICITY_ISO residue_struct.property[BRANCH_PCT_PROP] = 0.5 residue_struct.property[BRANCH_MAX_PROP] = 0.8 residue_struct.property[BBTRANS_PROP] = True elif residue_struct.property[MOIETY_PROP].startswith('Cascader'): residue_struct.property[CASCADE_GEN_PROP] = 2 if self.debug: # Write out all moieties with Br atoms filename = 'mark_moieties_props_' + self.debug_file residue_struct.append(filename) jobutils.add_outfile_to_backend(filename, backend=self.backend) log_debug(f"markMoietiesProps: {filename} has all moieties.")
[docs]class PolymerFragment(object):
[docs] def __init__(self, resnum, generation, pre_frag=None, is_branching=False, pre_backbone=None, template_frag=None, molnum=None, template_polymer=None): """ PolymerFragment class stores the connectivity, dihedral angle, and anthoer other information for tangled chain method. :type resnum: int :param resnum: the resnum of the 3rd atom in dihedral angle :type generation: int :param generation: the generation of the fragment :type pre_frag: `PolymerFragment` :param pre_frag: The parent fragment :type is_branching: bool :param is_branching: True, if current fragment has more than children :type pre_backbone: list of int :param pre_backbone: the last atom in pre_backbone is the 3rd dihedral atom :type template_frag: `PolymerFragment` or None :param template_frag: the polymer fragment to be copied :type molnum: int :param molnum: molecule number :type template_polymer: `schrodinger.structure.Structure` :param template_polymer: the structure of one polymer chain """ self.resnum = resnum self.generation = generation self.in_sidegroup = False self.atom_ids = [] self.dihedral = [] self.dihe_4th_atom_ids = [] self.orig_dihedral = [] self.next_frags = [] self.pre_backbone = pre_backbone or [] self.pre_frag = pre_frag self.is_branching = is_branching self.dihe_pool = [] self.dihe_rand = [] self.dihe_value = None self.hitted_num = 0 self.max_hit_num = MAX_HIT_NUM self.molnum = None self.rings = [] self.rings_atom_ids = [] self.spear_rings = [] self.template_polymer = template_polymer self.neighbor_frags = [] self.local_struct_atom_ids = set() self.end_atom_ids = [] self.local_struct_side_dihedrals = [] self.torsion_pattern = None self.torsion_probability = None self.tail_atom = None self.debug = schrodinger.in_dev_env() if template_frag: self.preDeepCopy(template_frag, molnum)
[docs] def preDeepCopy(self, original_frag, molnum): """ Deep copy the fragment attributes except those linking different fragments. :type original_frag: `PolymerFragment` :param original_frag: Polymerfragment object to be copied :type molnum: int :param molnum: molecule number """ self.molnum = molnum self.generation = original_frag.generation self.in_sidegroup = original_frag.in_sidegroup self.is_branching = original_frag.is_branching self.torsion_pattern = original_frag.torsion_pattern self.atom_ids = original_frag.atom_ids[:] self.dihedral = original_frag.dihedral[:] self.dihe_pool = original_frag.dihe_pool[:] self.dihe_rand = original_frag.dihe_rand[:] # [[atom ids in first ring],[atom ids in second ring],..] self.rings_atom_ids = copy.deepcopy(original_frag.rings_atom_ids) self.next_frags = original_frag.next_frags[:] self.template_polymer = original_frag.template_polymer self.torsion_probability = original_frag.torsion_probability self.tail_atom = original_frag.tail_atom
[docs] def deepCopy(self, molnum): """ Deep copy the fragment class from the INI fragment. :type molnum: int :param molnum: molecule number :rtype: `PolymerFragment` :return: new INI fragment directing to all copied fragments """ first_frag = PolymerFragment(self.resnum, self.generation, template_frag=self) first_frag.molnum = molnum next_frags = [first_frag] while next_frags: frag = next_frags.pop(0) for next_original_frag in frag.next_frags[:]: frag_copied = PolymerFragment(next_original_frag.resnum, next_original_frag.generation, pre_frag=frag, template_frag=next_original_frag, molnum=molnum) next_frags.append(frag_copied) frag.next_frags.remove(next_original_frag) frag.next_frags.append(frag_copied) return first_frag
[docs] def setDihedrals(self, dihe_init): """ Set the dihedral angle pool and rand for each fragment. dihedral angle pool and rand are lists of possible dihedral angles. When the polymer chain grows, move to next polymer fragment and set the dihedral angle using a value randomly picked from rand; When the polymer growing site dies, move to previous polymer fragment, pop up the used dihedral angle from the pool, and randomly pick a new one from the left dihedral values in pool. :type dihe_init: list of floats :param dihe_init: list of possible dihedral angles """ self.dihe_init = dihe_init fragnum = 1 self.fragnum = fragnum for frag in self.fragments(include_self=False): fragnum += 1 frag.fragnum = fragnum frag.dihe_rand = dihe_init[:] frag.dihe_pool = dihe_init[:] random.shuffle(frag.dihe_pool) frag.max_hit_num = MAX_HIT_NUM
[docs] def markSidegroupFrag(self): """ Mark the side group polymer fragment, if the 3rd atom in the dihedral angle is in side group (not backbone atom). """ for frag in self.fragments(include_self=False): atom_id = frag.dihedral[2] if self.template_polymer.atom[atom_id].pdbres.rstrip()[-1] != '0': frag.in_sidegroup = True
[docs] def resetFragDihedralPool(self): """ Reset the dihedral angle pool of all following fragment according to rand in each fragment. """ for frag in self.fragments(include_self=False): frag.dihe_pool = frag.dihe_rand[:] random.shuffle(frag.dihe_pool) frag.hitted_num = 0
[docs] def fragments(self, include_self=True): """ Yield the first fragment of next_frags and append the children of that fragment to next_frags for recursion. :type include_self: bool :param include_self: If True, loop over itself and all child fragments. If False, only loop over its child polymer fragments. :rtype: iterator to generate `PolymerFragment` :return: one child polymer fragment """ next_frags = [self] if include_self else self.next_frags[:] while next_frags: this_frag = next_frags.pop(0) yield this_frag next_frags += this_frag.next_frags[:]
[docs] def adjustAtomIds(self, pre_atomnum): """ Adjust atom ids in PolymerFragment when multipe chains are added into the cell. :type pre_atomnum: int :param pre_atomnum: number of atoms before adding current molecule """ mol_atom_ids = [] for frag in self.fragments(): frag.atom_ids = [x + pre_atomnum for x in frag.atom_ids] mol_atom_ids += frag.atom_ids for findex, ring_indexes in enumerate(frag.rings_atom_ids): frag.rings_atom_ids[findex] = [ x + pre_atomnum for x in ring_indexes ] frag.dihedral = [x + pre_atomnum for x in frag.dihedral] if frag.tail_atom: frag.tail_atom += pre_atomnum self.atom_ids_in_mol = sorted(mol_atom_ids) self.atom_gids_in_mol = [ atom_ids - 1 for atom_ids in self.atom_ids_in_mol ]
[docs] def markBranchingFrag(self): """ Mark the branching fragments. """ for frag in self.fragments(): if len(frag.next_frags) > 1: frag.is_branching = True
[docs] def getDiheValues(self): """ Get the possbile dihedral values of current fragment. :rtype: list :return: all polymer dihedral vaules """ if self.continue_grow: if self.torsion_probability is not None: return numpy.random.choice(self.dihe_rand, len(self.dihe_rand), replace=False, p=self.torsion_probability) dihe_values = self.dihe_rand[:] random.shuffle(dihe_values) elif self.hitted_num < self.max_hit_num: dihe_values = self.dihe_pool[:] else: dihe_values = [] return dihe_values
[docs]def assign_markers(residue_struct, polym=None): """ Find and add marker atoms to the extracted structures. :type residue_struct: `schrodinger.structure.Structure` :param residue_struct: The structure to add the markers to :type polym: `schrodinger.structure.Structure` :param polym: If provided, the corresponding head/tail atoms are located in this reference structure, and the position of Br is determined by bonding and residue informations """ marker_ids = {0: [], 1: [], 2: []} for atom in residue_struct.atom: m_types = [y for x, y in ATOM_PROP_MARKER_IDX if atom.property.get(x)] if not m_types: continue if polym: patoms = [x for x in polym.atom if x.pdbres == atom.pdbres] patoms = [ x for x in patoms if x.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] == atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] ] patom = patoms[0] bonded_atoms = [x for x in patom.bonded_atoms] batoms = [x for x in bonded_atoms if x.pdbres != atom.pdbres] # m_types += [m_types[-1]] * (len(batoms) - len(m_types)) for index, marker_type in enumerate(m_types): # Br atoms are used to mark head, tail, branch, and cas atoms # As the polymer chain is already built, the Br position is # not used. Putting them near the head and tail atoms helps # to visualize which atoms are marked. if polym: try: batom = batoms[index] except IndexError: # Br placed away from the residue (no bonds between residues) batom = bonded_atoms[0] xyz = numpy.array(patom.xyz) * 2 - numpy.array(batom.xyz) else: xyz = (numpy.array(patom.xyz) + numpy.array(batom.xyz)) / 2 residue_struct.addAtom('Br', *xyz) else: residue_struct.addAtom('Br', atom.x + 1., atom.y + 1., atom.z + 1.) residue_struct.addBond(atom.index, residue_struct.atom_total, 1) marker_ids[marker_type].append(residue_struct.atom_total) marker_id_list = [] for marker_type, atom_ids in marker_ids.items(): marker_id_list += atom_ids residue_struct.property[MARKER_PROP] = ','.join(map(str, marker_id_list))
[docs]class VolumeMemoryError(Exception): pass
[docs]class LowMemoryVolGraph(object): """ This class mimics a networkx graph for the XYZVolumeGraph class. It has only minimal networkx functionality. It is necessary because networkx graphs are giant memory hogs and can't be used for even moderately-sized sparse grid graphs. This class takes a tiny fraction - a few percent or so - of the memory for an equivalent networkx graph. """
[docs] def __init__(self): """ Create a LowMemoryVolGraph instance """ self._all_nodes = set() # Keys of _all_edges are nodes, values are a set of nodes connected to # that node self._all_edges = defaultdict(set) # Blobs are groups of connected nodes - the equivalent of # networkx.connected_components. Blobs are somewhat expensive timewise # to find, so they are cached. Each time a change is made to the number # of nodes or connectivity of the nodes the blob cache is erased. self._all_blobs = [] self.machine_memory_mb = psutil.virtual_memory().total / float(2**20)
[docs] def __len__(self): """ The length of the graph is just the number of nodes :rtype: int :return: The number of nodes in the graph """ return len(self._all_nodes)
[docs] def checkMemoryLimit(self): """ Check that the amount of memory used is not approaching the total memory available :raise VolumeMemoryError: If the amount of memory used is too large compared to the total system memory """ mymemory = jobutils.memory_usage_psutil() # Use 0.80 as some jobs fail at 84% memory usage if mymemory / self.machine_memory_mb > 0.80: raise VolumeMemoryError('The size of the system combined with the ' 'free space in the system is too large to ' 'fit in memory on this machine.')
[docs] def addNode(self, node): """ Add a new node to the graph. :type node: tuple (or hashable, sortable object) :param node: The node to add """ self._all_nodes.add(node) self._all_blobs = []
[docs] def addEdge(self, node1, node2): """ Add a new edge (a connection between two nodes) to the graph :type node1: tuple (or hashable, sortable object) :param node1: One of the nodes to create the edge between. Node order does not matter. :type node2: tuple (or hashable, sortable object) :param node2: The other node to create the edge between. Node order does not matter. :raise KeyError: If one of the nodes is not found in the graph """ for anode in (node1, node2): if anode not in self._all_nodes: raise KeyError('Node %s is not in graph' % str(anode)) self._all_edges[node1].add(node2) self._all_edges[node2].add(node1) self._all_blobs = []
[docs] def removeNode(self, node): """ Remove a node from the graph. Also removes all edges attached to this node :type node: tuple (or hashable, sortable object) :param node: The node to remove :raise KeyError: If one of the nodes is not found in the graph """ self._all_nodes.remove(node) for bonded in self._all_edges[node]: self._all_edges[bonded].remove(node) del self._all_edges[node] self._all_blobs = []
[docs] def removeEdge(self, node1, node2): """ Remove an edge (a connection between two nodes) from the graph. The nodes are not removed. :type node1: tuple (or hashable, sortable object) :param node1: One of the nodes for the existing edge. Node order does not matter. :type node2: tuple (or hashable, sortable object) :param node2: The other node to for the existing edge. Node order does not matter. :raise KeyError: If one of the nodes is not found in the graph """ self._all_edges[node1].remove(node2) self._all_edges[node2].remove(node1) self._all_blobs = []
[docs] def nodes(self): """ A generator over all the nodes in the graph :rtype: node type :return: Iterates over each node in the graph """ for node in self._all_nodes: yield node
[docs] def edges(self): """ A generator over all edges in the graph :rtype: (node, node) :return: Iterates over each pair of connected nodes. Nodes are returned in sort order. """ for node, bonded in self._all_edges.items(): for bonder in bonded: if bonder > node: yield (node, bonder)
def _gatherBlobs(self, force=False): """ Find all the blobs in the system. A blob is a group of connected nodes. :type force: bool :param force: Recomputed the blobs even if the blob cache exists :raise VolumeMemoryError: If memory usage grows too large """ if self._all_blobs: if force: self._all_blobs = [] else: return # We start with a random node. We iterate through all its connected # neighbors, adding them to the blob. Then we iterate through the # connected neighbors of each neighbor, etc. recursive, etc. unblobbed_nodes = self._all_nodes.copy() self.checkMemoryLimit() count = 0 while unblobbed_nodes: count += 1 node = unblobbed_nodes.pop() # "added" contains all nodes added to the blob that we haven't yet # checked the neighbors of added = [node] this_blob = set(added) while added: # Grab a new node to check check_node = added.pop() for next_node in self._all_edges[check_node]: # Add any neighbors that we haven't already added if next_node not in this_blob: count += 1 # Add to the list of nodes whose neighbors we must check added.append(next_node) # Add to the blob this_blob.add(next_node) # Remove from the set of unblobbed nodes unblobbed_nodes.remove(next_node) if not count % 10000: self.checkMemoryLimit() self._all_blobs.append(this_blob) # Sort the blob cache so that the largest blob is first self._all_blobs.sort(key=len, reverse=True)
[docs] def blobs(self): """ A generator for all the blobs (groups of connected nodes) in the graph :rtype: set :return: Iterates over blobs. Blobs are returned in order of largest to smallest """ if not self._all_blobs: self._gatherBlobs() for blob in self._all_blobs: yield blob
[docs] def degree(self, node): """ Return the number of edges for the node :type node: tuple (or hashable, sortable object) :param node: The node to check :rtype: int :return: The number of edges for a node :raise KeyError: If the node is not found in the graph """ if node not in self._all_edges: # The edges dict is a defaultdict so must raise error manually raise KeyError('Node %s not found in graph' % str(node)) return len(self._all_edges[node])
[docs]class XYZVolumeGraph(object): """ Create a networkx graph to search the voids in structure. """
[docs] def __init__(self, struct, spacing=2.0, scaffold=None): """ Create networkx graph based on the structure PBC or coordinates. :type struct: `schrodinger.structure.Structure` :param struct: The structure to compute the graph over :type spacing: float :param spacing: The approximate spacing (Angstroms) between graph nodes. The actual grid spacing will be adjusted in each direction to ensure uniform grid point distribution, and the actual spacing used in each direction can be found in the self.xyz_spacings list :type scaffold: `Scaffold` or None :param scaffold: a structure that occupies space as a cluster of molecules. """ self.struct = struct self.origin = [0.0, 0.0, 0.0] self.box_lengths = [0.0, 0.0, 0.0] try: # May find incorrect largest void for interface scaffold self.pbc = clusterstruct.create_pbc(self.struct) except ValueError: # no PBC self.pbc = None # Determine X, Y and Z lengths; origin may be redefined if scaffold and scaffold.scaffold: self.defineBoxFromScaffold(scaffold) elif self.pbc: self.defineBoxFromPBC() else: self.defineBoxFromNonPBCStruct() self.num_xyz = [ int(math.ceil(old_div(size, spacing))) for size in self.box_lengths ] self.xyz_spacings = numpy.array([ old_div(length, num) for num, length in zip(self.num_xyz, self.box_lengths) ]) self.shifted_origin = self.origin + self.xyz_spacings * 0.5 self.total_points = 0
[docs] def defineBoxFromScaffold(self, scaffold): """ Define the grid box as the scaffold box size :type scaffold: `Scaffold` :param scaffold: a structure that occupies space as a cluster of molecules. """ for axis in range(3): [region_min, region_max] = scaffold.box.getMinMax(axis) self.box_lengths[axis] = region_max - region_min self.origin[axis] = region_min self.hull = None
[docs] def defineBoxFromPBC(self): """ Define the box from the PBC. This method works for triclinic cells by virtue of creating a hull that can be used to eliminate grid points outside the arbitrary-shaped PBC region. """ # Create the 8 vertices of the PBC vecs = self.pbc.getBoxVectors() # The first two for statements loop over every combination of 0, 1, 2 or # 3 vectors - 8 in total vertices = [] # Use anywhere from 0 (gives origin) to 3 vectors for numvecs in range(0, 4): # Select all possible combinations of numvecs vectors - the [0, 1, # 2] list provides the indexes for the x, y and z vectors for selection in itertools.combinations([0, 1, 2], numvecs): vertices.append(numpy.zeros(3)) for vecdex in selection: vertices[-1] += vecs[vecdex] vertices = numpy.array(vertices) self.box_lengths = vertices.ptp(0) # Determine the origin by finding the center of the structure and then # going "back" -1/2 each box length. This will center the box on the # structure. coords = self.struct.getXYZ() mins = numpy.min(coords, 0) maxes = numpy.max(coords, 0) center = old_div((mins + maxes), 2.0) self.origin = [ center[a] - old_div(self.box_lengths[a], 2) for a in range(3) ] # Create a convex hull that encloses the 8 vertices vertices += self.origin self.hull = spatial.Delaunay(vertices)
[docs] def defineBoxFromNonPBCStruct(self): """ Define a box that encompasses the structure, including the VDW radii of the extreme atoms """ xyz = self.struct.getXYZ() min_gids = numpy.argmin(xyz, axis=0) for dimens, gid in enumerate(min_gids): self.origin[dimens] = (xyz[gid][dimens] - self.struct.atom[gid + 1].radius) max_gids = numpy.argmax(xyz, axis=0) for dimens, gid in enumerate(max_gids): self.box_lengths[dimens] = (xyz[gid][dimens] + self.struct.atom[gid + 1].radius - self.origin[dimens]) self.hull = None
[docs] def getNodeXYZ(self, node): """ Convert networkx node (tuple of X, Y, Z index) to XYZ coordinates :type node: tuple :param node: the node to convert :rtype: tuple :return: A tuple of x, y z coordinates in Angstroms """ return tuple(self.shifted_origin + self.xyz_spacings * numpy.array(node))
[docs] @staticmethod def getDistanceCellDistance(struct, probe): """ Get the distance that will be used as the cutoff for atoms close to a point :type struct: `schrodinger.structure.Structure` :param struct: The structure that will be checked :type probe: float :param probe: The probe radius that will be used :rtype: float :return: The distance cutoff that will be used """ maxrad = max(x.radius for x in struct.atom) return maxrad + 0.1 + probe
[docs] def locateVoids(self, atom_ids=None, vdw_scale=1.0, probe=0.0, logger=None): """ Remove all nodes that overlap a bitset-on atom (all atoms by default) in current structure. And connect all grid points that are part of the same void. :type atom_ids: set or None :param atom_ids: if not None, only check clashes for atoms in this set :type vdw_scale: float :param vdw_scale: VDW scale factor to use :type probe: float :param probe: A radius to add to each atom's vdw radius in order to determine whether the atom encompasses a point or not. This is the thought equivalent of rolling a ball around the vdw surface of the atoms and tracing out the center of the ball. :type logger: `Logger` :param logger: If given, progress will be logged using this object :raise VolumeMemoryError: If memory usage grows too large """ # Create an empty graph with no nodes self.graph = LowMemoryVolGraph() dcell_distance = self.getDistanceCellDistance(self.struct, probe) # Distance cells are expensive, only create it once self.distance_cell, self.distance_pbc = \ clusterstruct.create_distance_cell( self.struct, dcell_distance, pbc=self.pbc) # For each potential point in the graph, we only add it if a) it is # inside the box hull (important for triclinic cells that are not cubic # like the grid is) and b) it is not encompassed by an atom. It is much # better memory-wise to only put valid points in the graph than it is to # put all points in the graph and then remove the invalid ones. total_points = self.num_xyz[0] * self.num_xyz[1] * self.num_xyz[2] per_ten = old_div(total_points, 10) since_last_log = 0 for xval in range(self.num_xyz[0]): for yval in range(self.num_xyz[1]): for zval in range(self.num_xyz[2]): node = (xval, yval, zval) xyz = self.getNodeXYZ(node) if self.hull and self.hull.find_simplex(xyz) < 0.0: # Point is outside the PBC box continue # Track the total number of possible points in the box not # counting those outside the hull self.total_points += 1 # Get list of atoms close to xyz point using distance cell all_matches = self.distance_cell.query_atoms(*xyz) overlap = False for match in all_matches: atom_id = match.getIndex() # Use vdw_scale & VDW radius of each atom to determine # if it overlaps if atom_ids and atom_id not in atom_ids: continue vdw = self.struct.atom[atom_id].radius size = vdw * vdw_scale + probe if match.getDistanceSquared() <= (size)**2: overlap = True break if overlap: continue # This is a node inside the box but outside any atom self.graph.addNode(node) if not self.total_points % 10000: self.graph.checkMemoryLimit() if logger: # Log progress about every 10% of steps since_last_log += self.num_xyz[1] * self.num_xyz[2] if since_last_log > per_ten: now = (xval + 1) * self.num_xyz[1] * self.num_xyz[2] pct = 100.0 * now / total_points logger.info('Processed %.1f%% of %d gridpoints' % (pct, total_points)) since_last_log = 0 # Now create edges between neighboring nodes self.bondRemainingNodes()
[docs] def bondRemainingNodes(self): """ Connect each node with its neighbor nodes that remain in the graph. Nodes that differ by 1 in 1 coordinate are considered neighbors. Any PBC is also accounted for so that neighboring nodes on the opposite side of the cell are also bonded. :raise VolumeMemoryError: If memory usage grows too large """ # FIXME: Does not work for neighbors on the boundary of triclinic cells for count, node in enumerate(self.graph.nodes()): for neighbor in self.gridNeighbors(node, self.num_xyz, self.pbc): try: self.graph.addEdge(node, neighbor) except KeyError: # Neighbor is not in graph, that's fine pass # Check memory every 10000 nodes if not count % 10000: self.graph.checkMemoryLimit()
[docs] @staticmethod def gridNeighbors(node, num_xyz, pbc): """ A generator over all potential node values for the neighbors of the given node. The nodes may or may not exist in the graph. Neighbors are considered to have one of x, y or z values that differs by exactly 1 and both other values are the same. The PBC is used to connect neighbors on opposite edges of the cell. :type node: tuple :param node: the node to get the neighbors for :type num_xyz: list :param num_xyz: The number of grid points in the x, y and z directions :type pbc: `schrodinger.infra.structure.PBC` or None :param pbc: The PBC if one exists :rtype: tuple :return: Iterates over potential (x, y, z) neighbors of the given node. """ # FIXME: Does not work for neighbors on the boundary of triclinic cells for coord in range(3): for delta in (-1, 1): newval = XYZVolumeGraph.getNeighborCoordVal( node, delta, coord, num_xyz[coord], pbc) if newval is None: continue other = list(node) other[coord] = newval other = tuple(other) yield other
[docs] @staticmethod def getNeighborCoordVal(node, delta, coord, numpts, pbc): """ Get the (x, y, z) tuple for a neighbor of node that differs by delta in coord. (x, y, z) is modified based on the PBC if necessary :type node: tuple :param node: the node to convert :type delta: int :param delta: How much the coord value differs from the given node. Typically 1 or -1 to give a neighboring node. :type coord: int :param coord: 0, 1 or 2 for the x, y or z direction :type numpts: int :param numpts: The number of grid points in the coord direction :type pbc: `schrodinger.infra.structure.PBC` or None :param pbc: The PBC if one exists :rtype: tuple or None :return: The (x, y, z) tuple formed by modifying coord of node by delta, or None if there is no pbc and the modified coord would be outside 0:numpts-1 """ val = node[coord] + delta if val < 0: if pbc: return numpts - 1 else: return None elif val == numpts: if pbc: return 0 else: return None return val
[docs] def voids(self): """ A generator that returns groups of connected nodes that define voids in the structure. :rtype: tuple :return: Each returned value is a set of nodes that are all connected and define a void blob. The sets are returned in order of size, largest to smallest """ for blob in self.graph.blobs(): yield blob
[docs] def getLargestVoid(self): """ Get the largest void. :rtype: tuple :return: The set of nodes that form the largest void in the structure """ for blob in self.graph.blobs(): return blob
[docs] def getSurfaceNodes(self, blob): """ Get a list of nodes in blob that have fewer than 6 connections :type blob: set :param blob: The set of nodes to search for surface nodes in :rtype: list :return: Each item of the list is a node that has fewer than 6 connections """ # FIXME: May not work for triclinic cells return [node for node in blob if self.graph.degree(node) != 6]
[docs] def getBuriedNode(self, blob): """ A generator that returns nodes that is buried inside the blob of connected nodes. Buried nodes have lots of connections to other nodes. :type blob: a set of tuples :param blob: Each item of the set is a node :rtype: tuple :return: The buried node """ nodes = [node for node in blob] # Shuffle to get a randomized order random.shuffle(nodes) # Sort shuffled nodes by number of connections for node in sorted(nodes, key=self.graph.degree, reverse=True): yield node
[docs] def getBuriedXYZ(self, blob): """ Like getBuriedNode but return the xyz coordinates. :type blob: a set of tuples :param blob: Each item of the set is a node :rtype: tuple :return: A tuple of x, y z coordinates in Angstroms """ for node in self.getBuriedNode(blob): yield self.getNodeXYZ(node)
[docs]def find_attached_atom_index(struct, mark_index, prop=None, propval=True): """ Find the index of the atom attached to the atom with the given index, optionally setting a property/value on that atom :type struct: `schrodinger.structure.Structure` :param struct: The structure to search for the atom :type mark_index: int :param mark_index: The index of the atom that we want to find an atom attached to. :type prop: str :param prop: An atom property name :param propval: The value to set prop to on the found attached atom. The type of this object depends on the type of property being set :rtype: int :return: The index of an atom attached to the mark_index atom. If more than one atom is attached to mark_index atom, the first one is returned. """ mark_atom = struct.atom[mark_index] for neighbor in mark_atom.bonded_atoms: # Only one neighbor allowed for marker atoms break else: raise ValueError('Expected marker atom %d to be bound to another atom ' 'but it is not') if prop: neighbor.property[prop] = propval return neighbor.index
[docs]def get_other_item(two_list, item): """ In a two item list, return the item that isn't the one passed in :type two_list: list :param two_list: A list two items long :param item: One of the items from two_list :return: The item in two_list that is not the one passed in """ return two_list[abs(two_list.index(item) - 1)]
[docs]def propagate_chain_data_from_atom(struct, chain_atom): """ Set the chain data for all atoms in struct to the same data as found on atom :type struct: `schrodinger.structure.Structure` :param struct: The structure to set atom properties on :type chain_atom: `schrodinger.structure._StructureAtom` :param chain_atom: The atom object to take the chain information from. Might not be from the same structure object as struct """ cid = chain_atom.property.get(CHAIN_ID_PROP) if cid is not None: name = chain_atom.chain for atom in struct.atom: atom.chain = name atom.property[CHAIN_ID_PROP] = cid
[docs]def get_random_nonzero_to_one(): """ Get a random float: 0 > x >= 1 :rtype: float :return: A floating point number in the range (0, 1] """ ranval = random.random() # ranval is [0, 1), but we really want (0, 1] - the line below # accomplishes that return 1.0 - ranval
[docs]class TorsionAngler(object): """ Class to handle cycling a structure though a series of torsion values """
[docs] def __init__(self, options, minimum_steps=5, max_stepsize=15.): """ Create a TorsionAngler object :type options: `argparse.Namespace` :param options: Object with dihedral_min and dihedral_max properties that describe the minimum and maximum allowed dihedral angles :type minimium_steps: int :param minimum_steps: The minimum steps to take. The actual number of steps may be larger if the allowed range of torsions is greather than max_stepsize*minimum_steps. :type max_stepsize: float :param max_stepsize: The largest number of degrees to move the torsion per step. """ self.minval = options.dihedral_min self.maxval = options.dihedral_max span = self.maxval - self.minval # We use n+1 in the line below because we don't want to get back to the # starting point by the nth step. self.delta_angle = min(old_div((span), float(minimum_steps + 1)), max_stepsize) self.steps = int(round(old_div(span, self.delta_angle))) - 1
[docs] def setData(self, struct, torsion_atoms): """ Set the structure to operate on and the 4 atoms involved in the torsion :type struct: `schrodinger.structure.Structure` :param struct: The struct containing the torsion to rotate :type torsion_atoms: list :param torsion_atoms: A four int list, each int is the atom index of an atom in the torsion, in the order A-B-C-D, where B-C is the bond that rotates when the torsion changes. """ self.struct = struct self.torsion_atoms = torsion_atoms self.torsion = self.getTorsionValueInRange()
[docs] def getTorsionValueInRange(self): """ Get a value for the torsion that falls within the specified range. The user may have supplied 150-210, but the measured value might be -170 instead of 190. Convert the measured value to fall within the range. :rtype: float :return: The torsion value converted to fall within the range specified for the min/max of the torsion values. """ raw_torsion = self.struct.measure(*self.torsion_atoms) if self.minval <= raw_torsion <= self.maxval: return raw_torsion sign = old_div(raw_torsion, abs(raw_torsion)) full_circle = -sign * 360. new_torsion = full_circle + raw_torsion if self.minval <= new_torsion <= self.maxval: return new_torsion # This should never happen, but punt because it isn't the worst thing in # the world return raw_torsion
[docs] def rotomers(self): """ A generator for structures that step the torsion through the desired values. :rtype: `schrodinger.structure.Structure` :return: Each yield gives the structure with the specified torsion marched through the specified values. """ for step in range(self.steps): self.incrementTorsionValue() self.struct.adjust(self.torsion, *self.torsion_atoms) yield self.struct
[docs] def incrementTorsionValue(self): """ Sets the torsion property to the next value to try for a dihedral as we rotate it through its allowed range. """ self.torsion = self.torsion + self.delta_angle if self.torsion > self.maxval: self.torsion = self.minval + (self.torsion - self.maxval)
[docs]class MoietyError(Exception): """ Raised if there is an error in creating a Moiety subclass object """
[docs]class InitiationError(Exception): """ Raised if there is an error in initiating the amorphous cell. """
[docs]class BaseMoiety(object): """ Base class for all polymer units - initiator, terminator, cascader, monomers """ # This is a class variable so that all subclass instances have access to the # same cache bond_length_cache = {} RX_PROP = 'i_matsci_rx'
[docs] def __init__(self, struct, markers=None, name=None): """ Create a BaseMoiety object :type struct: `schrodinger.structure.Structure` :param struct: The structure of this Moiety :type markers: list :param markers: A list of the indexes of the Rx atoms. If not supplied, information will be read from the structure properties """ if markers: self.markers = markers self.name = name else: self.readFromStructure(struct) self.raw_struct = struct if self.name: for atom in self.raw_struct.atom: atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = atom.index atom.property[msprops.MONOMER_NAME_PROP] = self.name self.head = None # self.rings is a list of sets, each set is the atom indexes for a ring self.rings = [set(x) for x in self.raw_struct.find_rings()] self.has_rings = bool(self.rings) self.is_cg = msutils.is_coarse_grain(struct) self.backbone_path_indexes = []
[docs] def findAttachedAtomIndex(self, mark_index, prop=None, propval=True): """ Find the index of the atom attached to the atom with the given index, optionally setting a property/value on that atom :type mark_index: int :param mark_index: The index of the atom that we want to find an atom attached to. :type prop: str :param prop: An atom property name :param propval: The value to set prop to on the found attached atom. The type of this object depends on the type of property being set :rtype: int :return: The index of an atom attached to the mark_index atom. If more than one atom is attached to mark_index atom, the first one is returned. """ index = find_attached_atom_index(self.raw_struct, mark_index, prop=prop, propval=propval) return index
[docs] def moveHeadToOrigin(self): """ Move the structure so that the head atom is at the origin """ translate = [-a for a in self.raw_struct.atom[self.head].xyz] transform.translate_structure(self.raw_struct, *translate)
[docs] def alignToVector(self, struct, current_vector, vector): """ Rotate the structure so that the current_vector is aligned to a new vector :type struct: `schrodinger.structure.Structure` :param struct: The structure to rotate. The coordinates of this structure will be modified by this method :type current_vector: list :param current_vector: The current vector that will be aligned to the new vector :type vector: list :param vector: The new vector that current_vector should be aligned to :return: There is no return value, the structure is modified directly :rtype: `schrodinger.structure.Structure` :raises ValueError: If either vector has zero magnitude """ current_array = numpy.array(current_vector) new_array = numpy.array(vector) try: matrix = transform.get_alignment_matrix(current_array, new_array) except ValueError: raise ValueError( 'Unable to align fragments due to overlapped atoms.') transform.transform_structure(struct, matrix)
[docs] def getStructureForCoupling(self, orientation, location=None, vector=None, remove_marker=True, residue_number=None, mark_bond=False): """ Get the appropriate structure to couple to the existing unit :type orientation: str :param orientation: Should be the HEADFIRST or TAILFIRST constant, indicates which end of the moiety will bind :type location: list :param location: XYZ coordinates that the coupling atom (head or tail depending on orientation) should be moved to (moving the rest of this moiety with it) :type vector: list :param vector: The vector that the coupler-coupling marker vector should be aligned to :type remove_marker: bool :param remove_marker: Whether the marker atom should be removed. :type residue_number: int or None :param residue_number: If not None, the resnum property of every atom in the returned structure will be set to this value and the residue name will be assigned. :type mark_bond: bool :param mark_bond: If True, the created bond between two monomers is marked :rtype: (`schrodinger.structure.Structure`, `AttachementAtoms`) :return: A copy of the structure of this moiety rotated and translated as requested so that it can bind in the specified orientation. An AttachedAtoms tuple that gives the coupler, grower and marker indexes for this structure. """ struct = self.getNextStructure(orientation) attachments = self.getCouplerAndGrower(orientation) # Mark the grower and coupler atoms for this entity struct.atom[attachments.coupler].property[COUPLER_ATOM_PROP] = True if attachments.grower: struct.atom[attachments.grower].property[GROWER_ATOM_PROP] = True if mark_bond: self.markHeadTailLabels(struct.atom[attachments.grower], struct.atom[attachments.coupler]) if vector: current_vector = struct.atom[attachments.coupler_marker].xyz self.alignToVector(struct, current_vector, vector) if location: transform.translate_structure(struct, *location) if remove_marker: mapping = struct.deleteAtoms([attachments.coupler_marker], renumber_map=True) coupler = mapping[attachments.coupler] grower = mapping.get(attachments.grower) grow_marker = mapping.get(attachments.grow_marker) attachments = AttachmentAtoms(coupler, None, grower, grow_marker) # If requested, set the residue name and number if residue_number is not None: resname = self.getPDBName().ljust(4) for atom in struct.atom: atom.resnum = residue_number atom.pdbres = resname return struct, attachments
[docs] def markHeadTailLabels(self, grower, coupler): """ Mark the atom unique label based on element, monomer lettter, and 'Head or Tail' orientation. :param grower: the atom connecting chain :type grower: 'structure.Structure.Atom' :param coupler: the atom waiting for other coming monomer :type coupler: 'structure.Structure.Atom' """ letter = self.getPDBName() if len(letter) != 1: # def getLetter in MonomerRow (polymer_builder_gui.py) defines # a one letter code, and def getStructureForCoupling assigns the # letter to monomer atoms when building the polymer # INI, TRM or UNK (three letters )are assigned to initiator, terminators # or unknown monmers return monomer_index = str(ord(letter) - 64) # Only the bonds between known monomers are marked g_label = grower.element + monomer_index c_label = coupler.element + monomer_index if not self.directional or g_label != c_label: grower.property[UNIQUE_ATOM_PROP] = g_label coupler.property[UNIQUE_ATOM_PROP] = c_label return label = g_label for atom in [grower, coupler]: for prop, ht_char in zip(HT_ATOM_PROPS, ['H', 'T']): if atom.property.get(prop): ht_lb = coarsegrain.CHIRAL_SEPARATOR.join([label, ht_char]) atom.property[UNIQUE_ATOM_PROP] = ht_lb break
[docs] def getBondLength(self, atom1, atom2, cg_bond_factor=0.8): """ Get the desired bond length between the grower atom of the existing unit and the coupling atom of this moiety :type atom1: `_StructureAtom` :param atom1: One of the atoms to form a bond in between. :type atom2: `_StructureAtom` :param atom2: The other atoms to form a bond in between. :type cg_bond_factor: float :param cg_bond_factor: The pre-factor to calcuate the bond length based on the particle radius. :rtype: float :return: The desired bond distance """ # Check for cached value key = tuple(sorted([atom1.element, atom2.element])) try: return self.bond_length_cache[key] except KeyError: pass if self.is_cg: # This 2.35 is from the 'Use Martini defaults' in sketcherGUIElements.cpp radius_sum = sum([atom.radius for atom in [atom1, atom2]]) self.bond_length_cache[key] = radius_sum * cg_bond_factor return self.bond_length_cache[key] # Create a structure with both atoms and use mmideal to deliver (what is # very probably a very much NOT) ideal bond length (SHARED-3432) struct = structure.create_new_structure(0) for element in key: # Add as 'C' to avoid atom typing warning messages for strange # elements atom = struct.addAtom('C', 0.0, 0.0, 0.0) buildcomplex.transmute_atom(atom, element) struct.addBond(1, 2, 1) build.add_hydrogens(struct) struct.retype() # Note that this ideal bond order doesn't account for the hybridization # of the atoms, but this is a secondary effect that will easily be # cleaned up by simulation/QM calculation. self.bond_length_cache[key] = mm.mmideal_get_stretch(struct, [1, 2]) return self.bond_length_cache[key]
[docs] def determineCouplerLocation(self, xyz, vector, bond_length): """ Figure out the XYZ coordinates where the coupler atom should go :type xyz: list :param xyz: The XYZ coordinates of the grower atom :type vector: list :param vector: The desired bond vector for coupling (goes from grower->coupler) :type bond_length: float :param bond_length: the bond length between grower and coupler :rtype: list :return: The XYZ coordinates where the coupler atom should be placed """ normalv = transform.get_normalized_vector(numpy.array(vector)) scaledv = [a * bond_length for a in normalv] location = [a + b for a, b in zip(xyz, scaledv)] return location
[docs] def addToChain(self, chain, grower, grow_marker, orientation, remove_marker=True, set_residue_data=False, propagate_chain_data=False, chain_namer=None, mark_bond=False): r""" Bind this moiety to an existing chain The grower, grow_marker, coupler and coupler_marker are defined so that the binding occurs like this: chain-grower coupler_marker \ \ grow_marker coupler-this_moiety goes to: chain-grower \ coupler-this_moiety :type chain: `schrodinger.structure.Structure` :param chain: The existing chain to add this moiety to :type grower: int :param grower: The index of the atom in the existing chain that this moiety should bond with :type grow_marker: int :param grow_marker: The index of the Rx atom that marks the attachment point - the grower->grow_marker bond vector indicates the desired grower->coupler bond vector. :type orientation: str :param orientation: Whether this moiety should add head first or tail first. Should be a module HEADFIRST or TAILFIRST constant. :type remove_marker: bool :param remove_marker: Whether the chains grower_marker atom should be removed. Set to False if adding multiple moieties and you don't want existing atom numbers to change during the process. :type set_residue_data: bool :param set_residue_data: Whether to set the residue number property for all atoms added by this method to 1 greater than the residue number of the grow atom on the passed in chain. If False, no residue numbers will be set. A residue name is also assigned if this value is True. :type propagate_chain_data: bool :param propagate_chain_data: Whether to set the chain data for all atoms added by this method to the same chain information as the grow atom on the passed in chain. If False or the grower atom contains no chain information, this will not be done. This parameter is used if the added moiety should have the same chain information as the existing moiety. See also the chain_namer parameter. :type chain_namer: function :param chain_namer: A function that should be called with the structure that will be added to chain. The purpose of this function should be to add chain name data to the added structure. Supply this function if the moiety to be added is a full chain. If the moiety to be added is just another monomer, terminator, etc during the building of a single chain, do not supply this function. This parameter is used if the added moiety should have different chain information from the existing moiety, and it is up to the chain_namer function to set that data. See also the propagate_chain_data paramter. :type mark_bond: bool :param mark_bond: If True, the created bond between two monomers is marked :rtype: tuple(AttachmentAtoms, dict) :return: First item is the updated attachment atoms with coupler atom, the new grower atom and the new grow marker. Second item is dictionary with updated atom indices after removing markers where keys are atom numbers before deleting, and value for each is the new atom number, or None if that atom was deleted. """ num_existing_atoms = chain.atom_total # Figure out the location to place the coupling atom for the proper bond # angle and length to the chain coupler_atom = self.getCouplerAtom(orientation) grower_atom = chain.atom[grower] grower_xyz = grower_atom.xyz grow_marker_xyz = chain.atom[grow_marker].xyz if set_residue_data: residue_number = grower_atom.resnum + 1 else: residue_number = None # coupling_vector runs in the direction from my coupling atom to the # chain grower atom. Grow vector runs in the other direction coupling_vector = [a - b for a, b in zip(grower_xyz, grow_marker_xyz)] grow_vector = [-a for a in coupling_vector] bond_length = self.getBondLength(coupler_atom, grower_atom) location = self.determineCouplerLocation(grower_xyz, grow_vector, bond_length) # Attach a copy of this monomer struct, attachments = self.getStructureForCoupling( orientation, location=location, vector=coupling_vector, residue_number=residue_number, mark_bond=mark_bond) # Set any requested chain data if chain_namer: chain_namer(struct) elif propagate_chain_data: propagate_chain_data_from_atom(struct, grower_atom) # Different residues should have different residue numbers residue_number = len(chain.residue) for increment, residue in enumerate(struct.residue, 1): residue.resnum = residue_number + increment chain.extend(struct) # Create the bond coupler = attachments.coupler + num_existing_atoms chain.addBond(grower, coupler, 1) if mark_bond: self.markBond(chain, grower, coupler) if attachments.grower: new_grower = attachments.grower + num_existing_atoms new_grow_marker = attachments.grow_marker + num_existing_atoms else: # This is an end group, no backbone/grower atom new_grower = None new_grow_marker = None # We no longer need the chain's marker atom - delete it and get the new # atom numbers mapping = {} if remove_marker: mapping = chain.deleteAtoms([grow_marker], renumber_map=True) coupler = mapping[coupler] if new_grower: new_grower = mapping[new_grower] new_grow_marker = mapping[new_grow_marker] new_attachment_atoms = AttachmentAtoms(coupler, None, new_grower, new_grow_marker) return new_attachment_atoms, mapping
[docs] def markBond(self, chain, grower, coupler): """ Mark the directional bond from grower to coupler. :param chain: the structure whose bond is to be marked :type chain: 'structure.Structure' :param grower: the core atom connecting to the just added monomer :type grower: int :param coupler: the just added monomer's atom connecting to the core :type coupler: int """ bond = chain.getBond(grower, coupler) for prop, index in { coarsegrain.CG_UNIQUE_BOND_PARTICLE_1_LABEL_KEY: grower, coarsegrain.CG_UNIQUE_BOND_PARTICLE_2_LABEL_KEY: coupler }.items(): label = chain.atom[index].property.pop(UNIQUE_ATOM_PROP, None) if label: bond.property[prop] = label
[docs] def findBackbone2BranchingPoint(self, exclude_markers): """ Find the branching point and the shortest path to the nearest atom in backbone_path_indexes. The braching point may have multiple branches connected to, but only one single pair of [backbone atom][branching atom] = [path with ends] is recorded. :type exclude_markers: list of int :param exclude_markers: the atom ids of markers to be excluded """ branching_markers = list(set(self.markers) - set(exclude_markers)) self.backbone_to_marked_end = {} for atom_index in self.backbone_path_indexes: self.backbone_to_marked_end[atom_index] = {} for marker in branching_markers[:]: bonded_atom = next(self.raw_struct.atom[marker].bonded_atoms) bonded_to_marker = bonded_atom.index path_with_ends = None if bonded_to_marker in self.backbone_side_atoms[atom_index]: # the branching point is in the side group path_with_ends = analyze.find_shortest_bond_path( self.raw_struct, atom_index, bonded_to_marker) elif bonded_to_marker == atom_index: # branching from backbone path_with_ends = [bonded_to_marker] if path_with_ends: branching_markers.remove(marker) self.backbone_to_marked_end[atom_index][ bonded_to_marker] = path_with_ends
[docs] @classmethod def write(cls, filename, struct, markers, name, classname=None): """ Write the structure out to a file, tagging it with properties in such a way that the driver will recognize when reading the structure in :type filename: str :param filename: The path to the file :type struct: `schrodinger.structure.Structure` :param struct: The structure to write :type markers: list :param markers: A list of the indexes of the Rx atoms. :type name: str :param name: The name to use for the title of the structure :type classname: str :param classname: The chemical class of this moiety. If not supplied, the actual python class name will be used since those are typically the same as the chemical class. """ if not classname: classname = cls.__name__ struct.property[MOIETY_PROP] = classname struct.property[MARKER_PROP] = ','.join([str(x) for x in markers]) struct.title = name struct.append(filename)
[docs] def checkRequiredProperties(self, struct, propexps): """ Check to see if the structure has the desired properties set on it :type struct: `schrodinger.structure.Structure` :param struct: The structure to check :type propexps: list of tuple :param propexps: (property string, explanation of what the property is) for each structure property to check """ for prop, explanation in propexps: if prop not in struct.property: raise MoietyError('Structure %s is missing %s which specifies ' '%s.' % (struct.title, prop, explanation))
[docs] def readFromStructure(self, struct, propexps=None): """ Read the data for this moiety from the structure object and check to make sure all the required properties are set :type struct: `schrodinger.structure.Structure` :param struct: The structure to check :type propexps: list of tuple :param propexps: (property string, explanation of what the property is) for each structure property to check """ if not propexps: propexps = [] # Check that all required properties are available propexps.append( (MARKER_PROP, 'comma-separated list of Rx atom indexes')) self.checkRequiredProperties(struct, propexps) # Convert the structure properties to instance data try: markers = [int(x) for x in struct.property[MARKER_PROP].split(',')] except ValueError: raise MoietyError('The Rx atom indexes in %s must be a comma-' 'separated list of integers. Got "%s" instead.' % (MARKER_PROP, struct.property[MARKER_PROP])) self.markers = markers for index, atom_id in enumerate(self.markers, start=1): struct.atom[atom_id].property[self.RX_PROP] = index self.name = struct.title
[docs] def findSideGroup(self): """ Find and mark the side group atoms. The side group is any heavy atom not part of the backbone and any hydrogen attached to one of those heavy atoms. Note that IUPAC specifically states that a "sidechain" is an oligomer or polymer, while a "side group" is a non-oligomer/polymer. So monomers have side groups, not sidechains. """ # Find the indexes of all backbone atoms, including those rings fused to # the backbone backbone_atoms = { x for x in self.raw_struct.atom if x.property.get(msprops.BACKBONE_ATOM_PROP) } marked_indexes = {x.index for x in backbone_atoms} # In a four-atom dihedral angle list, bond rotation (2nd-3rd) changes # the xyz of side atoms attached to the 3rd atom and the xyz of the # 4th atom. self.backbone_side_atoms = {} num_groups = 0 adjoining_prop = msprops.BACKBONE_ADJOINING_ATOM_PROP all_side_atom_ids = set() for atom in backbone_atoms: self.backbone_side_atoms[atom.index] = [] for neighbor in atom.bonded_atoms: if not neighbor.property.get(msprops.BACKBONE_ATOM_PROP): # This is a non-backbone atom bound to the backbone if neighbor.index in self.markers: # This is just a temporary marker atom, ignore it marked_indexes.add(neighbor.index) continue if neighbor.element == 'H': # Hydrogens are just adjoining atoms and not side groups neighbor.property[adjoining_prop] = True marked_indexes.add(neighbor.index) self.backbone_side_atoms[atom.index] += [neighbor.index] else: # This may be a single atom or multi atom group try: group = self.raw_struct.getMovingAtoms( atom, neighbor) except ValueError: # This is a sidegroup atom in the same ring as a # backbone atom. This can happen if rings share at # least one atom with a ring that is part of the # backbone. group = self.findRingSideGroup( self.raw_struct, marked_indexes, neighbor.index) if len(group) == 1: if neighbor.property.get( msprops.SIDE_GROUP_ATOM_PROP): # This is an atom in a side ring. if neighbor.index not in all_side_atom_ids: # This atom has not been added to side atoms self.backbone_side_atoms[atom.index] += [ neighbor.index ] # Register this atom and prevent double # counting from the other end. all_side_atom_ids.add(neighbor.index) continue else: # This is a non-backbone atom bound to the # backbone and not yet part of any side group neighbor.property[adjoining_prop] = True # Mark all atoms in this side group with the group name num_groups += 1 gst = self.raw_struct.extract(group) formula = analyze.generate_molecular_formula(gst) group_name = '%s_%d' % (formula, num_groups) for gindex in group: gatom = self.raw_struct.atom[gindex] gatom.property[ msprops.SIDE_GROUP_ATOM_PROP] = group_name marked_indexes.add(gindex) if gindex not in self.markers: self.backbone_side_atoms[atom.index] += [gindex] all_side_atom_ids.update( self.backbone_side_atoms[atom.index])
[docs] def finalizeSideGroup(self): """ Find atoms in backbone (marked as BACKBONE_ATOM_PROP) but not in the shortest path (backbone_path_indexes) and then treat these atoms along with their side groups as the side groups of certain atom in shortest path (backbone_path_indexes) """ backbone_path_indexes = set(self.backbone_path_indexes) atom_ids_to_move = set() for atom_id_to_move in self.backbone_side_atoms: if atom_id_to_move not in backbone_path_indexes: atom_ids_to_move.add(atom_id_to_move) if not atom_ids_to_move: # all atoms marked as BACKBONE_ATOM_PROP are in # backbone_path_indexes return for backbone_atom_id in self.backbone_path_indexes: # search along backbone_path_indexes to find neighbor atoms marked # as BACKBONE_ATOM_PROP but not in backbone_path_indexes next_indexes = [backbone_atom_id] while next_indexes: current_index = next_indexes.pop(0) for next_atom in self.raw_struct.atom[ current_index].bonded_atoms: next_index = next_atom.index if next_index in atom_ids_to_move: # Find a neighbor atom marked as BACKBONE_ATOM_PROP, # but not in backbone_path_indexes. self.backbone_side_atoms[ backbone_atom_id] += self.backbone_side_atoms[ next_index] self.backbone_side_atoms[backbone_atom_id].append( next_index) self.backbone_side_atoms.pop(next_index) atom_ids_to_move.remove(next_index) next_indexes.append(next_index)
[docs] def markBackboneAtom(self, index): """ Mark the atom as a backbone atom and make sure that if the atom is in a ring that all atoms in the ring are marked as backbone :type index: int :param index: The index of the atom to mark """ self.raw_struct.atom[index].property[msprops.BACKBONE_ATOM_PROP] = True for ring_indexes in self.rings: if index in ring_indexes: for rindex in ring_indexes: ratom = self.raw_struct.atom[rindex] ratom.property[msprops.BACKBONE_ATOM_PROP] = True
[docs] def setAsFragment(self): """ Set backbone path and side atoms for Initiator and Terminator. Only the atom connected to R1 group is treated as backbone path; all the rest real atoms are treated as side groups. """ # Use the atom connected to the first R1 marker as the head atom head = next(self.raw_struct.atom[self.markers[0]].bonded_atoms) self.backbone_path_indexes = [head.index] self.backbone_side_atoms = {head.index: []} excluded_indexes = set([head.index] + self.markers) for atom in self.raw_struct.atom: if atom.index in excluded_indexes: continue self.backbone_side_atoms[head.index].append(atom.index)
[docs] def findRotatableBonds(self): """ Find rotatable bond along the backbone path. """ self.rotatable_bonds = [] backbone = self.backbone_path_indexes atom_idx0 = backbone[0] for atom_idx1 in backbone[1:]: if analyze.is_bond_rotatable(self.raw_struct.getBond( atom_idx0, atom_idx1), allow_methyl=True): self.rotatable_bonds.append({atom_idx0, atom_idx1}) atom_idx0 = atom_idx1
[docs]class Terminator(BaseMoiety): """ A unit that terminates a chain Note that the concept of an "Terminator" can be a bit fungible. It can either be the actual terminator moiety that ends a polymer chain, or it may be an already built chain that will be tacked on to some initiation point. """
[docs] def __init__(self, struct, markers=None, name=None): """ Create a Terminator object Terminators drawn by the user as: R1-stuff And stored as: marker-head-stuff where marker is the R1 atom, head is the atom it is bound to, and stuff is everything else. :type struct: `schrodinger.structure.Structure` :param struct: The structure of this Moiety :type markers: list :param markers: A list of the indexes of the Rx atoms :type name: str :param name: The name of this moiety - gets added to each atom as an atom property """ BaseMoiety.__init__(self, struct, markers=markers, name=name) self.findHead() self.moveHeadToOrigin()
[docs] def findHead(self): """ Find and store the index of the head atom """ self.head = self.findAttachedAtomIndex(self.markers[0], HEAD_ATOM_PROP)
[docs] def getNextStructure(self, *args): """ Get a copy of this structure to add to a chain """ return self.raw_struct.copy()
[docs] def getCouplerAtom(self, *args): """ Get the coupler (head) atom """ return self.raw_struct.atom[self.head]
[docs] def getCouplerAndGrower(self, *args): """ Get the coupler and grower atoms and markers. The latter is None because Terminators have no grow points :rtype: `AttachmentAtoms` :return: The atoms indexes of the coupler, grower and marker atoms """ coupler = self.head coupler_marker = self.markers[0] return AttachmentAtoms(coupler, coupler_marker, None, None)
[docs] def getPDBName(self): """ Get a PDB residue-like name for this moiety """ # This overrides the base class return value return TRM
[docs]class Cascader(Terminator): """ A group that ends a chain but starts multiple new chains Cascaders are drawn as:: R2 | R1-stuff-R2 and stored as:: cascader | marker-head-stuff-cascader where cascader is an atom property added to the R2 marker atoms """
[docs] def __init__(self, *args, **kwargs): Terminator.__init__(self, *args, **kwargs) self.findBackbone()
[docs] def findBackbone(self): """ Find the set of atoms that creates the shortest distance (number of bonds) between the head and each cascade points. These are considered backbone atoms. Backbone atoms are marked with a backbone atom property. """ all_backbone = [] for atom in self.raw_struct.atom: if atom.property.get(CASCADER_ATOM_PROP): path = analyze.find_shortest_bond_path(self.raw_struct, self.head, atom.index) all_backbone.extend(path) if len(path) > len(self.backbone_path_indexes): self.backbone_path_indexes = path # Set the backbone property for index in all_backbone: self.markBackboneAtom(index)
[docs] def findHead(self): """ Find the head atom, but also mark cascader atoms that will start new points """ Terminator.findHead(self) for index in self.markers[1:]: self.raw_struct.atom[index].property[ CASCADER_MARKER_ATOM_PROP] = True next(self.raw_struct.atom[index].bonded_atoms ).property[CASCADER_ATOM_PROP] = True
[docs] def readFromStructure(self, struct): """ Overrides the parent method to add and read Cascader-specific properties. See the parent class method for documentation """ # Check all required properties are available propexps = [(CASCADE_GEN_PROP, 'the number of cascade generations')] BaseMoiety.readFromStructure(self, struct, propexps=propexps) # Convert the structure properties to instance data self.generations = struct.property[CASCADE_GEN_PROP]
[docs] @classmethod def write(cls, generations, filename, struct, *args): """ Over-ride the parent class method to add generations as a property to the structure. :type generations: int :param generations: The number of cascade generations allowed See the parent class method for documentation """ struct.property[CASCADE_GEN_PROP] = generations Terminator.write(filename, struct, *args, classname='Cascader')
[docs] def getPDBName(self): """ Get a PDB residue-like Name for this moiety """ return CAS
[docs] def setAsFragment(self): """ Prepare Cascader information for in-cell grow. """ self.findSideGroup() self.finalizeSideGroup() self.findBackbone2BranchingPoint([self.markers[0]]) self.findRotatableBonds()
[docs]class Initiator(BaseMoiety): """ A group that starts the whole polymer off - can have multiple grow points, which means that multiple chains will radiate from this group (a dendrimer). Note that the concept of an "Initiator" can be a bit fungible. It can either be the actual intiator moiety that starts the whole polymer, or it may be a partially built polymer that still needs additional chains added to it. """
[docs] def __init__(self, *args, **kwargs): """ Overrides the parent init method to allow the parial_polymer keyword parameter. :type partial_polymer: bool :keyword partial_polymer: If True, this Initiator is actually a partially built polymer that is going to just act like an initiator. """ self.partial_polymer = kwargs.pop('partial_polymer', False) BaseMoiety.__init__(self, *args, **kwargs) for grow_marker in self.markers: self.findAttachedAtomIndex(grow_marker, HEAD_ATOM_PROP) if not self.partial_polymer: for atom in self.raw_struct.atom: atom.resnum = 1 atom.pdbres = self.getPDBName()
[docs] def completePolymer(self, chain_terminator, chain_namer, clash_fixer): """ Tack on an existing chain to each grow point :type chain_terminator: `Terminator` :param chain_terminator: A Terminator object that stores the existing chain to add :type chain_namer: function :param chain_namer: A function that should be called on each chain that will be added to the existing polymer. The purpose of this function should be to add chain name data to the added structure. :type clash_fixer: function :param clash_fixer: A function to call on the polymer when any new chain is added in order to fix any clashes caused by the new chain """ polymer = self.raw_struct.copy() if not self.partial_polymer: # This is a true initiator so we need to define chain information chain_namer(polymer) propagate_chain_data = len(self.markers) == 1 if propagate_chain_data: # This has a single initiation point so the chain attached to # it will share the same chain ID chain_namer = False else: # This is a partial polymer, so chain data is already set on it propagate_chain_data = False for grow_marker in self.markers: grower = self.findAttachedAtomIndex(grow_marker, HEAD_ATOM_PROP) polymer.atom[grower].property[GROWER_ATOM_PROP] = True chain_terminator.addToChain( polymer, grower, grow_marker, orientation=HEADFIRST, remove_marker=False, propagate_chain_data=propagate_chain_data, chain_namer=chain_namer) if not propagate_chain_data: # We've added a whole new chain to an existing chain, so need to # fix clashes polymer = clash_fixer(polymer) # We wait until all chains are added to remove any marker atoms because # we don't want atom indexes to change during this process polymer.deleteAtoms(self.markers) return polymer
[docs] def fillBranchPoints(self, chain_terminator, chain_branch_points, chain_namer, clash_fixer): """ Tack on an existing chain to each of our branch points - each point has a percent chance of getting a chain added to it as long as its generation is less than the generation limit :type chain_terminator: `Terminator` :param chain_terminator: A Terminator object that stores the existing chain to add :type chain_branch_points: list :param chain_branch_points: A list of atom indexes in the chain_terminator structure that are branch points :type chain_namer: function :param chain_namer: A function that should be called on each chain that will be added to the existing polymer. The purpose of this function should be to add chain name data to the added structure. :type clash_fixer: function :param clash_fixer: A function to call on the polymer when any new chain is added in order to fix any clashes caused by the new chain :rtype: (`schrodinger.structure.Structure`, int) :return: The new polymer structure with additional branching, plus the number of new branches that were added """ polymer = self.raw_struct.copy() markers_to_delete = [] for grow_marker in self.markers: grower = self.findAttachedAtomIndex(grow_marker) markatom = polymer.atom[grow_marker] generation = markatom.property[BRANCH_GEN_ATOM_PROP] if generation == markatom.property[BRANCH_MAXGEN_ATOM_PROP]: # We've reached the maximum number of branch generations markatom.property[BRANCH_PCT_ATOM_PROP] = 0 continue chance = get_random_nonzero_to_one() * 100. if chance > markatom.property[BRANCH_PCT_ATOM_PROP]: # Failed the chance to branch at this atom markatom.property[BRANCH_PCT_ATOM_PROP] = 0 continue # All tests have passed, we'll branch at this point polymer.atom[grower].property[BRANCH_ATOM_PROP] = True num_existing_atoms = polymer.atom_total chain_terminator.addToChain(polymer, grower, grow_marker, orientation=HEADFIRST, remove_marker=False, chain_namer=chain_namer) polymer = clash_fixer(polymer) # Mark the correct generation on all the newly-added branching atoms for index in chain_branch_points: batom = polymer.atom[num_existing_atoms + index - 1] batom.property[BRANCH_GEN_ATOM_PROP] = generation + 1 markers_to_delete.append(grow_marker) # Wait to delete all marker atoms until the end so that atom indexes do # not change polymer.deleteAtoms(markers_to_delete) return polymer, len(markers_to_delete)
[docs] def getPDBName(self): """ Get a PDB residue-like Name for this moiety """ return INI
[docs]class Monomer(BaseMoiety): """ The repeating unit that makes up the polymer chain """
[docs] def __init__(self, struct): """ Create a Monomer object Monomers drawn by the user as: R1-stuff-R2 And stored as: marker1-head-stuff-tail-marker2 where marker1 is the R1 atom, head is the atom it is bound to, marker2 is the R2 atom and tail is the atom it is bound to, and stuff is everything else. """ BaseMoiety.__init__(self, struct) # 4 structures are stored for each instance based on orientation and # chirality - HEADFIRST/R, HEADFIRST/S, TAILFIRST/R, TAILFIRST/S # precalculating these saves time when building the polymer. To access # the HEADFIRST, R structure, use # self.structures[HEADFIRST][msprops.CHIRAL_R] self.structures = {HEADFIRST: {}, TAILFIRST: {}} self.tail = None self.tactical_index = None # backbone_path_indexes may differ from the set of atoms marked with the # backbone atom property. The former contains only the atoms that form # the shortest path while the latter will additionally contain any # atoms that are in the same ring as an atom in backbone_path_indexes self.default_chirality = None self.previous_chirality = None self.directional = False self.findHeadTailBranch() self.findBackbone() self.findSideGroup() if self.all_trans: self.setAllTransBackbone() self.alignOnXAxis() self.setupTacticity() self.setDirecional() self.flipTailFirst() self.rings = self.raw_struct.find_rings(sort=True) self.is_monosaccharide = self.getMonosaccharideType() is not None
[docs] def setAllTransBackbone(self): """ Set all the backbone dihedrals to 180 """ atomlist = [self.markers[0]] atomlist += self.backbone_path_indexes atomlist += [self.markers[1]] for index in range(len(atomlist) - 3): indexes = atomlist[index:index + 4] try: self.raw_struct.adjust(180., *indexes) except structure.AtomsInRingError: # Center two atoms are in a ring; dihedral can't be adjusted. pass
[docs] def hasChirality(self): """ Check if this monomer has a chiral backbone :rtype: bool :return: Whether the backbone has a chiral atom """ return bool(self.tactical_index)
[docs] def findHeadTailBranch(self): """ Find and mark the head, tail and any branching atoms """ self.head = self.findAttachedAtomIndex(self.markers[0], HEAD_ATOM_PROP) self.tail = self.findAttachedAtomIndex(self.markers[1], TAIL_ATOM_PROP) for marker in self.markers[2:]: # This marks the atom attached to marker as a branch atom self.findAttachedAtomIndex(marker, BRANCH_ATOM_PROP, self.branching_percent) batom = self.raw_struct.atom[marker] batom.property[BRANCH_PCT_ATOM_PROP] = self.branching_percent batom.property[BRANCH_GEN_ATOM_PROP] = 1 batom.property[BRANCH_MAXGEN_ATOM_PROP] = self.branching_max_gen
[docs] def findBackbone(self): """ Find the set of atoms that creates the shortest distance (number of bonds) between the head and tail atoms. This is the monomer backbone. Backbone atoms are marked with a backbone atom property """ self.backbone_path_indexes = analyze.find_shortest_bond_path( self.raw_struct, self.head, self.tail) # Set the backbone property for index in self.backbone_path_indexes: self.markBackboneAtom(index)
[docs] def findRingSideGroup(self, struct, terminator_indexes, root_index): r""" Return a list of atoms bound to root_index (and recursively all atoms bound to those atoms, ad infinitum). terminator_indexes is a set of atoms that terminate the group - when a terminator atom is encountered, the group stops growing in that direction. Unlike Structure.getMovingAtoms or buildcomplex.find_atoms_to_remove, this method gracefully handles cases where terminator atoms and group atoms share the same ring. Imagine naphthalene:: 3 7 / \ / \ 4 2 8 | | | 5 1 9 \ / \ / 6 10 If root_index=6 and terminator_indexes={1}, all other atoms will become part of the same group as 6 - because there is a path all the way around the outer ring that bypasses 1 going clockwise from 6. If root_index=6 and terminator_indexes={1,2}, then only atoms 6, 5, 4, 3 will be members of the group, because both atoms 1 and 2 terminate the group. root_index is always part of the group :type struct: schrodinger.structure.Structure :param struct: The structure to use :type terminator_indexes: set of int :param terminator_indexes: The indexes of the atoms that terminate the group. :type root_index: int :param root_index: The index of the first atom in the group. All neighbors of this atom that are not terminator atoms will be added to the list. :rtype: list :return: A list of all atoms recursively bound to the root atom, including root_index itself but not including any atom in terminator_indexes """ indexes_not_yet_checked = {root_index} group_indexes = set() while indexes_not_yet_checked: check_index = indexes_not_yet_checked.pop() group_indexes.add(check_index) for atom in struct.atom[check_index].bonded_atoms: adex = atom.index if adex in terminator_indexes: # Stop going in this direction if this is a terminator atom pass elif adex not in group_indexes and \ adex not in indexes_not_yet_checked: # We haven't encountered this atom before indexes_not_yet_checked.add(adex) return list(group_indexes)
[docs] def alignOnXAxis(self): """ Align the monomer so the head is at the origin and the backbone is aligned to the +X axis """ self.moveHeadToOrigin() if len(self.backbone_path_indexes) < 2: # Nothing to align - single atom backbone return # Now rotate the backbone vector to align with the +X axis. The # backbone vector points from the head (origin) to the tail backbone_vector = self.raw_struct.atom[self.tail].xyz self.alignToVector(self.raw_struct, backbone_vector, transform.X_AXIS)
[docs] def flipTailFirst(self): """ Flip the structure so the tail is on the origin and the backbone is aligned to the +X axis """ # matrix for rotation 180 degrees about the Y-axis matrix = transform.get_rotation_matrix(transform.Y_AXIS, numpy.pi) for chirality, struct in self.structures[HEADFIRST].items(): scopy = struct.copy() # Rotate around the Y-axis transform.transform_structure(scopy, matrix) # Now move the tail to the origin translate = [-coord for coord in scopy.atom[self.tail].xyz] transform.translate_structure(scopy, *translate) self.structures[TAILFIRST][chirality] = scopy
[docs] def setDirecional(self): """ Set whether the monomer has directional properties. """ if msutils.is_coarse_grain(self.raw_struct): return head_capper_elements = set([ x.element for x in self.raw_struct.atom[self.head].bonded_atoms if len(x.bond) == 1 and x.index != self.markers[0] ]) tail_capper_elements = set([ x.element for x in self.raw_struct.atom[self.tail].bonded_atoms if len(x.bond) == 1 and x.index != self.markers[1] ]) elements = set(['H', 'F', 'Cl', 'Br', 'I', 'At']) hc_elements = elements.difference(head_capper_elements) tc_elements = elements.difference(tail_capper_elements) if len(hc_elements) == 1: tc_elements = tc_elements.difference(hc_elements) elif len(tc_elements) == 1: hc_elements = hc_elements.difference(tc_elements) st_orig = self.raw_struct.copy() for ele in hc_elements: st_orig.atom[self.markers[0]].element = ele try: tc_elements.remove(ele) except KeyError: pass break for ele in tc_elements: st_orig.atom[self.markers[1]].element = ele break st_flipped = self.raw_struct.copy() st_flipped.atom[self.markers[1]].element = st_orig.atom[ self.markers[0]].element st_flipped.atom[self.markers[0]].element = st_orig.atom[ self.markers[1]].element self.directional = not st_orig.isEquivalent(st_flipped)
[docs] def setupTacticity(self): """ Store both R and S structures for the monomer so we don't have to keep inverting the chirality when needed. This takes a miniscule amount of effort, since it is only done once per monomer type """ # Find a chiral atom in the backbone chiral_indexes = analyze.get_chiral_atoms(self.raw_struct) backset = set(self.backbone_path_indexes) self.tactical_index = None bb_chirality = None for index, chirality in chiral_indexes.items(): if chirality in R_S and index in backset: if self.tactical_index: # We only allow tacticity for monomers with 1 chiral # backbone atom self.tactical_index = None bb_chirality = None break self.tactical_index = index bb_chirality = chirality self.default_chirality = bb_chirality # Label the atoms by bb chirality scopy = self.raw_struct.copy() prop = PROP_BY_CHIRALITY[bb_chirality] for atom in self.raw_struct.atom: atom.property[prop] = True self.structures[HEADFIRST][bb_chirality] = self.raw_struct if self.tactical_index: # Store a structure of the opposite chirality tactical_atom = self.raw_struct.atom[self.tactical_index] tactical_atom.property[CHIRAL_BB_ATOM_PROP] = True # Create and store a structure with the opposite chirality fixed_atoms = [] # Find two backbone neighbors to the chiral atom candidates = backset candidates.update(self.markers[:2]) tactical_atom = scopy.atom[self.tactical_index] tactical_atom.property[CHIRAL_BB_ATOM_PROP] = True for neighbor in tactical_atom.bonded_atoms: if neighbor.index in candidates: fixed_atoms.append(neighbor.index) if len(fixed_atoms) == 2: break # Convert to the other chirality and store the structure mm.mmstereo_atom_invert_chirality(scopy, self.tactical_index, *fixed_atoms) other_chirality = get_other_item(R_S, bb_chirality) # Label the atoms by bb chirality other_prop = PROP_BY_CHIRALITY[other_chirality] for atom in scopy.atom: atom.property[other_prop] = True self.structures[HEADFIRST][other_chirality] = scopy
[docs] def getNextStructure(self, orientation): """ Get the next monomer structure with the given orientation. "Next" in this case takes into account the chirality of the previous structure - for syntactic polymers, the monomers alternate chirality. :type orientation: str :param orientation: HEADFIRST or TAILFIRST constants :rtype: `schrodinger.structure.Structure` :return: The next structure to use for this monomer """ if self.default_chirality is None: chirality = None elif self.tacticity == TACTICITY_ISO: chirality = self.default_chirality elif self.tacticity == TACTICITY_ATAC: chirality = random.choice(R_S) else: if not self.previous_chirality: chirality = self.default_chirality else: chirality = get_other_item(R_S, self.previous_chirality) self.previous_chirality = chirality return self.structures[orientation][chirality].copy()
[docs] def getCouplerAndGrower(self, orientation): """ Get the coupler, coupler_marker, grower and grow_marker atoms for this monomer :type orientation: str :param orientation: HEADFIRST or TAILFIRST constants :rtype: `AttachmentAtoms` :return: The atoms indexes of the coupler, grower and marker atoms. Which one is the head and which is the tail depends on orientation. """ headtail = [self.head, self.tail] if orientation == HEADFIRST: index = 0 else: index = 1 coupler = headtail[index] coupler_marker = self.markers[index] grower = get_other_item(headtail, coupler) grow_marker = get_other_item(self.markers, coupler_marker) return AttachmentAtoms(coupler, coupler_marker, grower, grow_marker)
[docs] def getCouplerAtom(self, orientation): """ Get the coupler atom. :type orientation: str :param orientation: HEADFIRST or TAILFIRST constants :rtype: `_StructureAtom` :return: The coupler atom Whether this is the head and or the tail depends on orientation. """ if orientation == HEADFIRST: return self.raw_struct.atom[self.head] else: return self.raw_struct.atom[self.tail]
[docs] def isBrancher(self): """ Is this monomer a braching monomer? :rtype: bool :return: Whether this monomer is a branching monomer """ return len(self.markers) == 3
[docs] def isLadder(self): """ Is this monomer a braching monomer? :rtype: bool :return: Whether this monomer is a branching monomer """ return len(self.markers) == 4
[docs] @staticmethod def addPropsToStructure(struct, letter, markers, name, tacticity, branching_percent, branching_max_gen, all_trans): """ Add properties to the structure that the driver will need as input :type struct: `schrodinger.structure.Structure` :param struct: The structure to add the properties to :type letter: str :param letter: The one-character code for this monomer :type markers: list of int :param markers: Atom indexes of the marker atoms :type name: str :param name: The title to use :type tacticity: str :param tacticity: One of the TACTICITY module constants specifying the tacticity for this monomer :type branching_percent: float :param branching_percent: The % chance to branch for this monomer :type branching_max_gen: int :param branching_max_gen: The maximum branching generations for this monomer :type all_trans: bool :param all_trans: Whether the intra-monomer backbone dihedrals should be all set to 180. """ struct.property[MOIETY_PROP] = 'Monomer_%s' % letter struct.property[MARKER_PROP] = ','.join([str(x) for x in markers]) struct.title = name struct.property[TACTICITY_PROP] = tacticity struct.property[BRANCH_PCT_PROP] = branching_percent struct.property[BRANCH_MAX_PROP] = branching_max_gen struct.property[BBTRANS_PROP] = all_trans
[docs] @classmethod def write(cls, filename, struct, *args): """ Write out the structure :type filename: str :param filename: The path to the file :type struct: `schrodinger.structure.Structure` :param struct: The structure to write out See the addPropsToStructure method for additional argument documentation """ cls.addPropsToStructure(struct, *args) struct.append(filename)
[docs] def readFromStructure(self, struct): """ Overrides the parent method to add and read Monomer-specific properties. See the parent class method for documentation """ # Check all required properties are available propexps = [] propexps.append( (TACTICITY_PROP, 'tacticity, which can have values of ' '%s, %s or %s' % (TACTICITY_ISO, TACTICITY_SYN, TACTICITY_ATAC))) propexps.append((BRANCH_PCT_PROP, 'percent branching probability')) propexps.append((BRANCH_MAX_PROP, 'maximum branching generations')) propexps.append((BBTRANS_PROP, 'whether the monomer backbone is all ' 'trans')) BaseMoiety.readFromStructure(self, struct, propexps=propexps) # Convert the structure properties to instance data msg = ('For monomers, the %s property value must be "%s", where x is a ' 'single letter that is different for each monomer. %s instead ' 'has the value "%s"') letters = struct.property[MOIETY_PROP].split('_')[-1] letter = letters[0] letter_up = letter.upper() if len(letter) > 1 or letter.upper() not in string.ascii_uppercase: raise MoietyError(msg % (MOIETY_PROP, MONOMERX, struct.title, letter)) # self.letter is letter assigned as the code of this monomer and the # first letter in self.letter is a single letter self.letter = letter_up + letters[1:] self.all_trans = struct.property[BBTRANS_PROP] self.tacticity = struct.property[TACTICITY_PROP] self.branching_percent = struct.property[BRANCH_PCT_PROP] self.branching_max_gen = struct.property[BRANCH_MAX_PROP]
[docs] def getPDBName(self): """ Get a PDB residue-like Name for this moiety """ # self.letter is the single letter assigned as the code of this monomer return self.letter
[docs] def setAsFragment(self): """ Prepare monomer information for in-cell grow. """ self.finalizeSideGroup() self.findBackbone2BranchingPoint([self.markers[0]]) self.findRotatableBonds()
[docs] def breakTemplatePolymer(self): """ Copy the original moieties to tmp_moieties and break the monomers into small new monomers according to rotatable bonds in side group. :rtype: list of {Monomer} :return: the small monomer moieties that replace the old large one """ # FIXME: should modify this function to be compatible with CAS self.tmp_moieties = [copy.deepcopy(self)] self.tmp_moieties[-1].pdbres = self.getPDBName().ljust(4) self.finished_moieties = [] while self.tmp_moieties: tmp_moiety = self.tmp_moieties.pop(0) bonds_to_break = tmp_moiety.findBondsToBreak() struct_copy = tmp_moiety.raw_struct.copy() struct_copy.deleteAtoms(tmp_moiety.markers) for atom_ids in bonds_to_break: struct_copy.deleteBond(*atom_ids) self.stripSideStructs(struct_copy, tmp_moiety.pdbres) return self.finished_moieties
[docs] def findBondsToBreak(self): """ Search the side groups of all backbone atoms and find the 'first neighbor' rotatable bonds. :rtype: list of tuple :return: 'first neighbor' rotable bonds in side groups for all backbone atoms """ bonds_to_break = [] for backbone_atom_id, side_atom_ids in self.backbone_side_atoms.items(): bonds_to_break += nearest_rotatable_bonds(self.raw_struct, self.rings, backbone_atom_id, side_atom_ids) return bonds_to_break
[docs] def stripSideStructs(self, struct_copy, pdbres): """ Strip the sub structures in side groups and form new monomers from these sub structures. :type struct_copy: `schrodinger.structure.Structure` :param struct_copy: the structure copy with 'first neighbor' rotatable bonds deleted. :type pdbres: str :param pdbres: the name of the residue to strip side structs from """ for molecule in struct_copy.molecule: struct = molecule.extractStructure(copy_props=True) for atom in struct.atom: if atom.property.get(HEAD_ATOM_PROP): head_atom_id = atom.index if atom.property.get(TAIL_ATOM_PROP): # This is the tail atom of parent monomer is_finished_moiety = True break else: # This is a new struct broken from parent monomer, which # does not contain the backbone of parent monomer # Find the longest path from root to leaf as new backbone is_finished_moiety = False graph = analyze.create_nx_graph(struct) shortest_path_length = networkx.shortest_path_length( graph, source=head_atom_id) tail_atom_id = max(shortest_path_length, key=shortest_path_length.get) struct.atom[tail_atom_id].property[TAIL_ATOM_PROP] = True resname = self.getNewResidueName(is_finished_moiety) self.updateTemplatePolymerResidue(struct, pdbres, resname) assign_markers(struct) new_monomer = Monomer(struct) new_monomer.pdbres = resname if is_finished_moiety: self.finished_moieties.append(new_monomer) else: self.tmp_moieties.append(new_monomer)
[docs] def getNewResidueName(self, is_finished_moiety): """ Generate the residue name for new sub structure. :type is_finished_moiety: bool :param is_finished_moiety: whether the new sub structure contains the backbone of the parent monomer :rtype: str :return: residue name for new sub structure """ if is_finished_moiety: # new Monomers based on Monomer 'X ' have resnames from 'X0 ' # to 'X999' pdb_name = self.getPDBName() digits = ''.join(x for x in pdb_name if x.isdigit()) letters = pdb_name.replace(digits, '') resname = letters + str(len(self.finished_moieties)) elif self.tmp_moieties: last_resname = self.tmp_moieties[-1].pdbres.rstrip() resname = str(int(last_resname) + 1) else: resname = self.getFirstTmpResname() return resname.ljust(4)
[docs] def getFirstTmpResname(self): """ The resname of the first temporary moiety in self.tmp_moieties list. :type resname: str :param resname: residue name of a moiety """ tmp_resnames = [] for residue in self.template_polymer.residue: try: # Temporary residue pdbres is '0', '1', ... # Finished residue pdbres is 'A0', 'A1', ... tmp_resnames.append(int(residue.pdbres)) except ValueError: pass if tmp_resnames: return str(max(tmp_resnames) + 1) else: return '0'
[docs] def updateTemplatePolymerResidue(self, struct, pdbres, resname): """ Update the residue number and name for the atoms in template polymer for the newly added residue. :type struct: `schrodinger.structure.Structure` :param struct: the new sub struct to form new monomer :type pdbres: str :param pdbres: name of the parent residue :type resname: str :param resname: residue name of the new sub struct """ map_monomer_atom_id = {} for atom in struct.atom: map_monomer_atom_id[atom.property[ msprops.MONOMER_ORIG_ATOM_IDX_PROP]] = atom.index residue_number = 0 for residue in self.template_polymer.residue: residue_number += 1 residue.resnum = residue_number if residue.pdbres != pdbres: continue residue_number += 1 for atom in residue.atom: orig_atom_id = atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] if orig_atom_id in map_monomer_atom_id: atom.pdbres = resname atom.resnum = residue_number atom.property[ msprops. MONOMER_ORIG_ATOM_IDX_PROP] = map_monomer_atom_id[ orig_atom_id]
[docs] def getMonosaccharideType(self): """ Get the monosaccharide type for this monomer, if any :rtype: str :return: The monosaccharide type, or None if the monomer is not a monosaccharide """ return self.raw_struct.property.get(MONOSACC_PROP)
[docs]def remove_stale_props(st): """ Remove stale properties from the given structure. :type st: `schrodinger.structure.Structure` :param st: the structure from which to remove the stale properties """ # these properties could be present if the structure was read # from moiety files prepared by the polymer builder GUI atom_props = list(PROP_BY_CHIRALITY.values()) + [CHIRAL_BB_ATOM_PROP] for atom in st.atom: for key in atom_props: atom.property.pop(key, None)
[docs]class Moieties(object): """ A holder and manager for the various moieties that make up the polymer """
[docs] def __init__(self, filename, structs=None, one_letter=True, simple=False): """ Create a Moieties object :type filename: str :param filename: The path to the file that holds the moiety structures :type structs: list :param structs: List of structure moiety :type one_letter: If True, the monomer letter must a one letter code :param one_letter: bool :param bool simple: If True, use the simplified input rules - initiator/terminator need not be specified, structures not marked with MOIETY_PROP are considered monomers, etc. """ self.initiator = None self.terminator = None self.cascader = None self.monomers = {} self.one_letter = one_letter untyped = 'untyped' single_msg = ('There is more than one structure in the input ' 'file with %s defined. There can be only one.') if not structs: structs = structure.StructureReader(filename) untyped_monomers = [] for struct in structs: remove_stale_props(struct) # Each structure has a property defined that tells what kind of # moiety it is try: mtype = struct.property[MOIETY_PROP].lower() except KeyError: if simple: mtype = untyped else: raise MoietyError('Each structure in the input file must ' 'have the %s property defined.' % MOIETY_PROP) if mtype == INITIATOR: if self.initiator: raise MoietyError(single_msg % INITIATOR) self.initiator = Initiator(struct) elif mtype == TERMINATOR: if self.terminator: raise MoietyError(single_msg % TERMINATOR) self.terminator = Terminator(struct) elif mtype == CASCADER: if self.cascader: raise MoietyError(single_msg % CASCADER) self.cascader = Cascader(struct) elif mtype == untyped: untyped_monomers.append(struct) elif (self.one_letter and mtype[:-1] == MONOMERX[:-1]) or ( not self.one_letter and mtype.startswith(MONOMERX[:-1])): this_monomer = Monomer(struct) letter = this_monomer.letter if letter in self.monomers: raise MoietyError('There is more than one monomer defined ' 'with the code "%s".' % letter) self.monomers[letter] = this_monomer else: raise MoietyError('%s has an undefined value for %s' % (struct.title, MOIETY_PROP)) if simple: self.handleSimpleInput(untyped_monomers) singlets = [ x for x in [self.initiator, self.terminator, self.cascader] if x ] self.all_moieties = list(self.monomers.values()) + singlets
[docs] def handleSimpleInput(self, untyped_monomers): """ Fill out any required moiety information that was not supplied due to the simplified input format :param list untyped_monomers: Structures that were not typed in the input file so need to have monomer codes assigned to them. """ self._createSimpleIT() self._assignUntypedMonomers(untyped_monomers)
def _createSimpleIT(self): """ Create simple initiator and/or terminator if required """ # Create H initiator/terminator if needed # Note - we don't have to specify if we want an AA or CG # end group because CG initiator/teminators are deleted after the # polymer is built, so it does not matter if they are AA end groups. # CG just needs "something" to put there temporarily. if not self.initiator: struct = get_generic_end_structure() struct.title = 'aH' self.initiator = Initiator(struct) if not self.terminator: struct = get_generic_end_structure() struct.title = 'wH' self.terminator = Terminator(struct) def _assignUntypedMonomers(self, untyped_monomers): """ Assign unused codes to monomers that didn't have input codes assigned. Also assign necessary structure properties """ available_codes = [ x for x in string.ascii_uppercase if x not in self.monomers ] for struct in untyped_monomers: try: good_code = available_codes.pop(0) except IndexError: raise MoietyError('Too many (> 26) unique monomers requested') marker_string = struct.property.get(MARKER_PROP, None) if marker_string: markers = [int(x) for x in marker_string.split(',')] else: markers = self.findSimpleHeadTailMarkers(struct) name = struct.title tacticity = struct.property.get(TACTICITY_PROP, TACTICITY_ATAC) branch_pct = struct.property.get(BRANCH_PCT_PROP, 0.0) branch_maxg = struct.property.get(BRANCH_MAX_PROP, 0) all_trans = struct.property.get(BBTRANS_PROP, False) Monomer.addPropsToStructure(struct, good_code, markers, name, tacticity, branch_pct, branch_maxg, all_trans) self.monomers[good_code] = Monomer(struct)
[docs] def findSimpleHeadTailMarkers(self, struct): """ Find the atoms that mark the head and tail atoms for monomers that did not have these marked on input. The "marker" atom is the one that is bonded to the head or tail and gets deleted when joining monomers. For now, a simple algorithm is used that finds the longest bond path in the structure and uses that, preferrably one terminated with H atoms in case of ties. :param `schrodinger.structure.Structure` struct: The monomer structure :rtype: list :return: First item is the index of the head atom marker, the second item is the index of the tail atom marker """ # Create the graph of the structure st_graph = analyze.create_nx_graph(struct) # We choose backbones that start and end with H preferentially hydrogens = {x.index for x in struct.atom if x.element == 'H'} backbone = graph.find_backbone(st_graph, prefer_indexes=hydrogens) head_marker = backbone[0] tail_marker = backbone[-1] return [head_marker, tail_marker]
[docs] def hasCascader(self): """ Has a cascader been defined? :rtype: bool :return: Whether a cascader was defined """ return bool(self.cascader)
[docs] def hasBrancher(self): """ Has a brancher been defined? :rtype: bool :return: Whether a brancher was defined """ return any([x.isBrancher() for x in self.monomers.values()])
[docs] def hasLadder(self): """ Return True if ladder monomers are found. :rtype: bool :return: Ladder monomers exist """ return any([x.isLadder() for x in self.monomers.values()])
[docs] def singleR1Atom(self, end_moieties=None): """ Whether each end moiety has one single R1 atom. :param end_moieties: the end moieties to check against. :type end_moieties: list :return: True if each end moiety has one single R1 atom :rtype: bool """ if end_moieties is None: end_moieties = [self.initiator, self.terminator] return all(len(x.markers) == 1 for x in end_moieties)
[docs] def hasRings(self): """ Do any moieties have rings? :rtype: bool :return: Whether any of the moieties have rings """ return any(x.has_rings for x in self.all_moieties)
[docs] def getMonomer(self, letter): """ Get the monomer with the given one-letter code :type letter: str :param letter: The one-letter code for the desired monomer :rtype: `Monomer` :return: The monomer with the given one-letter code """ return self.monomers[letter]
[docs] def getMoiety(self, letter): """ Get the get one single moiety based on 'INI', 'TRM', 'CAS' or monomer one-letter code. :type letter: str :param letter: The one-letter code for the desired monomer :rtype: `Monomer`, `Initiator`, `Cascader`, or `Terminator` :return: the initiator, cascader, terminat or one nonomer find by the one letter code """ if letter == INI: return self.initiator elif letter == TRM: return self.terminator elif letter == CAS: return self.cascader else: return self.monomers[letter]
[docs] def getOneLetterCodes(self): """ Get all the allowed one-letter codes :rtype: list :return: All the defined one-letter codes """ return list(self.monomers)
[docs] def checkValidOneLetterCodes(self, codes): """ Check that all the codes in the given string are valid :type codes: str :param codes: A string with a series of one-letter codes :rtype: bool :return: Whether all the one-letter codes are associated with a Monomer """ for letter in codes: if letter == WILDCARD: continue try: self.monomers[letter] except KeyError: return False return True
[docs] def checkValidWeights(self, weights): """ Validate the given weights :type weights: dict :param weights: Keys are one-letter codes, values are integer weights for that monomer in a random copolymer :rtype: bool :return: Whether all the one-letter codes in weights are valid """ for letter in self.monomers.keys(): if letter not in weights: weights[letter] = 1 codestring = "".join(weights.keys()) return self.checkValidOneLetterCodes(codestring)
[docs] def getCascadeGenerations(self): """ Get the number of cascade generations :rtype: int :return: The number of cascade generations (0 if none) """ if not self.cascader: return 0 return self.cascader.generations
[docs] def getAllMoietyNames(self): """ Get the names of all Moieties - initiator, terminator, monomers, etc. :rtype: list :return: A list of all moiety names. Monomers first, then initiator, terminator and cascader (if defined) """ names = [x.name for x in self.all_moieties] return names