Source code for schrodinger.application.matsci.amorphous

"""
Builder classes for making amorphous cells

Copyright Schrodinger, LLC. All rights reserved.
"""

# Contributor: Dave Geisen, Teng Zhang

import collections
import copy
import itertools
import logging
import math
import random
import string
import sys
from collections import OrderedDict
from collections import defaultdict
from collections import namedtuple
from past.utils import old_div

import networkx
import numpy
import psutil
import scipy.constants as constants
import scipy.spatial as spatial
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

import schrodinger
from schrodinger import structure
from schrodinger.application.desmond.bennett import BOLTZMANN
from schrodinger.application.matsci import buildcomplex
from schrodinger.application.matsci import clusterstruct
from schrodinger.application.matsci import coarsegrain
from schrodinger.application.matsci import desmondutils
from schrodinger.application.matsci import graph
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import msprops
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import textlogger
from schrodinger.application.matsci.nano import xtal
from schrodinger.forcefield import common
from schrodinger.infra import fast3d
from schrodinger.infra import mm
from schrodinger.infra import mmbitset
from schrodinger.infra import mmlist
from schrodinger.infra import structure as infrastructure
from schrodinger.job import jobcontrol
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import ringspear
from schrodinger.structutils import smiles
from schrodinger.structutils import standard_nuclear_orientation
from schrodinger.structutils import transform

# Color constants
COLOR_BY_MOLECULE = 'molecule'
GREEN = 'green'
RED = 'red'
BLUE = 'user27'
YELLOW = 'user16'
CYAN = 'user4'
ORANGE = 'orange'
PINK = 'pink'
DARK_GREEN = 'user3'
MAGENTA = 'purple'
GREY = 'gray'
SEAFOAM = 'userM'
OLIVE = 'olive'
LIME = 'user19'
DEEPRED = 'brown'
DEEPTEAL = 'user59'
LIGHTBLUE = 'blue9'
SLATE = 'blue14'
YELLOWORANGE = 'userT'
HOTPINK = 'userN'
COLORS = [
    GREEN, RED, BLUE, YELLOW, CYAN, ORANGE, PINK, DARK_GREEN, MAGENTA, GREY,
    SEAFOAM, OLIVE, LIME, DEEPRED, DEEPTEAL, LIGHTBLUE, SLATE, YELLOWORANGE,
    HOTPINK
]

TWO_PI = 2.0 * numpy.pi
OBEY_DENSITY = 'density'
OBEY_CLASH = 'clash'
OBEY_BOTH = 'both'

SCAFFOLD_ATOM_PROP = 'b_matsci_scaffold_atom'

OPLS2005 = mm.OPLS_NAME_F14
AMCELL_NO_SYSTEM_OUT = '-amcell.maegz'

INI = 'INI'
TRM = 'TRM'
CAS = 'CAS'
MON = 'A  '
WILDCARD = '*'
DIHEDRAL_NUM = 73
CHAIN_ID_PROP = 'i_matsci_polymer_chain_id'
COUPLER_ATOM_PROP = 'b_matsci_polymer_coupler_atom'
GROWER_ATOM_PROP = 'b_matsci_polymer_grower_atom'
# Groups together information needed when joining two fragments
AttachmentAtoms = namedtuple(
    'AttachmentAtoms', ['coupler', 'coupler_marker', 'grower', 'grow_marker'])

HEADFIRST = 'headfirst'
TAILFIRST = 'tailfirst'

# Tacticity constants
TACTICITY_ISO = 'Isotactic'
TACTICITY_SYN = 'Syndiotactic'
TACTICITY_ATAC = 'Atactic'

# Atom properties
HEAD_ATOM_PROP = msprops.HEAD_ATOM_PROP
TAIL_ATOM_PROP = msprops.TAIL_ATOM_PROP
HT_ATOM_PROPS = [HEAD_ATOM_PROP, TAIL_ATOM_PROP]
CASCADER_ATOM_PROP = 'b_matsci_polymer_cascader_atom'
CASCADER_MARKER_ATOM_PROP = 'b_matsci_polymer_cascader_marker_atom'
BRANCH_ATOM_PROP = 'b_matsci_polymer_branch_atom'
BRANCH_PCT_ATOM_PROP = 'r_matsci_polymer_branch_atom_pct'
BRANCH_GEN_ATOM_PROP = 'i_matsci_polymer_branch_atom_gen'
BRANCH_MAXGEN_ATOM_PROP = 'r_matsci_polymer_branch_atom_maxgen'
CHIRAL_BB_ATOM_PROP = 'b_matsci_polymer_chiral_bb_atom'
CHIRAL_NONE_MONOMER_PROP = 'b_matsci_polymer_monomer_chiral_none'
CARB_ATOM_IDENTIFIER_PROP = 's_matsci_carbohydrate_atom_identifier'
MARKER_START_PROP = 'b_matsci_polymer_marker_start'
MONOSACC_PROP = 's_matsci_polymer_monosaccharide_type'

# Role properties
MOIETY_PROP = 's_matsci_polymer_gui_moiety'
# Properties for initiators/terminators/cascaders/monomers for communication
# between GUI and backend
# Comma-delimited list of marker atom numbers
MARKER_PROP = 's_matsci_polymer_gui_marker_list'
# Set to one of the tacticity constants
TACTICITY_PROP = 's_matsci_polymer_gui_tacticity'
# The % branching probability for branching monomers
BRANCH_PCT_PROP = 'r_matsci_polymer_gui_branch_pct'
# The max number of branching genertions
BRANCH_MAX_PROP = 'i_matsci_polymer_gui_branch_max'
# Whether the backbone should be all trans
BBTRANS_PROP = 'b_matsci_polymer_gui_bb_trans'
# Number of cascade generations
CASCADE_GEN_PROP = 'i_matsci_polymer_gui_cascade_gen'
MONOMER_ORIG_IDX_PROP2 = 'i_matsci_polymer_monomer_orig_atom_idx2'
INITIATOR = 'initiator'
TERMINATOR = 'terminator'
CASCADER = 'cascader'
MONOMERX = 'monomer_x'
RIGID_BODY_PROP = 'b_matsci_rigid_body'
DEFAULT_VDW_SCALE = 0.5

# Chirality
R_S = [msprops.CHIRAL_R, msprops.CHIRAL_S]
PROP_BY_CHIRALITY = OrderedDict()
PROP_BY_CHIRALITY[msprops.CHIRAL_R] = msprops.CHIRAL_R_MONOMER_PROP
PROP_BY_CHIRALITY[msprops.CHIRAL_S] = msprops.CHIRAL_S_MONOMER_PROP
PROP_BY_CHIRALITY[None] = CHIRAL_NONE_MONOMER_PROP
ATOM_PROP_MARKER = [
    HEAD_ATOM_PROP, TAIL_ATOM_PROP, CASCADER_ATOM_PROP, BRANCH_ATOM_PROP
]
UNIQUE_ATOM_PROP = coarsegrain.CG_UNIQUE_PARTICLE_LABEL_KEY
ATOM_PROP_MARKER_IDX = list(zip(ATOM_PROP_MARKER, [0, 1, 2, 2]))
ORIG_ATOM_IDX_PROP = 'i_matsci_orig_atom_idx'
MAX_HIT_NUM = 50

BOLTZMANN_DIS = 'Boltzmann at'
UNIFORM_DIS = 'Uniform'
MAX_VDW_SCALE = 0.5

ConnectionInfo = namedtuple('ConnectionInfo',
                            ['missed_atom_ids', 'connecting_atom_ids'])

IMMERSED = 'immersed'
CONTAINER = 'container'
INTERFACE = 'interface'

# Convert AMU to kg, then kg to g (*1000) and A3 to CM3
AMU_TO_KG = constants.physical_constants['atomic mass constant'][0]
AMU_PER_A3_TO_G_PER_CM3 = AMU_TO_KG * 1000 * (1e8)**3

logger = None


[docs]def log_debug(msg):
    """
    Add a message to debug logger file.

    :type msg: str
    :param msg: The message to debug logging
    """

    global logger
    if logger is None:
        logger = textlogger.create_debug_logger(__name__)
    logger.debug(msg)


def _add_head_and_tail_props_to_neighbors(struct, indexes):
    """
    Mark the two given atoms with the polymer role HEAD (first atom) and TAIL
    (second atom).

    :param `schrodinger.structure.Structure` struct: The structure to mark

    :param list indexes: [head atom index, tail atom index]

    :raises RuntimeError: If len(indexes) != 2
    """

    if len(indexes) != 2:
        raise RuntimeError('indexes must have length = 2')

    for index, role, capper_role in zip(
            indexes, (msprops.HEAD, msprops.TAIL),
        (msprops.HEAD_CAPPER, msprops.TAIL_CAPPER)):
        atom = struct.atom[index]
        atom.property[msprops.ROLE_PROP] = capper_role
        for neighbor in atom.bonded_atoms:
            neighbor.property[msprops.ROLE_PROP] = role


[docs]def has_head_tail_roles_marked(struct):
    """
    Check if a monomer is marked with the polymer head and tail role properties

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to check

    :rtype: bool
    :return: True if head and tail role atoms are marked, False if not
    """

    role_names = [
        msprops.HEAD, msprops.TAIL, msprops.HEAD_CAPPER, msprops.TAIL_CAPPER
    ]
    asl = ' or '.join([f'(atom.{msprops.ROLE_PROP} {x})' for x in role_names])
    role_indexes = analyze.evaluate_asl(struct, asl)
    if len(role_indexes) != 4:
        return False

    roles = [struct.atom[x].property[msprops.ROLE_PROP] for x in role_indexes]
    return all(roles.count(x) == 1 for x in role_names)


[docs]def mark_monomer_head_tail_ce_th(struct):
    """
    Find monomers marked with the Canvas method of marking head and tail - a Ce
    atom marks the head and tha Th atom marks the tail. Mark the Ce atom with
    the HEAD role and the Th atom with the TAIL role.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to mark

    :rtype: bool
    :return: True if head and tail role atoms were marked, False if not
    """

    ce_atoms = analyze.evaluate_asl(struct, 'atom.ele Ce and atom.att 1')
    th_atoms = analyze.evaluate_asl(struct, 'atom.ele Th and atom.att 1')
    if len(ce_atoms) != 1 or len(th_atoms) != 1:
        return False

    _add_head_and_tail_props_to_neighbors(struct, (ce_atoms[0], th_atoms[0]))
    return True


[docs]def mark_monomer_head_tail_dummy(struct):
    """
    Find monomers marked with the dummy atom method of marking head and tail -
    dummy atoms mark both the head and tail. In this case there is nothing to
    distinguish the two position, so mark the first dummy atom with the HEAD
    role and the second dummy atom with the TAIL role.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to mark

    :rtype: bool
    :return: True if head and tail role atoms were marked, False if not
    """

    # Note - can't use ASL for dummies, see SHARED-6642
    dummies = [
        x.index
        for x in struct.atom
        if x.atomic_number == -2 and x.bond_total == 1
    ]
    if len(dummies) != 2:
        return False

    _add_head_and_tail_props_to_neighbors(struct, dummies)
    return True


[docs]def mark_monomer_head_tail_rx(struct):
    """
    Find monomers marked with the polymer builder method of marking head and
    tail - the Rx atom property is used to identify atoms. The atom designated
    as R1 is marked as the HEAD role and the atom designated as R2 with the TAIL
    role.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to mark

    :rtype: bool
    :return: True if head and tail role atoms were marked, False if not
    """

    rx_atoms, max_x_val = buildcomplex.get_marker_atom_indexes_from_structure(
        struct)
    # There must be exactly two Rx values, 1 and 2
    if any((1 not in rx_atoms, 2 not in rx_atoms, len(rx_atoms) != 2)):
        return False
    # There must be only one atom of each type
    for atoms in rx_atoms.values():
        if len(atoms) != 1:
            return False
    indexes = [rx_atoms[x][0] for x in (1, 2)]
    _add_head_and_tail_props_to_neighbors(struct, indexes)
    return True


[docs]def mark_monomer_head_and_tail(struct):
    """
    Find monomers marked with a variety of marking head and tail atoms and mark
    them with the polymer role property. Note that the atoms so marked will be
    the atoms to remove when building a polymer - the disposable H, dummy or
    other atom attached to the heavy atom that remains in the monomer when the
    polymer is built.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to mark

    :rtype: bool
    :return: True if head and tail role atoms were marked, False if not
    """

    return (has_head_tail_roles_marked(struct) or
            mark_monomer_head_tail_ce_th(struct) or
            mark_monomer_head_tail_rx(struct) or
            mark_monomer_head_tail_dummy(struct))


[docs]def mark_orig_atom_idx(struct):
    """
    Mark the original atom indexes as atom property.

    :type struct: 'schrodinger.structure.Structure'
    :param struct: each atom in this structure will be marked with atom index
        as original atom index property.
    """
    for atom in struct.atom:
        atom.property[ORIG_ATOM_IDX_PROP] = atom.index


[docs]def get_orig_atom_idx(struct):
    """
    Return the original atom indexes.

    :type struct: 'schrodinger.structure.Structure'
        or any 'schrodinger.structure._AtomCollection'
    :param struct: the structure to get original atom indexes.

    :rtype: list of int
    :return: the original atom indexes of this atom collection.
    """

    return [atom.property[ORIG_ATOM_IDX_PROP] for atom in struct.atom]


[docs]def get_missed_atom_id_groups(struct, atom_ids):
    """
    Return the missed atom indexed grouped by bond connectivity.

    :type struct: 'schrodinger.structure.Structure'
    :param struct: the structure to get missed atom indexes groups

    :type atom_ids: list of int
    :param atom_ids: each int is an atom index of a missing atom in the
        polymerfragment.

    :rtype: list of list
    :return: such sublist contains atom indexes that are bonded.
    """

    struct = struct.copy()
    mark_orig_atom_idx(struct)
    sub_struct = struct.extract(atom_ids)
    return [get_orig_atom_idx(mol) for mol in sub_struct.molecule]


[docs]def get_density(struct):
    """
    Calculate the density of the struct.

    :type struct: `schrodinger.structure.Structure`
    :param struct: A structure with chorus_properties

    :rtype: float
    :return: The density of the struct in unit g/cm^3
    """

    chorus_properties = xtal.get_chorus_properties(struct)
    params = xtal.get_params_from_chorus(chorus_properties)
    volume = xtal.get_volume_from_params(params)
    return AMU_PER_A3_TO_G_PER_CM3 * struct.total_weight / volume


[docs]def get_maximum_vdw_radius(struct):
    """
    Find the maximum VDW radius of any atom in the structure

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to check

    :rtype: float
    :return: The maximum VDW radius of any atom
    """

    maxrad = 0.0
    for atom in struct.atom:
        maxrad = max(maxrad, atom.radius)
    return maxrad


[docs]def get_color(index):
    """
    Get the next color in our color rotation

    :type index: int
    :param index: The index in the color rotation to get. If index is greater
        than the number of colors, we just start over

    :rtype: str
    :return: A color as understood by the
        `schrodinger.structure._StructureAtom.color` property
    """

    true_index = index % len(COLORS)
    return COLORS[true_index]


[docs]def random_rotation(struct,
                    centroid=None,
                    max_rotation=TWO_PI,
                    no_rotation=False,
                    rotate_velocities=False):
    """
    Randomly rotate struct in 3 dimensions

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to rotate

    :type centroid: 3-element numpy array
    :param centroid: the rotation center [x, y, z]

    :type max_rotation: float
    :param max_rotation: The maximum rotation to perform

    :param bool no_rotation: If True, do not rotate the structure

    :param bool rotate_velocities: If True, besides the structure, also rotate
        FFIO atom velocities (if present)

    :rtype: list, list
    :return: Centroid and rotation matrix
    """

    # Make sure the centroid is at the origin
    if centroid is None:
        centroid = transform.get_centroid(struct)[:3]
    struct.setXYZ(struct.getXYZ() - centroid)

    if no_rotation:
        matrix = numpy.eye(4).tolist()
    else:
        # Obtain a random vector to rotate around and the amount to rotate
        vec = [random.uniform(-0.5, 0.5) for x in range(3)]
        nvec = transform.get_normalized_vector(numpy.array(vec))
        angle = random.uniform(0.0, max_rotation)
        # Get the rotation matrix and perform the rotation
        matrix = transform.get_rotation_matrix(nvec, angle)
        transform.transform_structure(struct, matrix)

        if rotate_velocities:
            for atom in struct.atom:
                vel = msutils.get_atom_ffio_velocity(atom)
                vel = transform.transform_atom_coordinates(vel, matrix)
                msutils.set_atom_ffio_velocity(atom, vel)

    # Move the structure back to its original centroid
    struct.setXYZ(struct.getXYZ() + centroid)

    return centroid, matrix


[docs]def prepare_building_block(struct, index, recolor=True, preserve_res=False):
    """
    Do some prepwork on a component before adding them to the disordered cell

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to modify

    :type index: int
    :param index: The index of this structure in the list of components

    :type recolor: bool
    :param recolor: Whether to color the molecule a single color

    :type preserve_res: bool
    :param preserve_res: Whether to preserve the current residue information
    """

    # Set properties of each structure to make it easy to distinguish
    # the components in the full cell
    if recolor:
        color = get_color(index - 1)
    # Ensure we stay within 4 character pdbres values (MATSCI-769)
    if preserve_res:
        set_pdbres = False
    else:
        new_pdbres = 'M%d' % index
        new_pdbres = new_pdbres.ljust(4)
        old_pdbres = struct.atom[1].pdbres.strip()
        # Define->Molecule->Molecule type needs single residue (MATSCI-6087)
        set_pdbres = not old_pdbres or old_pdbres == 'UNK' or len(
            struct.residue) != 1
    for atom in struct.atom:
        if recolor:
            atom.color = color
        if set_pdbres:
            atom.pdbres = new_pdbres
            atom.resnum = 1


[docs]def detect_custom_mmffld_data(structs):
    """
    Detect if a custom FEP charge block exists in the structure

    :param list structs: List of `Structure` objects to check

    :rtype: bool
    :return: Whether a custom FEP charge block exists in any of the strucures
    """

    has_data = False
    for struct in structs:
        # I'm not sure the attempt to use get_or_open_additional_data is needed
        # in this case - in all tested cases when reading a structure from a
        # file like we do here the get_unrequested call works fine, but this is
        # the pattern used in other places so I'm keeping it here for safety
        # until we better understand this.
        try:
            handle = mm.mmct_ct_m2io_get_unrequested_handle(struct)
        except:
            try:
                handle = mm.mmct_ct_get_or_open_additional_data(struct, True)
            except:
                handle = None

        if handle:
            has_data = bool(
                mm.m2io_get_number_blocks(handle, 'mmffld_custom_virt'))
        if has_data:
            break
    return has_data


[docs]def get_generic_end_structure(is_coarse_grain=False):
    """
        Get a single atom structure that can be used as a terminal
        (initiator/terminator) moiety.

        :type is_coarse_grain: bool
        :param is_coarse_grain: whether the structure is coarse grain

        :rtype: `schrodinger.structure.Structure`
        :return: Structure of one single H or C atom with Br marker.
        """

    struct = structure.create_new_structure()
    struct.addAtom('C' if is_coarse_grain else 'H', 0, 0, 0)
    struct.addAtom('Br', 1, 0, 0)
    struct.addBond(1, 2, 1)
    struct.property[MARKER_PROP] = '2'
    # Without this line the H/C gets an "unknown atom type" color
    buildcomplex.transmute_atom(struct.atom[1], struct.atom[1].element)
    return struct


[docs]def dispersity(structs):
    """
    Return dispersity of the structures.

    Notes with Einstein notation (duplicated index means summation):
    Mn = NiMi/sum(Ni); Mw = NiMiMi/NiMi
    Mw/Mn = sum(Ni) * NiMiMi / (NiMi)^2
    When Ni = 1, Mw/Mn = len(Mi) * MiMi /(sum(Mi))^2

    :param structs: the list of structures to calculate dispersity.
    :type structs: list of structure.Structure

    :return: dispersity over all structures
    :rtype: float
    """

    mols = [mol for st in structs for mol in st.molecule]
    mws = [mol.extractStructure().total_weight for mol in mols]
    mws_squared = numpy.power(mws, 2)
    return len(mws) * numpy.sum(mws_squared) / numpy.power(numpy.sum(mws), 2)


[docs]class FlorySchulzDistribution:
    """
    Molecular weight distribution in linear step-growth homo polymers (single
    monomer type or equimolar quantities for two types of monomers).
    Collected and published by Polymer Properties Database (polymerdatabase.com)

    Refs:
    P. J. Flory, Journal of the American Chemical Society, 58, 1877-1885 (1936)
    Wallace H. Carothers, Trans. Faraday Soc., Vol. 32, pp 39-49 (1936)
    Paul L. Flory, Principles of Polymer Chemistry, Ithaca, New york, 1953
    Paul J. Flory, Chem. Rev., Vol. 39, No. 1, 137 (1946)

    Story (paraphrase):
    Monomer conversion (p) is reactivated monomers over all initial monomers
    An oligomer containing x repeat units must have undergone x-1 reactions
    """

[docs]    def __init__(self, conversion=0.5, initial_num=1000, xmer=5):
        """
        :param conversion: reactivated monomers over all initial monomers
        :type conversion: float in [0, 1)

        :param initial_num: the initial number of monomers
        :type initial_num: positive int

        :param xmer: repeat unit number in a chain
        :type xmer: positive int
        """

        self.conversion = conversion
        self.xmer = xmer
        self.initial_num = initial_num

[docs]    def polydispersity(self):
        """
        Carothers equation in step-growth polymerization, proposed by Wallace
        Carothers, who invented nylon in 1935.

        Conditions: two monomers in equimolar quantities
        Ref: https://en.wikipedia.org/wiki/Carothers_equation

        :return: polydispersity according to Carothers equation for two monomers
            in equimolar quantities
        :rtype: float
        """

        return 1 + self.conversion

[docs]    def xmerProbability(self):
        """
        The number probability of x-mer at the specific conversion.

        :return: The probability of a chain is x-mer long
        :rtype: float
        """

        return (1 - self.conversion) * pow(self.conversion, self.xmer - 1)

[docs]    def moleculeNum(self):
        """
        The number of molecules (polymer, oligomer, and monomers).

        :return: the total number of molecules at the specific conversion.
        :rtype: float
        """

        return self.initial_num * (1 - self.conversion)

[docs]    def degreeOfPolymerization(self):
        """
        Degree of polymerization at the specific conversion.

        :return: Degree of polymerization at the specific conversion.
        :rtype: float
        """

        return 1 / (1 - self.conversion)

[docs]    def setConversion(self, deg_of_polym=2):
        """
        Set the conversion based on degree of polymerization.

        :param dp: Degree of polymerization
        :type dp: int
        """

        self.conversion = 1 - 1 / deg_of_polym

[docs]    def xmerNum(self):
        """
        The number of molecules of x-mer length at the specific conversion.

        :return: number of molecules
        :rtype: float
        """

        return self.moleculeNum() * self.xmerProbability()

[docs]    def numberDistribution(self, mer_lim=None):
        """
        The number distribution: monomer length vs number probability.

        :param mer_lim: the lower and upper limits of x-mers
        :type mer_lim: None or a tuple of two int

        :return: monomer length, probability
        :rtype: 2d numpy array
        """

        if mer_lim is None:
            mer_lim = (1, 1000000)
        mer_start, mer_end = list(map(int, mer_lim))
        mer_num = mer_end - mer_start + 1
        xmers = numpy.linspace(mer_start, mer_end, num=mer_num)
        probs = (1 - self.conversion) * numpy.power(self.conversion, xmers - 1)

        return numpy.vstack((xmers, probs)).T

[docs]    def weightDistribution(self, mer_lim=None):
        """
        The weight distribution: monomer length vs length-weighted probability.

        :param mer_lim: the lower and upper limits of x-mers
        :type mer_lim: None or a tuple of two int

        :return: monomer length, length-weighted probability
        :rtype: 2d numpy array
        """

        xmer_prop = self.numberDistribution(mer_lim=mer_lim)
        xmer_prop[:, 1] *= xmer_prop[:, 0]
        xmer_prop[:, 1] *= (1 - self.conversion)
        return xmer_prop


[docs]class Box(object):
    """
    Class that defines the region a new structure may be placed in
    """

    X, Y, Z = list(range(3))
    AXES = [X, Y, Z]

[docs]    def __init__(self, vertices):
        """
        Create a box object

        :type vertices: list
        :param vertices: A six item list that defines the min and max value of
            X, Y and Z. The six values in order are: Xmin, Xmax, Ymin, Ymax, Zmin,
            Zmax
        """

        self.vertices = vertices

[docs]    def getMinMax(self, axis):
        """
        Get the min and max values for axis

        :type axis: int
        :param axis: One of the class constants X, Y or Z

        :rtype: (float, float)
        :return: The min and max value for the requested axis
        """

        return self.vertices[axis * 2], self.vertices[axis * 2 + 1]

[docs]    def getLargestSpan(self):
        """
        Get the largest span of the box in the X, Y or Z direction

        :rtype: float
        :return: The largest span (delta between the min and max value)
        """

        spans = []
        for axis in self.AXES:
            amin, amax = self.getMinMax(axis)
            spans.append(amax - amin)
        return max(spans)

[docs]    def getCentroid(self):
        """
        Get the centroid of the box

        :rtype: `numpy.array`
        :return: The centoid of the box - a numpy array of 3 items (x, y, z)
        """

        centroid = []
        for axis in self.AXES:
            amin, amax = self.getMinMax(axis)
            centroid.append(old_div((amin + amax), 2.))
        return numpy.array(centroid)

[docs]    def getPointInBox(self):
        """
        Get a point in a random position in the box

        :rtype: list
        :return: [x, y, z] coordinates of a random point in the box
        """

        return [random.uniform(*self.getMinMax(a)) for a in self.AXES]

[docs]    def isPointInBox(self, point):
        """
        Is the given point inside the box

        :type point: iterable
        :param point: An iterable of length 3 floats that gives the x, y and z
            values of the point in question

        :rtype: bool
        :return: Whether the point falls within the box
        """

        for axis in self.AXES:
            amin, amax = self.getMinMax(axis)
            if not amin <= point[axis] < amax:
                return False
        return True

[docs]    def getTranslationToBox(self, point):
        """
        Return a vector that translates the mirror image of a point inside the
        box

        :type point: iterable
        :param point: An iterable of length 3 floats that gives the x, y and z
            values of the point in question

        :rtype: list
        :return: A list of 3 floats that gives the x, y and z coordinates of the
            mirror image of point that lies within the box
        """

        newpoint = []
        for axis in self.AXES:
            amin, amax = self.getMinMax(axis)
            width = amax - amin
            value = point[axis]
            while value >= amax:
                value = value - width
            while value < amin:
                value = value + width
            newpoint.append(value)
        return [b - a for a, b in zip(point, newpoint)]

[docs]    def isValidPoint(self, point):
        """
        Check if point is a valid point - the default implementation just
        returns True. Subclasses may use this to weed out points that violate
        custom box conditions
        """

        return True


[docs]class BoxWithInnerHull(Box):
    """
    A cubic Box object that contains an inner non-cubic region that limits the
    valid volume for the components to be placed in.
    """

[docs]    def __init__(self, hull, *args, **kwargs):
        """
        Create a BoxWithInnerHull object

        :type hull: `scipy.spatial.Delaunay`
        :param hull: The convex hull defining the region of space the components
            are allowed to be placed in

        See parent class for additional documentation
        """

        Box.__init__(self, *args, **kwargs)
        self.hull = hull

[docs]    def isPointInBox(self, point, *args):
        """
        Overrides the parent method to check to see if the given point is also
        inside the hull

        :type point: iterable
        :param point: An iterable of length 3 floats that gives the x, y and z
            values of the point in question

        :rtype: bool
        :return: Whether the point falls within the box
        """

        raw_check = Box.isPointInBox(self, point, *args)
        return raw_check and self.isPointInHull(point)

[docs]    def isPointInHull(self, point):
        """
        Check to see if the point is inside the hull

        :type point: iterable
        :param point: An iterable of length 3 floats that gives the x, y and z
            values of the point in question

        :rtype: bool
        :return: Whether the point falls within the hull
        """

        # This is how you check if a point is inside the complex hull
        return self.hull.find_simplex(point) >= 0.0

[docs]    def getPointInBox(self, max_attempts=1000):
        """
        Overrides the parent class to get a point that is inside the hull rather
        than just in the box

        :type max_attempts: int
        :param max_attempts: Maximum number of attempts to try to find a point
            that is both inside the box and the hull

        :rtype: list
        :return: List of three floats that give the xyz coordinates of a point
            in the hull

        :raise ScaffoldError: If a point can't be found inside the hull
        """

        for ptry in range(max_attempts):
            point = Box.getPointInBox(self)
            if self.isPointInHull(point):
                return point
        raise ScaffoldError('Unable to find points inside the substrate '
                            'container')

[docs]    def isValidPoint(self, point):
        """
        Overrides the parent method to check to make sure the point is inside
        the hull

        :type point: iterable
        :param point: An iterable of length 3 floats that gives the x, y and z
            values of the point in question

        :rtype: bool
        :return: Whether the point is inside the hull
        """

        return self.isPointInHull(point)


[docs]class ScaffoldError(ValueError):
    """
    Class for errors involving the scaffold input
    """


[docs]class Scaffold(object):
    """
    A Scaffold is a structure that occupies space in a cluster of molecules but
    is not considered part of the system itself. Scaffolds could be: 1) A
    nanoparticle that has an amorphous system surrounding it or a protein
    surrounded by a box of water molecules, 2) A container such as a zeolyte or
    nanotube that holds an amorphous system within it, 3) a surface upon which a
    disordered system is laid down.
    """

[docs]    def __init__(self, struct=None, volume=None):
        """
        Create a Scaffold object

        :type struct: `schrodinger.structure.Structure` or None
        :param struct: The scaffold structure, or None if there is no scaffold
            for the cell.
        """

        self.scaffold = struct
        if volume is None:
            self.volume = self.approximateVolume(self.scaffold)
        else:
            self.volume = volume
        if self.scaffold:
            self.centroid = transform.get_centroid(self.scaffold)[:3]

            # Mark scaffold atoms
            for atom in self.scaffold.atom:
                atom.property[SCAFFOLD_ATOM_PROP] = True
            self.has_rings = bool(self.scaffold.find_rings())
        else:
            self.centroid = [0, 0, 0]
            self.has_rings = False
        self.box = None

    def __bool__(self):
        """
        Causes scaffold objects to evaluate to False if the scaffold structure
        evaluates to False

        :rtype: bool
        :return: A boolean cast of the scaffold property
        """

        return bool(self.scaffold)

[docs]    @staticmethod
    def approximateVolume(input_struct,
                          vdw_scale=1.0,
                          seed=None,
                          sphere_radius=None,
                          sphere_origin=None,
                          buffer_len=2,
                          return_ratio=False):
        """
        Computes the approximate volume of the scaffold molecule using a Monte
        Carlo sampling algorithm. The alogorithm:
        1) define a box or sphere that fully encloses the structure
        2) randomly pick a point inside the shape
        3) check if the point lies inside the VDW radius of an atom
        4) iterate over steps 2 & 3 a bunch
        5) The volume is shape_volume * fraction of points inside VDW radii

        :type vdw_scale: float
        :param vdw_scale: The VdW scale factor to apply to VdW radii when
            checking to see if a point is "inside" an atom

        :type seed: int or None
        :param seed: the seed for random, if None then random is not re-seeded

        :type sphere_radius: float
        :param sphere_radius: specifies to sample points in a sphere of this radius
            in Angstrom rather than the default which uses the smallest enclosing
            box

        :type sphere_origin: numpy.array
        :param sphere_origin: the origin in Angstrom of the sphere used if
            sampling points in a sphere

        :type buffer_len: float
        :param buffer_len: a shape buffer lengths in Angstrom

        :type return_ratio: bool
        :param return_ratio: whether to return the hit ratio as a percent

        :rtype: float or float, float
        :return: The approximate volume of a molecule in Angstrom^3 followed
            by the hit ratio if return_ratio is used

        :note: This is expensive, so is only done at class initialization. After
            that, call getExcludedVolume to obtain the cached volume.
        """

        if not input_struct:
            return 0.0

        struct = input_struct.copy()
        standard_nuclear_orientation.standard_nuclear_orientation(struct)

        # see MATSCI-7250 GO custom classes, where during STU tests
        # it was found that different results where obtained for different
        # numbers of processors, having to do with how the processes
        # inherit random, best to use a custom random
        if seed is not None:
            my_random = random.Random()
            my_random.seed(seed)
        else:
            my_random = random

        if sphere_radius is None:
            # Determine the box size
            mins = [min(struct.getXYZ()[:, x]) - buffer_len for x in range(3)]
            maxes = [max(struct.getXYZ()[:, x]) + buffer_len for x in range(3)]
            spans = [maxes[x] - mins[x] for x in range(3)]
            vol = spans[0] * spans[1] * spans[2]
            get_coords = lambda: numpy.array(
                [mins[x] + my_random.random() * spans[x] for x in range(3)])
        else:
            # Determine the sphere size
            sphere_radius += buffer_len
            vol = 4 * numpy.pi * sphere_radius**3 / 3
            mean = 0.
            std_dev = 1.
            if sphere_origin is None:
                sphere_origin = transform.get_centroid(struct)[:3]

            def get_coords():
                # taken from functionalize_nanoparticle_driver.py
                vec = numpy.array(
                    [my_random.gauss(mean, std_dev) for i in range(3)])
                vec = (sphere_radius * my_random.uniform(0, 1)**
                       (1. / 3.)) * transform.get_normalized_vector(vec)
                return vec + sphere_origin

        # Create a distance cell to find any atom within the max VDW radius of
        # a point
        maxrad = get_maximum_vdw_radius(struct)
        cell = mm.mmct_create_distance_cell(struct.handle, maxrad * vdw_scale)

        hits = 0
        numtries = max(300 * struct.atom_total, 20000)
        for tries in range(numtries):
            coords = get_coords().tolist()
            neighbors_mm = mm.mmct_query_distance_cell(cell, *coords)
            neighbors = mmlist._mmlist_to_pylist(neighbors_mm)
            mm.mmlist_delete(neighbors_mm)
            for neighbor in neighbors:
                atom = struct.atom[neighbor]
                dist2 = sum([(a - b)**2 for a, b in zip(coords, atom.xyz)])
                radius = atom.radius * vdw_scale
                if dist2 <= radius * radius:
                    hits += 1
                    break
        mm.mmct_delete_distance_cell(cell)
        ratio = hits / numtries
        if return_ratio:
            return vol * ratio, 100 * ratio
        else:
            return vol * ratio

[docs]    def getNewCell(self):
        """
        Return a new structure that contains the scaffold molecule. If there is
        scaffold structure, the returned structure is empty

        :rtype: `schrodinger.structure.Structure`
        :return: A structure object that is either empty or contains the
            scaffold structure, depending on whether a scaffold structure exists or
            not.
        """

        cell = structure.create_new_structure()
        if self.scaffold:
            cell.extend(self.scaffold)
        return cell

[docs]    def getExcludedVolume(self):
        """
        Get the amount of volume the scaffold occupies

        :rtype: bool
        :return: The volume computed by approximateVolume when this object was
            created.
        """

        return self.volume

[docs]    def defineBox(self, volume):
        """
        The default implementation is to define a cube with all sides equal that
        is centered on the scaffold centroid.

        :type volume: float
        :param volume: The volume of the desired cube

        :rtype: `Box`
        :return: The Box object for this scaffold
        """

        side = volume**(old_div(1, 3.0))
        if self.scaffold:
            halfside = old_div(side, 2.0)
            vertices = []
            for coord in self.centroid:
                vertices += [coord - halfside, coord + halfside]
        else:
            vertices = [0, side] * 3
        self.box = Box(vertices)
        return self.box

[docs]    def addPBCProperties(self, struct, volume):
        """
        Add periodic boundary condition properties to the structure. This method
        overwrites any existing periodic boundary condition properties,
        including Desmond chorus and PDB space group properties.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to add the properties to

        :type volume: float
        :param volume: The volume of the cubic periodic boundary condition
        """

        side = volume**(old_div(1, 3.0))
        aval = bval = cval = side
        self.addDesmondPBC(struct, side)
        self.addSpaceGroupPBC(struct, aval, bval, cval)

[docs]    def addDesmondPBC(self,
                      struct,
                      ax,
                      ay=0.0,
                      az=0.0,
                      bx=0.0,
                      by=None,
                      bz=0.0,
                      cx=0.0,
                      cy=0.0,
                      cz=None):
        """
        Add properties to the structure that define the periodic boundary
        condition in the way Desmond wants it defined.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to add the properties to

        :type ax: float
        :param ax: The value of the ax box property

        :type ay: float
        :param ay: The value of the ay box property. Defaults to 0.

        :type az: float
        :param az: The value of the az box property. Defaults to 0.

        :type bx: float
        :param bx: The value of the bx box property. Defaults to 0.

        :type by: float
        :param by: The value of the by box property. If not given, this value is
            set the same as ax.

        :type bz: float
        :param bz: The value of the bz box property. Defaults to 0.

        :type cx: float
        :param cx: The value of the cx box property. Defaults to 0.

        :type cy: float
        :param cy: The value of the cy box property. Defaults to 0.

        :type cz: float
        :param cz: The value of the cz box property. If not given, this value is
            set the same as ax.
        """

        desmondutils.store_chorus_box_props(struct,
                                            ax,
                                            ay=ay,
                                            az=az,
                                            bx=bx,
                                            by=by,
                                            bz=bz,
                                            cx=cx,
                                            cy=cy,
                                            cz=cz)

[docs]    def addSpaceGroupPBC(self,
                         struct,
                         aval,
                         bval,
                         cval,
                         alpha=90.,
                         beta=90.,
                         gamma=90.,
                         space_group='P 1'):
        """
        Add properties to the structure that define the periodic boundary
        condition in the way Crystal wants it defined.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to add the properties to

        :type side: float
        :param side: The length of one side of the cubic periodic boundary
            condition
        """

        xcl = xtal.Crystal
        struct.property[xcl.SPACE_GROUP_KEY] = space_group
        struct.property[xcl.A_KEY] = aval
        struct.property[xcl.B_KEY] = bval
        struct.property[xcl.C_KEY] = cval
        struct.property[xcl.ALPHA_KEY] = alpha
        struct.property[xcl.BETA_KEY] = beta
        struct.property[xcl.GAMMA_KEY] = gamma

[docs]    def getPBCMinMaxes(self):
        """
        Get the length of one side of the periodic boundary condition

        :type cell: `schrodinger.structure.Structure`
        :param cell: The current cell with structures that have already been
            placed

        :rtype: list
        :return: A six item list that defines the min and max value of
            X, Y and Z. The six values in order are: Xmin, Ymin, Zmin, Xmax, Ymax,
            Zmax. If not PBC box has been created, a list of all 0.0 is returned
        """

        if self.box:
            mins = []
            maxes = []
            for axis in self.box.AXES:
                amin, amax = self.box.getMinMax(axis)
                mins.append(amin)
                maxes.append(amax)
            return mins + maxes
        else:
            return [0.0] * 6

[docs]    def getPointInBox(self):
        """
        Get a point in a random position in the box

        :rtype: list
        :return: [x, y, z] coordinates of a random point in the box
        """

        return self.box.getPointInBox()


[docs]class BuilderWithClashDetection(object):
    """
    The base class for the amorphous builder classes
    """

[docs]    def __init__(self,
                 basename='cell',
                 backend=None,
                 logger=None,
                 color=None,
                 vdw_scale=1.0):
        """
        Create a Builder object

        :type basename: str
        :param basename: The base name for structure files created by this
            builder

        :type backend: `schrodinger.job.jobcontrol._Backend`
        :param backend: The job control backend we are running under, or None if
            not running under a backend

        :type logger: `logging.Logger`
        :param logger: The logger for this builder

        :type color: str or None
        :param color: Set to module constant COLOR_BY_MOLECULE to color the
            structures in the cell by molecule

        :type vdw_scale: float
        :param vdw_scale: Scale factor for VdW radii during clash detection
        """

        self.basename = basename
        self.backend = backend
        if self.backend is None:
            self.backend = jobcontrol.get_backend()
        self.logger = logger
        self.vdw_scale = vdw_scale
        self.color = color
        self.has_rings = True

[docs]    def log(self, msg, level=logging.INFO):
        """
        Log a message

        :type msg: str
        :param msg: The message to log

        :type level: `logging` constant
        :param level: A log level constant from the `logging` module such as
            INFO, WARNING, ERROR...
        """

        textlogger.log(self.logger, msg, level=level)

[docs]    def buildingBlocksHaveRings(self):
        """
        Override in subclasses to check if any of the building blocks have
        rings. If none do, it will be a waste of time to look for them in the
        larger structure.

        :rtype: bool
        :return: If any of the building blocks have rings
        """

        return self.has_rings

[docs]    def findRings(self, struct):
        """
        Find all the rings in the provided structure. Since ring-finding is
        expensive, its best to pre-calculate them once. Since the coordinates
        of the ring don't matter, we can use these rings over and over as long
        as the atom numbering doesn't change.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to find the rings for

        :rtype: list
        :return: List of `schrodinger.structure._Ring` objects in the given
            structure.
        """

        if self.buildingBlocksHaveRings():
            return list(struct.ring)
        else:
            return []

[docs]    def findRingSpears(self,
                       ring_struct,
                       spear_struct=None,
                       rings=None,
                       ring_based=True,
                       pbc=None):
        """
        Find all cases where a bond spears a ring

        :type ring_struct: `schrodinger.structure.Structure`
        :param ring_struct: The structure containing the rings

        :type spear_struct: `schrodinger.structure.Structure`
        :param spear_struct: The structure containing the atoms that might spear
            the rings. If not provided, ring_struct will be used.

        :type rings: list
        :param rings: Each item of the list is a `schrodinger.structure._Ring`
            object. This is the list returned by the findRings() method. If not
            provided, they will be calculated on the fly - which takes considerable
            time. If findRingSpears will be run more than once on the same structure
            (even if the geometry changes), the rings should be precalculated via
            findRings and passed in via this parameter.

        :type ring_based: bool
        :param ring_based: Whether the returned dictionary should contain keys
            that are atom indexes of the speared ring (True), or of the bond
            spearing the ring (False)

        :type pbc: None, infrastructure.PBC, or list
        :param pbc: If periodic boundary conditions should be used, provide
            either an infrastructure.PBC object or the parameters to construct one.
            Allowed constructors:
            * a, b, c : box lengths
            * a, b, c, alpha, beta, gamma box : box lengths and angles
            * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors

        :rtype: dict
        :return: If ring_based=True, keys are an atom index of one of the atoms
            in the speared ring, and values are the atom index of one of the atoms
            in the spearing bond. If ring_based=False, the keys/values are flipped.
        """

        if not rings:
            rings = self.findRings(ring_struct)
            if not rings:
                # No rings to spear
                return {}

        spears = ringspear.check_for_spears(ring_struct,
                                            spear_struct=spear_struct,
                                            rings=rings,
                                            first_only=False,
                                            pbc=pbc)

        # Convert the spears list to a list of clashes
        clashes = {}
        for spear in spears:
            spear_at = spear.spear_indexes[0]
            ring_at = spear.ring_indexes[0]
            if ring_based:
                clashes.setdefault(ring_at, []).append(spear_at)
            else:
                clashes.setdefault(spear_at, []).append(ring_at)
        return clashes

[docs]    def removeIgnoredClashes(self, all_clashes, ignored_clashes):
        """
        Get only those clashes that are not ignored.

        :type all_clashes: dict
        :param all_clashes: All found clashes

        :type ignored_clashes: dict
        :param ignored_clashes: Clashes that should be ignored

        :rtype: dict
        :return: Those clashes in all_clashes that are not in ignored_clashes.
            Keys are atom indexes, values are all the atom indexes that clash with
            that atom
        """

        used_clashes = {}
        for clashee, clashers in all_clashes.items():
            if clashee not in ignored_clashes:
                # All of this atom's clashes are not ignored
                used_clashes[clashee] = clashers
            else:
                # Find if any of this atom's clashes are not ignored
                raw_clashers = set(ignored_clashes[clashee])
                for clasher in clashers:
                    if clasher not in raw_clashers:
                        used_clashes.setdefault(clashee, []).append(clasher)
        return used_clashes

[docs]    def getInfraStructure(self, struct):
        """
        Get an infrastructure Structure object and an associated bitset
        struct.atom_total long

        :type struct: `schrodinger.structure.Structure`
        :param struct: The python module Structure object

        :rtype: tuple
        :return: First item of the tuple is a
            `schrodinger.infra.structure.Structure` object. Second item is a bitset
            that is struct.atom_total long
        """

        infra_struct = infrastructure.Structure_get_structure(struct)
        bitset = mmbitset.Bitset(size=struct.atom_total)
        bitset.fill()
        return infra_struct, bitset

[docs]    def getClashes(self,
                   struct1,
                   cutoff,
                   struct2=None,
                   pbc=None,
                   check_14=False):
        """
        Find clashes - either intrastructure if struct2 is None, or
        interstructure if struct2 is not None.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure for intrastructure clashes or the first
            structure for interstructure clashes

        :type cutoff: float
        :param cutoff: The cutoff for finding clashes. This value is multipied
            times the sum of the VDW radii of the two atoms. If the distance is less
            than this scaled VDW radii sum, a clash exists

        :type struct2: `schrodinger.structure.Structure`
        :param struct2: The second structure for interstructure clashes

        :type pbc: None, infrastructure.PBC, or list
        :param pbc: If periodic boundary conditions should be used, provide
            either an infrastructure.PBC object or the parameters to construct one.
            Allowed constructors:
            * a, b, c : box lengths
            * a, b, c, alpha, beta, gamma box : box lengths and angles
            * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors

        :type check_14: bool
        :param check_14: If False, the atom pairs separated by 3 covalent bonds
            are excluded for clash check.

        :rtype: dict
        :return: keys are atom indexes in struct1 (or struct2 if defined), and
            values are all the atom indexes in struct1 that clash with that atom
        """

        # Create any necessary periodic boundary condition
        if pbc and not isinstance(pbc, infrastructure.PBC):
            pbc = infrastructure.PBC(*pbc)

        # Set up the parameters for finding clashes
        hbonds = None
        salt_bridges = None
        cparams = infrastructure.ContactParams()
        cparams.setCutoff(cutoff)
        cparams.setShow1_4(check_14)

        # Convert to infrastructure Structures and find clashes
        infra_struct1, bitset1 = self.getInfraStructure(struct1)
        if struct2:
            infra_struct2, bitset2 = self.getInfraStructure(struct2)
            contacts = infrastructure.get_contacts(infra_struct1, bitset1,
                                                   infra_struct2, bitset2,
                                                   cparams, hbonds,
                                                   salt_bridges, pbc)
        else:
            contacts = infrastructure.get_contacts(infra_struct1, bitset1,
                                                   cparams, hbonds,
                                                   salt_bridges, pbc)

        # Convert the returned clashes to a dict of clashes
        all_clashes = {}
        for contact in contacts:
            atom1 = contact.getAtom1().getIndex()
            atom2 = contact.getAtom2().getIndex()
            all_clashes.setdefault(atom1, []).append(atom2)
        return all_clashes

[docs]    def checkForIntraStructureClashes(self,
                                      struct,
                                      scale=None,
                                      pbc=None,
                                      rings=None):
        """
        Check for any intrastructure clashes

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure for intrastructure clashes

        :type scale: float
        :param scale: The cutoff for finding clashes. This value is multipied
            times the sum of the VDW radii of the two atoms. If the distance is less
            than this scaled VDW radii sum, a clash exists

        :type pbc: None, infrastructure.PBC, or list
        :param pbc: If periodic boundary conditions should be used, provide
            either an infrastructure.PBC object or the parameters to construct one.
            Allowed constructors:
            * a, b, c : box lengths
            * a, b, c, alpha, beta, gamma box : box lengths and angles
            * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors

        :type rings: list
        :param rings: The precalculated rings returned by findRings() on the
            structure. If not supplied, they will be calculated on the fly.

        :rtype: dict
        :return: keys are atom indexes, and values are all the atom indexes
            that clash with that atom. A clash may come from two atoms too close
            together, or a ring that is speared by a bond.
        """

        if not scale:
            scale = self.vdw_scale
        # Get the close atom clashes
        clashes = self.getClashes(struct, scale, pbc=pbc)
        # Add in the ring spears
        ring_clashes = self.findRingSpears(struct, rings=rings, pbc=pbc)
        for at1, at2 in ring_clashes.items():
            clashes.setdefault(at1, []).extend(at2)
        return clashes

[docs]    @staticmethod
    def countClashes(clashes):
        """
        Count the total number of clashes

        :type clashes: dict
        :param clashes: keys are atom indexes, values are all the atom indexes
            that clash with that atom
        """

        return sum([len(x) for x in clashes.values()])

[docs]    def colorByMolecule(self, struct):
        """
        Color each molecule in struct a different color

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to color
        """

        for mol in struct.molecule:
            color = get_color(mol.number - 1)
            for atom in mol.atom:
                atom.color = color


[docs]def nearest_rotatable_bonds(struct,
                            rings,
                            backbone_atom_id,
                            side_atom_ids,
                            pre_atom_prop=BRANCH_ATOM_PROP,
                            atom_prop=HEAD_ATOM_PROP):
    """
    Search the side groups of one atom and find the 'first neighbor'
    rotatable bonds.

    :type struct: `schrodinger.structure.Structure`
    :param struct: structure to search bonds

    :type rings: list
    :param rings: List of ring atom index lists

    :type backbone_atom_id: int
    :param backbone_atom_id: the atom id of one backbone atom

    :type side_atom_ids: list
    :param side_atom_ids: list of atom ids of the side groups for the
        backbone_atom

    :type pre_atom_prop: str
    :param pre_atom_prop: the property to be set True for the atom in the
        rotatable bonds that is closer to the backbone_atom_id

    :type atom_prop: str
    :param atom_prop: the property to be set True for the atom in the
        rotatable bonds that is deeper in side group

    :rtype: list of tuple
    :return: each tuple is two atom indexes, indicating 'first neighbor'
        rotatable bonds in side groups for one atoms
    """

    bonds_to_break = []
    atom_ids_to_walk = [backbone_atom_id]
    side_atom_id_set = set(side_atom_ids)
    while atom_ids_to_walk:
        pre_atom = struct.atom[atom_ids_to_walk.pop(0)]
        for atom in pre_atom.bonded_atoms:
            if atom.index not in side_atom_id_set:
                continue
            if analyze.is_bond_rotatable(struct.getBond(pre_atom.index,
                                                        atom.index),
                                         rings=rings,
                                         allow_methyl=True):
                pre_atom.property[pre_atom_prop] = True
                atom.property[atom_prop] = True
                bonds_to_break.append(tuple([pre_atom.index, atom.index]))
            else:
                atom_ids_to_walk.append(atom.index)
            side_atom_id_set.remove(atom.index)

    return bonds_to_break


[docs]class BuilderGrowInCellMixin(object):
    """
    A mixin for classes that uses in-cell polymer grow method.
    """

[docs]    def setRingsForOneMol(self, new_init_frag, ringnum):
        """
        Create `_Ring` objects for each fragment in one molecule.

        :type new_init_frag: `PolymerFragment`
        :param new_init_frag: the first fragment containing initiator

        :type ringnum: int
        :param ringnum: the total number of rings of all molecules in cell
        """

        for frag in new_init_frag.fragments():
            if frag.rings_atom_ids:
                for ring_ids in frag.rings_atom_ids:
                    one_ring_struct = structure._Ring(self.cell, ringnum,
                                                      ring_ids, self.all_rings)
                    self.all_rings.append(one_ring_struct)
                    frag.rings.append(one_ring_struct)

[docs]    def placeOneIniInCell(self, frag, new_chain=True):
        """
        Place a new or dead chain fragment containing initiator into the cell.
        If new_chain=True, randomly pick a position in cell; make sure the position
        is away from pre-existing initiators; check contact of the newly added
        fragments against all pre-existing atoms. If contact exists, randomly pick
        anothor position.
        If new_chain=False, find a random position in the largest void in cell;
        check contact; if contact exists, loop over all the gridded positions in the
        largest void.

        :type frag: `PolymerFragment`
        :param frag: the first fragment containing initiator

        :type new_chain: bool
        :param new_chain: if True, the fragment is from a new chain; else, the
            fragment is from a dead chain.

        :rtype: bool
        :return: True, if the initiator fragment is successfully placed in cell
        """

        all_xyz = self.cell.getXYZ()
        self.cur_frag = frag
        self.cur_bitset = mmbitset.Bitset.from_list(self.cell.atom_total,
                                                    self.cur_frag.atom_ids)
        self.setDistanceCell()
        for new_xyz in self.getPosition(new_chain):
            copy_struct = frag.template_polymer.copy()
            random_rotation(copy_struct, centroid=numpy.array([0, 0, 0]))
            mol_xyz = copy_struct.getXYZ() + new_xyz
            all_xyz[frag.atom_gids_in_mol, :] = mol_xyz
            self.cell.setXYZ(all_xyz)
            if not self.hasClashes():
                # succeed to place the INI frag into cell
                return True
                # Centroids of INI frags are aways, but newly added INI
                # frag has clashes with pre-existing atoms
                # Shouldn't hit this at the beginning of grow_together,
                # unless very short polymer with very large INI
        else:
            # reach the max trials to place INI frag into cell
            return False

[docs]    def initiateMultiMols(self, frags, grid_mids, margins, indexes):
        """
        Place multiple initiators of the fragments into the cell.

        :type frags: list of `PolymerFragment`
        :param frags: the first fragments containing initiators

        :param grid_mids: middle points on a grid layout
        :type grid_mids: list

        :param margins: the margin between two middle points
        :type margins: list

        :param indexes: the available grid indexes
        :type indexes: list

        :return: the fragment failed to be placed
        :rtype: list of `PolymerFragment`
        """

        self.distance_cell = infrastructure.DistanceCell(
            self.cell.handle, self.max_distance, self.pbc)
        if any(x.rings for x in frags):
            self.ringspear_distance_cell = infrastructure.DistanceCell(
                self.cell.handle, ringspear.ROUGH_CUT_DISTANCE, self.pbc)

        # Rotate and move the molecules so that the initiators fit into the grids,
        # and then filter out the ones clashing with the previous structure.
        all_xyz = self.cell.getXYZ()
        xyzs = self.getPositions(len(frags), grid_mids, margins, indexes)
        failed_frags = []
        for frag, xyz in zip(frags, xyzs):
            copy_struct = frag.template_polymer.copy()
            random_rotation(copy_struct, centroid=numpy.array([0, 0, 0]))
            mol_xyz = copy_struct.getXYZ() + xyz
            all_xyz[frag.atom_gids_in_mol, :] = mol_xyz
            self.cell.setXYZ(all_xyz)
            self.cur_bitset = mmbitset.Bitset.from_list(self.cell.atom_total,
                                                        frag.atom_ids)
            self.cur_frag = frag
            if self.hasClashes():
                failed_frags.append(frag)
                continue

            self.bitSetOn(frag)
            for nfrag in frag.next_frags:
                nfrag.continue_grow = True
            if frag.next_frags:
                self.frags_by_mol.append([frag.next_frags[:], 0])
                continue
            self.cur_struct_in_cell += 1

        return failed_frags + frags[len(xyzs)::]

[docs]    def getGrids(self, diameter):
        """
        Get the grid layout related things.

        :param diameter: the diameter of the largest initiator
        :type diameter: float

        :return: grid points, margins in three dims, available grid indexes
        :rtype: dict, list, list
        """

        if self.scaffold.scaffold_mode == IMMERSED:
            # Without substrate, the scaffold box defines the space
            vecs = self.pbc.getBoxVectors()
            vrt = self.scaffold.box.vertices
            origin = numpy.array([vrt[0], vrt[2], vrt[4]])
            atoms_on = list(self.existing_atom_bitset)
            if atoms_on:
                # The align the grid centroid with the existing atom centroid
                # bitset starts from index 1, and getXYZ() needs indexes from 0
                gids = [i - 1 for i in atoms_on]
                origin += self.cell.getXYZ()[gids, :].mean(axis=0)
        elif self.scaffold.scaffold_mode == INTERFACE:
            # The edges of the hull parallel to the pbc.getBoxVectors() are
            # used to grid the void. The hull has vectors of the proper length but we
            # don't know which one corresponds to which PBC vector. So we find the
            # three hull vectors that are most closely parallel to the PBC vectors.
            pbc_vecs = [
                x / numpy.linalg.norm(x) for x in self.pbc.getBoxVectors()
            ]
            points = self.scaffold.box.hull.points
            hull_vecs = numpy.array(
                [points[i] - points[0] for i in range(1, len(points))])
            # Find the angles between each PBC vector and all hull vectors
            angles = [[
                abs(transform.get_angle_between_vectors(y, x))
                for y in hull_vecs
            ]
                      for x in pbc_vecs]
            # Find the hull vector that is most nearly parallel to each PBC vector
            mangles = [min(x) for x in angles]
            # Indexes of the hull vectors that are parallel to the PBC vectors
            mangle_idxs = [x.index(y) for x, y in zip(angles, mangles)]
            # Each vector has the length of the corresponding Hull vector and
            # the direction of the PBC vector
            vecs = [
                x * numpy.linalg.norm(hull_vecs[y])
                for x, y in zip(pbc_vecs, mangle_idxs)
            ]
            origin = points[0]

        lens = [numpy.linalg.norm(vec) for vec in vecs]
        grid_num = [math.floor(x / diameter) for x in lens]
        num = [i if i <= 2 else i - 1 for i in grid_num]
        indexes = tuple(
            itertools.product(*[[y + 0.5 for y in range(x)] for x in num]))
        pts = {
            x: origin + numpy.dot(numpy.array(x) / num, vecs) for x in indexes
        }
        margins = [(x - y * diameter) / y for x, y in zip(lens, num)]
        return pts, margins, indexes

[docs]    def getPositions(self, num, grid_mids, margins, indexes):
        """
        Get positions based on grid with randomness

        :param num: the number of position requested
        :type num: int

        :param grid_mids: the grid middle points
        :type grid_mids: dict

        :param margins: the margins for grid points in each dimension.
            Intentionally design this room so that initiators can translationally
            move away from the grid mid points.
        :type margins: list of float

        :param indexes: the available grid indexes
        :type indexes: list

        :return: list of random points
        :rtype: list
        """

        try:
            sel_indexes = random.sample(indexes, num)
        except ValueError:
            # The requested sample number exceeds the pool
            sel_indexes = indexes

        xyzs = []
        for index in sel_indexes:
            xyz = grid_mids[index]
            xyz += numpy.array([x * random.random() for x in margins])
            xyzs.append(xyz)
        return xyzs

[docs]    def growFragmentCell(self, density, avdw_scale):
        """
        Make a single attempt at building the cell by growing polymer fragments.

        :type density: float
        :param density: The density of the cell

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs

        :rtype: `schrodinger.structure.Structure` or None
        :return: The built cell, or None if an error occurred
        """

        self.initTangledMethod(density)
        self.setCellsForTangledChain()
        self.copyAllStructsIntoCell()
        self.preparePbcAndParams(avdw_scale)
        self.setSubstrateSpearRings()
        self.setVolGraphAndBitset()
        components = self.getAllComponents()
        multi_comp = len(components) > 1
        for ini_frags in components:
            if multi_comp and self.scaffold.scaffold_mode != CONTAINER:
                try:
                    self.setIniFrags(ini_frags)
                except InitiationError as err:
                    self.log(str(err))
                    return None
            else:
                try:
                    self.setIniFragsOnebyOne(ini_frags)
                except InitiationError as err:
                    self.log(str(err))
                    return None
            if self.grow_together and ini_frags != components[-1]:
                # Grow all molecules simultaneously:
                # continue to place INI frags into the cell until None left
                continue
            # Adjust structure of rest fragments in a molecule
            while self.frags_by_mol:
                # FIX ME: multi threads should help here due to
                # 1. Before chains meet each other, grow each chain in one thread,
                #    and then check clashes only between newly added atoms on sync
                # 2. When only a few chains cannot are left, grow the same single
                #    chain with different random numbers in each thread. If succeed
                #    by one thread, abandon other attempts in different threads
                for frags_and_trial_num in self.frags_by_mol:
                    (frags_in_one_mol, tried_per_mol) = frags_and_trial_num
                    self.cur_frag = frags_in_one_mol.pop(0)
                    self.cur_bitset = mmbitset.Bitset.from_list(
                        self.cell.atom_total, self.cur_frag.atom_ids)
                    self.cur_frag.hitted_num += 1
                    dihe_values = self.cur_frag.getDiheValues()
                    if len(dihe_values):
                        self.setDistanceCell()
                    for dihe_value in dihe_values:
                        self.cell.adjust(dihe_value, *self.cur_frag.dihedral)
                        if self.hasClashes():
                            # Continue to try another dihedral value
                            continue
                        self.cur_frag.dihe_value = dihe_value
                        break
                    else:
                        # Back move, as all dihedrals in frag lead to clashes
                        pre_frag = self.cur_frag.pre_frag
                        if pre_frag.is_branching:
                            # Track other branches and delete from pre-existing
                            self.deleteOtherBranches(pre_frag, frags_in_one_mol)
                        if not pre_frag.dihe_value:
                            # Dead chain: pre_frag is INI frag due to back move
                            tried_per_mol += 1
                            if self.relocateFailedMol(pre_frag,
                                                      frags_in_one_mol,
                                                      tried_per_mol):
                                frags_and_trial_num[1] = tried_per_mol
                                # Continue to try fragment in next molecule
                                continue
                            else:
                                self.log(
                                    f'Only {self.cur_struct_in_cell} molecules '
                                    f'succeeded. ({self.num_structs} requested)'
                                )
                                return None
                        # The previous dihedral of pre_frag won't be considered again
                        # why need if statement:
                        # 1. one dihe deleted from pre_frag pool due to first time back move
                        # 2. starting from the parent of pre_frag, pre_frag.dihe_value is set randomly
                        # 3. the second time performing back move on pre_frag, the randomly set dihe
                        # may not be in the pool
                        if pre_frag.dihe_value in pre_frag.dihe_pool:
                            pre_frag.dihe_pool.remove(pre_frag.dihe_value)
                        pre_frag.continue_grow = False
                        self.bitSetOff(self.existing_atom_bitset, pre_frag)
                        frags_in_one_mol.insert(0, pre_frag)
                        continue

                    self.bitSetOn(self.cur_frag)
                    for frag_of_same_generation in self.cur_frag.next_frags:
                        frag_of_same_generation.continue_grow = True
                        if frag_of_same_generation.in_sidegroup:
                            # insert as the first frag to be grown next time
                            frags_in_one_mol.insert(0, frag_of_same_generation)
                        else:
                            frags_in_one_mol.append(frag_of_same_generation)

                self.removeFinishedPolymer()

        self.log('Successfully grew %d structures' % self.num_structs)

        return self.cell

[docs]    def initTangledMethod(self, density):
        """
        Initialize some attributes for the tangled method.

        :param density: the density of the cell
        :type density: float
        """

        self.total_tried = 0
        self.cur_struct_in_cell = 0
        self.existing_rings = []
        self.existing_spear_rings = []
        self.pre_atom_ids = set()
        self.frags_by_mol = []
        num_per_chain = [x.num_struct for x in self.all_type_polymer_and_frags]
        self.num_structs = sum(num_per_chain)
        polymers = [x.template_polymer for x in self.all_type_polymer_and_frags]
        self.volume = self.getDesiredVolume(polymers,
                                            density,
                                            num_per_chain=num_per_chain)
        # Estimate the distance between initiators
        # Note: container may have inaccurate volume and self.r_per_ini_frag,
        # but self.r_per_ini_frag decreases automatically to solve the issue
        self.r_per_ini_frag = (self.volume / self.num_structs / 4 * 3 /
                               math.pi)**(1. / 3.)

[docs]    def setCellsForTangledChain(self):
        """
        Set cells for tangle chain method.
        """

        self.scaffold.defineBox(self.volume)
        self.cell = self.getNewCellStructure(self.volume)
        self.cell_template = self.cell.copy()
        self.ini_cell = self.cell_template.copy()

[docs]    def copyAllStructsIntoCell(self):
        """
        Copy all structures into the cell, but the bitsets are off.
        Deepcopy fragments.
        """

        self.all_mol_fragments = collections.defaultdict(list)
        self.existing_rings += [x for x in self.cell.ring]
        self.all_rings = self.existing_rings[:]
        total_ringnum = sum([
            one_type_polymer_and_frag.ringnum
            for one_type_polymer_and_frag in self.all_type_polymer_and_frags
        ])
        molnum = 0
        for atype_polym_frag in self.all_type_polymer_and_frags:
            for index in range(atype_polym_frag.num_struct):
                molnum += 1
                new_init_frag = atype_polym_frag.first_frag.deepCopy(molnum)
                new_init_frag.adjustAtomIds(self.cell.atom_total)
                self.all_mol_fragments[atype_polym_frag].append(new_init_frag)
                struct = atype_polym_frag.template_polymer.copy()
                self.cell.extend(struct)
                self.setRingsForOneMol(new_init_frag, total_ringnum)

[docs]    def preparePbcAndParams(self, avdw_scale):
        """
        Prepare pbc, contact parameters, max distance for distance cell.

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs
        """

        self.pbc = clusterstruct.create_pbc(self.cell)
        log_debug(f"PBC: {','.join(map(str, self.pbc.getBoxLengths()))}")
        self.cparams = infrastructure.ContactParams()
        self.cparams.setShow1_4(False)
        self.cparams.setCutoff(avdw_scale)
        max_radius = max(x.radius for x in self.cell.atom)
        self.max_distance = avdw_scale * max_radius * 2
        log_debug(f"Minimum Clash Distance: {self.max_distance}")

[docs]    def setSubstrateSpearRings(self):
        """
        Set Spear Rings for the substrate.
        """

        self.existing_spear_rings += [
            ringspear.SpearRing(self.cell, ring.getAtomIndices(), pbc=self.pbc)
            for ring in self.existing_rings
        ]
        self.is_cg = msutils.is_coarse_grain(self.cell, by_atom=True)

[docs]    def setVolGraphAndBitset(self):
        """
        Set volume graph and bitset.
        """

        self.vol_graph = XYZVolumeGraph(self.cell,
                                        spacing=2.0,
                                        scaffold=self.scaffold)
        self.existing_atom_bitset = mmbitset.Bitset(size=self.cell.atom_total)
        if not self.scaffold.scaffold:
            return
        for atom_id in range(1, self.scaffold.scaffold.atom_total + 1):
            self.existing_atom_bitset.set(atom_id)

[docs]    def setIniFragsOnebyOne(self, ini_frags):
        """
        Set the initiator fragments one by one.

        :param ini_frags: the initiators of some components to be placed into
            the cell and grow later.
        :type ini_frags: list of 'PolymerFragments'
        """

        if not ini_frags:
            return
        self.log(f"Placing {len(ini_frags)} initiators...")
        for ini_frag in ini_frags:
            if not self.placeOneIniInCell(ini_frag):
                raise InitiationError(
                    f'Only {self.cur_struct_in_cell} molecules '
                    'are successfully placed in cell, but '
                    f'{self.num_structs} are requested.')
            # Turn on the initiator bitset, set fragment state, and register to
            # the fragment pool to grow
            self.bitSetOn(ini_frag)
            for frag in ini_frag.next_frags:
                frag.continue_grow = True
            if ini_frag.next_frags:
                self.frags_by_mol.append([ini_frag.next_frags[:], 0])
                continue
            # Rigid body components finish growth after initiation
            self.cur_struct_in_cell += 1
            self.log('failed molecule attempt: %i    finished molecules: %i' %
                     (self.total_tried, self.cur_struct_in_cell))

[docs]    def setIniFrags(self, ini_frags):
        """
        Set the initiator fragments in batch mode.

        :param ini_frags: the initiators of some components to be placed into
            the cell and grow later.
        :type ini_frags: list of 'PolymerFragments'
        """

        if not ini_frags:
            return

        self.log(f"Placing {len(ini_frags)} initiators...")
        diameter = self.getIniDiameter(ini_frags)
        grid_mids, margins, indexes = self.getGrids(diameter)
        tries_per_mol = 0
        while ini_frags:
            failed_frags = self.initiateMultiMols(ini_frags, grid_mids, margins,
                                                  indexes)
            success_num = len(ini_frags) - len(failed_frags)
            if not success_num:
                tries_per_mol += 1
                if tries_per_mol > self.tries_per_mol:
                    raise InitiationError(
                        f'Only {self.cur_struct_in_cell} molecules '
                        'are successfully placed in cell, but '
                        f'{self.num_structs} are requested.')
            # Initiators placed into the cells, but some are not finished if
            # they need to grow but haven't
            self.log('failed molecule attempt: %i    finished molecules: %i' %
                     (tries_per_mol, self.cur_struct_in_cell))
            ini_frags = failed_frags

[docs]    def getIniDiameter(self, frags):
        """
        Get the largest diameters of the initiators by computing the longest
        span between any pairs of atoms.

        :param frags: list for fragments to be placed in
        :type frags: list of 'PolymerFragments'

        :return: the largest diameters of the initiators
        :rtype: float
        """

        dists = []
        for frag in frags:
            atoms = [self.cell.atom[x] for x in frag.atom_ids]
            atom_xyz = [x.xyz for x in atoms]
            dist = pdist(atom_xyz)
            sdist = squareform(dist)
            atom_id_pair = numpy.unravel_index(numpy.argmax(sdist), sdist.shape)
            radius_sum = sum([atoms[i].radius for i in atom_id_pair])
            max_dist = numpy.nanmax(dist) if len(dist) else 0.0
            mdist = self.cparams.getCutoff() * radius_sum + max_dist
            dists.append(mdist)

        return max(dists)

[docs]    def getAllComponents(self):
        """
        Get all components to be placed in the cell. Volume, fragment number,
        and replica number for each component are used to divide all component
        into several groups.

        :return: Large, soft, and fewer fragments first
        :rtype: list of 'PolymerFragment' lists
        """

        fragments = self.all_mol_fragments.values()
        if self.grow_together:
            return [list(itertools.chain.from_iterable(fragments)), []]

        counts = [len(x) for x in fragments]
        by_count = self.indexChanges(counts, reverse=True)
        mols = [x[0].template_polymer for x in fragments]
        radius_cubic = [sum([x.radius**3 for x in x.atom]) for x in mols]
        by_size = self.indexChanges(radius_cubic)
        frag_nums = [len(list(x[0].fragments())) for x in fragments]
        by_flex = self.indexChanges(frag_nums, threshold=20)
        by_all = [min(list(x)) for x in zip(by_count, by_size, by_flex)]
        components = [[] for x in range(max(by_all) + 1)]
        for index, fragments in zip(by_all, fragments):
            components[index].extend(fragments)

        return components

[docs]    @staticmethod
    def indexChanges(values, threshold=10., reverse=False):
        """
        Use the threshold as the ratio criterion to index the sudden change
        in the sorted sequence.

        :param values: input values to divide
        :type values: list of floats

        :param threshold: the ratio criterion to identify the sudden change
        :type threshold: float

        :param reverse: If True, smaller values are grouped with smaller indexes.
            Otherwise larger values are grouped with smaller indexes.
        :type reverse: bool

        :return: indexes are the group tokens
        :rtype: list of int
        """

        sorted_vals = sorted(values, reverse=True)
        ratios = [1.]
        ratios += [x / y for x, y in zip(sorted_vals[:-1], sorted_vals[1:])]
        index, ratio_indexes = 0, []
        for ratio in ratios:
            if ratio >= threshold:
                index += 1
            ratio_indexes.append(index)
        indexes = [ratio_indexes[sorted_vals.index(x)] for x in values]
        if not reverse:
            return indexes

        max_index = max(indexes)
        return [max_index - x for x in indexes]

[docs]    def setDistanceCell(self):
        """
        Set DistanceCell for the clash check of current fragment.
        """

        # Pre-existing atoms in distance_cell don't change position during
        # clash check for one certain fragment.
        self.distance_cell = infrastructure.DistanceCell(
            self.cell.handle, self.max_distance, self.pbc)
        if self.cur_frag.rings:
            # Ring spear uses a different cut distance
            self.ringspear_distance_cell = infrastructure.DistanceCell(
                self.cell.handle, ringspear.ROUGH_CUT_DISTANCE, self.pbc)

[docs]    def writeDihedralDis(self, file_name='tmp_dis.txt'):
        """
        Write the dihedral angle distribution of the structures in cell.
        """

        # TEST ONLY
        interval = old_div(360., (DIHEDRAL_NUM - 1))
        my_array = numpy.zeros((DIHEDRAL_NUM, 2))
        for idx in range(DIHEDRAL_NUM):
            my_array[idx, 0] = idx * interval
        for ini_frags in self.all_mol_fragments.values():
            for ini_frag in ini_frags:
                for frag in ini_frag.fragments(include_self=False):
                    dihe = self.cell.measure(*frag.dihedral) % 360
                    my_array[int(round(dihe, interval)), 1] += 1
        numpy.savetxt(file_name, my_array)

[docs]    def relocateFailedMol(self,
                          pre_frag,
                          frags_in_one_mol,
                          tried_per_mol,
                          undo_crystal_links=False):
        """
        Place one failed molecule back into the cell.

        :type pre_frag: {PolymerFragment}
        :param pre_frag: the ini fragment of one polymer

        :type frags_in_one_mol: list of {PolymerFragment}
        :param frags_in_one_mol: polymer fragment of this polymer
            within one polymer

        :type tried_per_mol: int
        :param tried_per_mol: failed trial number

        :type undo_crystal_links: bool
        :param undo_crystal_links: whether to unlink the chain from the crystals

        :rtype: bool
        :return: True, if successfully placed the ini fragment back
        """

        if tried_per_mol == self.tries_per_mol:
            # Reach the max trials for replacing one INT. Abort this cell
            self.log(
                'The growth of one molecule has failed %s times, reaching the '
                'the maximum attempts per molecule set by -tries_per_mol.' %
                self.tries_per_mol)
            return False
        self.total_tried += 1

        self.log('failed molecule attempt: %i    finished molecules: %i' %
                 (self.total_tried, self.cur_struct_in_cell))
        # temporarily turn off INT frag and prepare voids search
        self.bitSetOff(self.existing_atom_bitset, pre_frag)
        try:
            self.vol_graph.locateVoids(set(self.existing_atom_bitset))
        except VolumeMemoryError as msg:
            self.log(str(msg))
            sys.exit()

        if undo_crystal_links:
            self.undoCrystalLinks(pre_frag)
            self.resetOrigDihed(pre_frag)

        # Try to add the dead chain back to cell by rotation and relocate
        if self.placeOneIniInCell(pre_frag, new_chain=False):
            pre_frag.resetFragDihedralPool()
            for frag_of_same_generation in pre_frag.next_frags:
                frag_of_same_generation.continue_grow = True
                frags_in_one_mol.append(frag_of_same_generation)
            # Successfully relocate one dead molecule and continue
            self.bitSetOn(pre_frag)
            return True
        else:
            self.log('Failed to relocate dead chains.')
            return False

[docs]    def hasClashes(self):
        """
        Check whether current fragment has clashes and speared rings with
        pre-existing structure.

        :rtype: bool
        :return: True, if has clashes
        """

        if infrastructure.get_contacts(self.cell, self.cur_bitset,
                                       self.distance_cell,
                                       self.existing_atom_bitset, self.cparams):
            # Atoms in the fragment clash with pre-exsiting atoms in cell
            return True

        if self.cur_frag.rings:
            if ringspear.check_for_spears(self.cell,
                                          atoms=self.pre_atom_ids,
                                          rings=self.cur_frag.rings,
                                          pbc=self.pbc,
                                          cell=self.ringspear_distance_cell,
                                          return_bool=True,
                                          distorted=self.is_cg):
                # Pre-exsiting atoms spear rings in the fragment
                return True

        if self.existing_rings:
            # FIXME: shouldn't rebuild distance cell from scratch
            # Temporary fix: extract substructure
            # Add the bonds between fragments for ring spear check
            if self.cur_frag.dihedral:
                spear_struct = self.cell.extract(self.cur_frag.atom_ids +
                                                 [self.cur_frag.dihedral[2]])
            else:
                spear_struct = structure.Structure(
                    mm.mmct_ct_extract_atoms(self.cell.handle, self.cur_bitset),
                    True)
            if ringspear.check_for_spears(self.cell,
                                          spear_struct=spear_struct,
                                          rings=self.existing_rings,
                                          pbc=self.pbc,
                                          spear_rings=self.existing_spear_rings,
                                          return_bool=True,
                                          distorted=self.is_cg):
                # Atoms in the fragment spear rings in pre-exsiting atoms
                return True

        return False

[docs]    def removeFinishedPolymer(self):
        """
        Remove frags_by_mols that have no polymer fragments and update finished
        polymer number.
        """

        for frags_and_trial_num in reversed(self.frags_by_mol):
            if not frags_and_trial_num[0]:
                self.cur_struct_in_cell += 1
                self.log(
                    'failed molecule attempt: %i    finished molecules: %i' %
                    (self.total_tried, self.cur_struct_in_cell))
                self.frags_by_mol.remove(frags_and_trial_num)

[docs]    def bitSetOn(self, frag):
        """
        Set on the pre-existing atom bitset according to atom ids in the fragment;
        record rings and spear_rings according to rings in the fragment.

        :type frag: `PolymerFragment`
        :param frag: the polymer fragment providing the atom ids
        """

        for atom_id in frag.atom_ids:
            self.existing_atom_bitset.set(atom_id)
            if self.has_rings:
                self.pre_atom_ids.add(atom_id)
        self.existing_rings += frag.rings[:]
        frag.spear_rings = [
            ringspear.SpearRing(self.cell, ring.getAtomIndices(), pbc=self.pbc)
            for ring in frag.rings
        ]
        self.existing_spear_rings += frag.spear_rings[:]

[docs]    def bitSetOff(self, bitset, frag):
        """
        Set the bitset off according to the atom ids in the fragment;
        remove rings and spear_rings according to rings in the fragment.

        :type bitset: `Bitset`
        :param bitset: the bitset to set

        :type frag: `PolymerFragment`
        :param frag: the polymer fragment providing the atom ids
        """

        for atom_id in frag.atom_ids:
            bitset.unset(atom_id)
            if self.has_rings:
                self.pre_atom_ids.remove(atom_id)
        for ring in frag.rings:
            self.existing_rings.remove(ring)
        for spear_ring in frag.spear_rings:
            self.existing_spear_rings.remove(spear_ring)

[docs]    def getPosition(self, new_chain):
        """
        Find random positions in the cell according to the following rules.
        If new_chain=True, randomly pick a position in cell that is away from
        pre-existing initiators.
        If new_chain=False, randomly loop over all the gridded positions in the
        largest void in cell.

        :type new_chain: bool
        :param new_chain: if True, the fragment is from a new chain; else, the
            fragment is from a dead chain.

        :rtype: list
        :return: [x, y, z], a random position in cell
        """

        if new_chain:
            return self.getRandXYZ()
        else:
            return self.getRandGridXYZ()

[docs]    def getRandGridXYZ(self):
        """
        Get xyz iterator of random grid points in the largest void.

        :rtype: list
        :return: [x, y, z], a random position in cell
        """

        for xyz in self.vol_graph.getBuriedXYZ(self.vol_graph.getLargestVoid()):
            # Volume graph and the scaffold box define different origin positions
            # Shift xyz from volume graph into the scaffold box
            shift_vector = self.scaffold.box.getTranslationToBox(xyz)
            yield [b + a for a, b in zip(xyz, shift_vector)]

[docs]    def getRandXYZ(self, max_num=2000, max_failed_num=2000):
        """
        Return a random point in the space and no other initiators and scaffold
        within a pre-defined raduis.

        :type max_num: int
        :param max_num: max number of xyz position returned

        :type max_failed_num: int
        :param max_failed_num: max number of attemps when finding points
            self.r_per_ini_frag away from scaffold and other initiator atoms.

        :rtype: iterator of list
        :return: iterator of [x, y, z], a random position in cell
        """

        self.ini_cell.addAtom('Br', 0, 0, 0)
        added_atom_id = self.ini_cell.atom_total
        point_num = 0
        while point_num < max_num:
            near_point = True
            failed_num = 0
            while near_point:
                random_point = self.scaffold.getPointInBox()
                self.ini_cell.atom[added_atom_id].xyz = random_point
                near_point = False
                for atom_id in range(1, self.ini_cell.atom_total):
                    distance = self.ini_cell.measure(added_atom_id, atom_id)
                    if distance < self.r_per_ini_frag:
                        near_point = True
                        failed_num += 1
                        if failed_num > max_failed_num:
                            failed_num = 0
                            self.r_per_ini_frag *= 0.9
                        break
            point_num += 1
            yield random_point

[docs]    def deleteOtherBranches(self, pre_frag, frags_in_one_mol):
        """
        Remove the fragments from the next_frags pool and
        set off the bitset of moved fragments.

        :type pre_frag: `PolymerFragment`
        :param pre_frag: the branching parent polymer fragment

        :type frags_in_one_mol: list of `PolymerFragment`
        :param frags_in_one_mol: the pool of PolymerFragment to be grown
        """

        frags_to_remove = pre_frag.next_frags[:]
        frags_to_remove.remove(self.cur_frag)
        while frags_to_remove:
            frag_to_remove = frags_to_remove.pop(0)
            if self.existing_atom_bitset.get(frag_to_remove.atom_ids[0]):
                self.bitSetOff(self.existing_atom_bitset, frag_to_remove)
                frags_to_remove += frag_to_remove.next_frags[:]
            else:
                frags_in_one_mol.remove(frag_to_remove)

[docs]    def setPolymerFragments(self, vdw_scale):
        """
        Set polymer fragments for tangled chain method.

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs
        """

        # PolymerFragments order in this list is the order, according to which
        # the initiators of the structures are placed into the cell.
        # Rigid bodies are treated as huge initiators, and are first placed into
        # the cell as they can be large and won't grow bit by bit.
        self.all_type_polymer_and_frags = []
        self.all_type_polymer_and_frags += self.getPolymerFragments(
            vdw_scale, get_rigid=True)
        self.all_type_polymer_and_frags += self.getPolymerFragments(
            vdw_scale, get_rigid=False)

[docs]    def getPolymerFragments(self,
                            vdw_scale,
                            get_rigid=False,
                            restore_template=None):
        """
        Break polymers (if needed), prepare them, and return the corresponding
        PolymerFragments objects.

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs

        :type get_rigid: bool
        :param get_rigid: If True, pick those marked as rigid bodies. If False,
            skip those marked as rigid bodies.

        :type restore_template: bool or None
        :param restore_template: restore the template polymer to the original one.
            If None, debug mode determines

        :rtype: list of 'PolymerFragments'
        :return: Each item contains fragment information for one molecule.
        """

        polymer_and_frags = []
        for index, (struct,
                    num_struct) in enumerate(zip(self.structs,
                                                 self.composition)):
            is_rigid = bool(struct.property.get(RIGID_BODY_PROP))
            if get_rigid != is_rigid:
                # Request rigid body, but the struct is not marked as rigid, or
                # Doesn't Request rigid body, but the struct is marked as rigid
                continue
            # multiple molecules may present in one ws or pt entry
            for mol_idx, molecule in enumerate(struct.molecule):
                mol_struct = molecule.extractStructure(copy_props=True)
                indexing = '_'.join(map(str, [index, mol_idx]))
                polymFrags = PolymerFragments(mol_struct,
                                              num_struct,
                                              forcefield=self.forcefield,
                                              vdw_scale=vdw_scale,
                                              temperature=self.dihe_temperature,
                                              logger=self.logger,
                                              indexing=indexing)
                restore_template = not schrodinger.in_dev_env(
                ) if restore_template is None else restore_template
                polymFrags.breakIntoFragments(is_rigid=is_rigid,
                                              restore_template=restore_template)
                prepare_building_block(polymFrags.template_polymer,
                                       index,
                                       recolor=self.recolor,
                                       preserve_res=self.preserve_res)
                polymer_and_frags.append(polymFrags)
        return polymer_and_frags


[docs]class AmorphousCellBuilder(BuilderWithClashDetection, BuilderGrowInCellMixin):
    """
    Builder for an amorphous cell
    """

[docs]    def __init__(self,
                 scaffold=None,
                 system=True,
                 population=1,
                 composition=None,
                 density=0.5,
                 amorphous_vdw_scale=1.0,
                 obey=OBEY_DENSITY,
                 tries_per_dc=5,
                 tries_per_mol=5,
                 title="",
                 allow_increased_density=True,
                 forcefield=OPLS2005,
                 grow=False,
                 dihe_temperature=None,
                 grow_together=False,
                 recolor=False,
                 preserve_res=True,
                 split_components=False,
                 wam_type=None,
                 **kwargs):
        """
        Create an AmorphousCellBuilder object

        :type Scaffold: `Scaffold`
        :param Scaffold: The Scaffold object for the cell to use that defines
            the molecule the cell should be built around (or inside or on top of)

        :type system: bool
        :param system: Whether a Desmond system should be built with the final
            amorphous cell

        :type population: int
        :param population: The number of molecules to include in the amorphous
            cell

        :type composition: list
        :param composition: A list of the same length and order as structs which
            gives the number of structures of each component

        :type density: float
        :param density: The initial target density for building the cell. Does
            not include the scaffold volume or mass

        :type amorphous_vdw_scale: float
        :param amorphous_vdw_scale: The initial VdW scale factor to use when
            looking for clashes in the amorphous cell

        :type obey: str
        :param obey: A module constant indicating which initial target to keep
            constant when trying to build an amorphous cell - either OBEY_DENSITY to
            keep the density constant, OBEY_CLASH to keep the VdW scale factor
            constant, or OBEY_BOTH to keep both constant. The latter will prevent
            the builder from relaxing one of these constraints to find a cell
            without clashes.

        :type tries_per_dc: int
        :param tries_per_dc: The number of tries to make a cell at a given
            density and VdW clash scale factor before relaxing the parameter that
            is not specified by obey

        :type tries_per_mol: int
        :param tries_per_mol: The number of times to try placing a
            molecule in the cell without clases before scrapping the cell and
            starting over

        :type title: str
        :param title: The title to give the final cell structure

        :type allow_increased_density: bool
        :param allow_increased_density: If True and obey=OBEY_CLASH, allow
            attempts to build cells at increasing density as long as a cell was
            built successfully

        :type forcefield: str
        :param forcefield: The name of the force field to use. The default is
            'OPLS_2005'.

        :type grow: bool
        :param grow: whether grow polymers via self-avoiding random walk

        :type dihe_temperature: float, or None
        :param dihe_temperature: the temperature to calculate dihedral angle distribution

        :type grow_together: bool
        :param grow_together: Grow all the molecules together if True

        :type recolor: bool
        :param recolor: Whether to recolor the components so that each molecule
            of the same component is the same color

        :type preserve_res: bool
        :param preserve_res: Whether to preserve the current residue information

        :type split_components: bool
        :param split_components: Whether to split system in components in the
            system build

        :type wam_type: int or None
        :param wam_type: One of the enums defined in workflow_action_menu.h if
            the results should have a Workflow Action Menu in Maestro

        See the parent class for additional keyword documentation
        """

        BuilderWithClashDetection.__init__(self, **kwargs)
        if scaffold is None:
            self.scaffold = Scaffold()
        else:
            self.scaffold = scaffold
        self.system = system
        self.population = population
        self.composition = composition
        self.density = density
        self.avdw_scale = amorphous_vdw_scale
        self.obey = obey
        self.tries_per_dc = tries_per_dc
        self.tries_per_mol = tries_per_mol
        self.title = title
        self.allow_increased_density = allow_increased_density
        self.forcefield = forcefield
        self.grow = grow
        self.dihe_temperature = dihe_temperature
        self.grow_together = grow_together
        self.recolor = recolor
        self.preserve_res = preserve_res
        self.split_components = split_components
        self.wam_type = wam_type
        self.debug = schrodinger.in_dev_env()

[docs]    def setDensityIncreaseAllowed(self, is_allowed):
        """
        Set whether increased cell densities can be attempted after a successful
        cell is built using OBEY_CLASH

        :type is_allowed: bool
        :param is_allowed: Whether increased density attempts can be tried
        """

        self.allow_increased_density = is_allowed

[docs]    def build(self, generate_only=False, center=True):
        """
        Build the cell and do all the post-processing

        :type generate_only: bool
        :param generate_only: Only generate the cell, do not write it to a file,
            write final density, or attempt to create a Desmond system with it

        :type center: bool
        :param center: Center the structure in the PBC unit cell, assuming the
            cell has its origin at 0, 0, 0

        :rtype: `schrodinger.structure.Structure` or None
        :return: The created structure or None if no structure was found
        """

        self.log('Creating amorphous cell')
        # Actually build the cell
        cell = self.buildCellDriver()
        if not cell:
            return

        if not generate_only:
            self.log('Final cell density = %.3f' % get_density(cell))

        # Allow custom post-treatment of the raw structure before the system is
        # built
        cell = self.postTreatBuiltCell(cell)

        # Color if necessary
        if self.color == COLOR_BY_MOLECULE:
            self.log('Coloring the molecules...')
            self.colorByMolecule(cell)

        if center:
            self.log('Translating the cell in space...')
            # Center the structures in the unit cell (unit cell has one corner
            # at the origin and all other vertices in the +xyz directions)
            centroid = transform.get_centroid(cell)
            half = old_div(self.getPBCSide(cell), 2.0)
            center = [half, half, half]
            move = [x - y for x, y in zip(center, centroid)]
            transform.translate_structure(cell, *move)

        if msutils.is_coarse_grain(cell, by_atom=True):
            coarsegrain.set_as_coarse_grain(cell)

        if generate_only:
            return cell

        # Create a Desmond system or just write out this structure
        if self.system:
            self.log('Creating the Desmond system...')
            cms_name = desmondutils.run_system_builder(
                cell,
                self.basename,
                rezero_system=True,
                forcefield=self.forcefield,
                split_components=self.split_components,
                wam_type=self.wam_type)
            if cms_name:
                if self.backend:
                    self.backend.addOutputFile(cms_name)
                    self.backend.setStructureOutputFile(cms_name)
            else:
                self.log('Unable to create the Desmond system')
        else:
            amcell_file = self.basename + AMCELL_NO_SYSTEM_OUT
            self.log('Writing final cell to %s' % amcell_file)
            cell.write(amcell_file)
            if self.backend:
                self.backend.addOutputFile(amcell_file)
                self.backend.setStructureOutputFile(amcell_file)
        return cell

[docs]    def postTreatBuiltCell(self, cell):
        """
        The default implementation of this does nothing, but subclasses can use
        this method to modify the raw cell immediately after the last structure
        is added to it but before any other processing (such as centering,
        coloring, writing out or building a Desmond system)

        :type cell: `schrodinger.structure.Structure`
        :param cell: The built cell

        :rtype: `schrodinger.structure.Structure`
        :return: The modified cell
        """

        return cell

[docs]    def buildCell(self, density, avdw_scale):
        """
        Make a single attempt at building the cell.

        :type density: float
        :param density: The density of the cell

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs

        :rtype: `schrodinger.structure.Structure` or None
        :return: The built cell, or None if an error occurred
        """

        try:
            if self.grow:
                cell = self.growFragmentCell(density, avdw_scale)
            else:
                cell = self.rotateRigidBodyCell(density, avdw_scale)
        except ScaffoldError as msg:
            self.log(str(msg))
            return None

        return cell

[docs]    def buildCellDriver(self):
        """
        Driver function for cell building. Tries multiple cells until one
        succeeds. Adjust initial parameters to try to create more dense or
        tighter class cells as needed.

        :rtype: `schrodinger.structure.Structure` or None
        :return: The built cell, or None if an error occurred
        """

        num_structs = self.population
        density = self.density
        avdw_scale = self.avdw_scale
        to_density = self.obey == OBEY_DENSITY
        to_clash = self.obey == OBEY_CLASH
        to_both = not to_density and not to_clash

        if to_density:
            # Adjust the clash vdw scale to build to the specific density
            value = avdw_scale
            parameter_adjuster = self.adjustACellClashVDWScale
            parameter_string = 'clash vdw scale'
            minimum_value = 0.03
        elif to_clash:
            # Adjust the density to build to the specific clash vdw scale
            value = density
            parameter_adjuster = self.adjustACellDensity
            parameter_string = 'density'
            minimum_value = 0.001
        else:
            # Only attempt the given density and clash value
            value = density
            parameter_adjuster = parameter_string = minimum_value = None

        original_value = value

        if self.structs is None:
            # PolymerAmorphousCellBuilder doesn't have self.structs as input
            self.structs, self.composition = self.getBuildingBlocks(num_structs)
        if not self.structs:
            # Failed to build single polymers
            return None

        if self.grow:
            # Prepare building blocks for tangled chain method
            vdw_min = minimum_value if to_density else None
            vdw_scale = self.prepareTangledChainBlocks(avdw_scale, vdw_min)
            if vdw_scale is None:
                return None
            if to_density:
                value = vdw_scale
            else:
                avdw_scale = vdw_scale

        def attempt_cell():
            """
            Attempt to create the cell with the current parameters

            :rtype: `schrodinger.structure.Structure` or None
            :return: The built cell, or None if an error occurred
            """

            cell = None
            msg = ('Attempt %d at a cell with density = %.3f and clash '
                   'scale = %.3f')
            for atry in range(self.tries_per_dc):
                if to_density:
                    self.log(msg % (atry + 1, density, value))
                    cell = self.buildCell(density, value)
                else:
                    self.log(msg % (atry + 1, value, avdw_scale))
                    cell = self.buildCell(value, avdw_scale)
                if cell:
                    self.log('Amorphous cell populated')
                    self.log('')
                    return cell
            return cell

        # Try at ever decreasing parameter value until we succeed in packing all
        # molecules
        self.log('Creating the amorphous cell...')
        cell = None
        while not cell:
            cell = attempt_cell()
            if not cell:
                # No successful cell was found - try making the value smaller
                # (either less dense cell, or smaller clash vdw scale)
                if to_both:
                    self.log('Unable to find cell with the given density and '
                             'VDW clash values')
                    return
                value = parameter_adjuster(value, down=True)
                if value < minimum_value:
                    self.log('Unable to find cell with %s > %.3f' %
                             (parameter_string, minimum_value))
                    return

        successful_value = value
        best_cell = cell
        # If we succeeded at the initial values and density is the adjustable
        # parameter, try to increase the density value as much as possible up to
        # a point.
        if (to_clash and successful_value == original_value and
                self.allow_increased_density):
            while cell:
                value = parameter_adjuster(value, down=False)
                self.log('Increasing %s to %.2f' % (parameter_string, value))
                cell = attempt_cell()
                if cell:
                    best_cell = cell
                    successful_value = value
                if value > 2:
                    break
        cell = best_cell

        if to_density:
            # Monte Carlo and steric pack steps may change density
            self.log('Final %s = %.3f' % (parameter_string, successful_value))
        return cell

[docs]    def prepareTangledChainBlocks(self, vdw_scale, vdw_min):
        """
        Break structures into polymer fragments and handle vdw scale adjustment.

        :type vdw_scale: float
        :param vdw_scale: the vdw scale to check clashes.

        :type vdw_min: float or None
        :param vdw_min: If float, this is the minimum allowable vdw scale after
            adjustment. If None, don't allow vdw adjustment.

        :rtype: float or None
        :return: If float, this is the vdw scale.
        """

        while True:
            try:
                self.setPolymerFragments(vdw_scale)
            except RotationError as msg:
                if vdw_min is None:
                    self.log(str(msg))
                    return None
                elif vdw_scale < vdw_min:
                    self.log('Unable to find cell with clash vdw scale > %.3f' %
                             vdw_scale)
                    return None
                vdw_scale = self.adjustACellClashVDWScale(vdw_scale, down=True)
                self.log(
                    'Decreasing vdw scale to %.2f due to unvoidable clashes' %
                    vdw_scale)
            else:
                return vdw_scale

[docs]    def getBuildingBlocks(self, number):
        """
        Get the structures to place into the amorphous cell. Must be overridden
        in the subclass to produce structures

        :type number: int
        :param number: The number of structures to return

        :rtype: list
        :return: list of `schrodinger.structure.Structure` objects to place
            into the cell
        """

        return []

[docs]    def rotateRigidBodyCell(self, density, avdw_scale):
        """
        Make a single attempt at building the cell by random placing and rotating
        rigid structures.

        :type density: float
        :param density: The density of the cell

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs

        :rtype: `schrodinger.structure.Structure` or None
        :return: The built cell, or None if an error occurred
        """

        volume = self.getDesiredVolume(self.structs,
                                       density,
                                       num_per_chain=self.composition)
        self.scaffold.defineBox(volume)
        cell = self.getNewCellStructure(volume)
        cell_rand = cell.copy()
        scaffold_mol_num = len(cell_rand.molecule)
        has_custom_mmffld = detect_custom_mmffld_data(self.structs)

        # A list of structures in the order the population is randomly sampled
        all_structs = []
        for struct, num_struct in zip(self.structs, self.composition):
            all_structs += [struct.copy() for num in range(num_struct)]
        struct_order = [x for x, y in enumerate(all_structs)]
        # Randomize the structure order when placing them into the cell
        random.shuffle(struct_order)

        # Place the structures in the cell one by one
        for index, pick_idx in enumerate(struct_order):
            # Let the user know that provided (monomer) structure has
            # intra-structure clashes
            raw_clashes = self.checkForIntraStructureClashes(
                all_structs[pick_idx], scale=avdw_scale)
            if raw_clashes:
                msg = 'WARNING: molecule "%s" has intra-structure clashes.'
                self.log(msg % struct.title)

            success = self.placeStructureInCell(cell_rand,
                                                all_structs[pick_idx],
                                                avdw_scale)
            if not success:
                break
        if not success:
            self.log('Successfully placed only %d structures' % index)
            return None

        # To be consistent with the log info: "Chain 1: AAB; Chain 2: BCA",
        # Restore the structure order to the polymer chain build order
        for orig_idx, struct in enumerate(all_structs):
            mol_id = struct_order.index(orig_idx) + scaffold_mol_num + 1
            new_xyz = cell_rand.molecule[mol_id].extractStructure().getXYZ()
            struct.setXYZ(new_xyz)
            if has_custom_mmffld:
                # This call is expensive, so we only do it if we have to
                # preserve mmffld custom charge blocks (MATSCI-8373)
                cell = structure.Structure(
                    mm.mmffld_extendCTwithCustomCharges(cell, struct))
            else:
                cell.extend(struct)

        return cell

[docs]    def placeStructureInCell(self,
                             cell,
                             struct,
                             avdw_scale,
                             no_rotation=False,
                             rotate_velocities=False):
        """
        Place a structure in the cell. Make multiple rotations and translations
        to try to find a spot that the structure fits in with no clashes

        :type cell: `schrodinger.structure.Structure`
        :param cell: The current cell with structures that have already been
            placed

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to attempt to place into the cell

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs

        :param bool no_rotation: If True, do not rotate the structure

        :param bool rotate_velocities: If True, besides the structure, also
            rotate FFIO atom velocities (if present)

        :rtype: bool
        :return: Whether the structure was placed in the cell successfully

        """
        transform.translate_centroid_to_origin(struct)
        clashes = True
        attempts = 0
        cell_rings = self.findRings(cell)
        struct_rings = self.findRings(struct)
        # Find all existing clashes in the structure before the PBC and before
        # adding to the cell - we'll ignore these as they are the result of
        # building the structure to the user's specifications and might have
        # been unavoidable
        raw_clashes = self.checkForIntraStructureClashes(struct,
                                                         scale=avdw_scale)
        # Keep trying different rotations and translations until one works with
        # no clashes
        while clashes and attempts < self.tries_per_mol:
            attempts += 1
            candidate = self.locateAndRotate(
                struct,
                no_rotation=no_rotation,
                rotate_velocities=rotate_velocities)
            candidate_rings = self.duplicateRings(struct_rings, candidate)

            try:
                clashes = self.checkForClashes(cell, candidate, avdw_scale,
                                               raw_clashes, cell_rings,
                                               candidate_rings)
            # Can be raised by self.getClashes (MATSCI-2620, MATSCI-4296)
            except ValueError as msg:
                self.log('Cannot detect clashes: ' + str(msg))
                continue

        if not clashes:
            # Success! Add the structure to the cell
            cell.extend(candidate)
        return not clashes

[docs]    def duplicateRings(self, original_rings, new_struct):
        """
        Take a list of _Ring objects referenced to one structure and convert to
        a list of _Ring objects referenced to a new structure. This is much
        faster than finding rings in the new structure. It is implied that the
        new and old structure have identical bonding.

        :type original_rings: iterable of _Ring objects
        :param original_rings: The _Ring objects to duplicate

        :param new_struct: `schrodinger.structure.Structure`
        :param new_struct: The new structure the ring objects should reference

        :rtype: list
        :return: List of `schrodinger.structure._Ring` objects that reference
            atoms in new_structure
        """

        new_rings = []
        for ring in original_rings:
            atoms = ring.getAtomIndices()[:]
            new_ring = structure._Ring(new_struct, ring._ringnum, atoms, None)
            new_rings.append(new_ring)
        return new_rings

[docs]    def locateAndRotate(self,
                        struct,
                        no_rotation=False,
                        rotate_velocities=False):
        """
        Randomly rotate a copy of struct and translate it to a position in
        the cell

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to attempt to place into the cell

        :param bool no_rotation: If True, do not rotate the structure

        :param bool rotate_velocities: If True, besides the structure, also
            rotate FFIO atom velocities (if present)

        :rtype: `schrodinger.structure.Structure`
        :return: A copy of struct that has been randomly rotated and translated
        """

        scopy = struct.copy()
        # Randomly rotate the structure
        centroid, rmatrix = random_rotation(scopy,
                                            no_rotation=no_rotation,
                                            rotate_velocities=rotate_velocities)

        # Only dump the rotation matrix in DEBUG
        textlogger.log(self.logger,
                       'Rotation matrix:',
                       level=textlogger.logging.DEBUG)
        textlogger.log(self.logger,
                       str(rmatrix),
                       level=textlogger.logging.DEBUG)

        # Move to random location
        xyz = self.scaffold.getPointInBox()
        new_xyz = scopy.getXYZ() + xyz
        scopy.setXYZ(new_xyz)
        return scopy

[docs]    def adjustACellDensity(self, density, down=True):
        """
        Adjust the cell density up or down - the amount it is adjusted depends
        on its current value and the direction it is being adjusted

        :type density: float
        :param density: The current cell density

        :type down: bool
        :param down: Whether to adjust the density down (or up if False)

        :rtype: float
        :return: The adjusted density
        """

        if down:
            if density > 0.15:
                density = density - 0.1
            else:
                density = density * 0.5
        else:
            if density < 0.1:
                density = density * 1.5
            else:
                density = density + 0.1
        return density

[docs]    def adjustACellClashVDWScale(self, avdw_scale, down=True):
        """
        Adjust the cell clash scaling up or down - the amount it is adjusted
        depends on its current value and the direction it is being adjusted

        :type avdw_scale: float
        :param avdw_scale: The current clash vdw scale factor

        :type down: bool
        :param down: Whether to adjust the scale factor down (or up if False)

        :rtype: float
        :return: The adjusted scale factor
        """

        if down:
            if avdw_scale > 1:
                avdw_scale = avdw_scale * 0.9
            elif avdw_scale <= 0.11:
                avdw_scale = avdw_scale - 0.03
            else:
                avdw_scale = avdw_scale - 0.1
        else:
            if avdw_scale < 1:
                avdw_scale = avdw_scale + 0.1
            else:
                avdw_scale = avdw_scale * 1.1
        return avdw_scale

[docs]    def checkForClashes(self, cell, candidate, avdw_scale, raw_clashes,
                        cell_rings, candidate_rings):
        """
        Check for clashes between the candidate structure and structures already
        within the cell, and also intrastructure clashes within the candiate
        that are the result of the periodic boundary condition. If
        intrastructure clashes are found, interstructure clashes are not checked
        for.

        :type cell: `schrodinger.structure.Structure`
        :param cell: The current cell with structures that have already been
            placed

        :type candidate: `schrodinger.structure.Structure`
        :param candidate: The structure to attempt to place into the cell

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs

        :type raw_clashes: dict
        :param raw_clashes: Those clashes that exist in the candidate without
            accounting for the PBC

        :type cell_rings: list
        :param cell_rings: Each item of the list is a
            `schrodinger.structure._Ring` object returned by the findRings()
            method.

        :type candidate_rings: list
        :param candidate_rings: Each item of the list is a
            `schrodinger.structure._Ring` object returned by the findRings()
            method.

        :rtype: dict
        :return: Any clashes found. Keys are atom indexes values are all the
            atom indexes that clash with that atom
        """

        pbc = []
        prop = desmondutils.CHORUS_PROP_PREFIX + '%s%s'
        for letter in 'abc':
            for m in 'xyz':
                pbc.append(cell.property[prop % (letter, m)])

        # Intrastructure clashes
        clashes = self.checkForIntraStructureClashes(candidate,
                                                     scale=avdw_scale,
                                                     pbc=pbc)
        if clashes:
            # We only care about those caused by the PBC
            pbc_only_clashes = self.removeIgnoredClashes(clashes, raw_clashes)
            if pbc_only_clashes:
                return pbc_only_clashes

        # Interstructure clashes
        clashes = self.checkForInterStructureClashes(cell, candidate,
                                                     avdw_scale, pbc,
                                                     cell_rings,
                                                     candidate_rings)
        return clashes

[docs]    def checkForInterStructureClashes(self, cell, candidate, avdw_scale, pbc,
                                      cell_rings, candidate_rings):
        """
        Check for clashes of the structure with other structures in the cell,
        including the periodic boundary condition. Also check for ring spears.
        The first check that finds clashes will return those clashes and no more
        checks will be made.

        :type cell: `schrodinger.structure.Structure`
        :param cell: The current cell with structures that have already been
            placed

        :type candidate: `schrodinger.structure.Structure`
        :param candidate: The structure to attempt to place into the cell

        :type avdw_scale: float
        :param avdw_scale: The VDW scale factor for clash cutoffs


        :type pbc: None, infrastructure.PBC, or list
        :param pbc: If periodic boundary conditions should be used, provide
            either an infrastructure.PBC object or the parameters to construct one.
            Allowed constructors:
            * a, b, c : box lengths
            * a, b, c, alpha, beta, gamma box : box lengths and angles
            * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors

        :type cell_rings: list
        :param cell_rings: Each item of the list is a
            `schrodinger.structure._Ring` object returned by the findRings()
            method.

        :type candidate_rings: list
        :param candidate_rings: Each item of the list is a
            `schrodinger.structure._Ring` object returned by the findRings()
            method.

        :rtype: dict
        :return: Any clashes found. Keys are atom indexes values are all the
            atom indexes that clash with that atom
        """

        if cell.atom_total == 0:
            # No interstructure clashes possible
            return {}

        # Note: Timings indicate that for systems on the order of 5000 atoms,
        # this get_contacts approach is competitive with using
        # DistanceCellIterator and
        # creating the distance cell once and then using that cell for each
        # candidate.  For systems of 100,000 atoms, the DistanceCellIterator
        # appears to be about 25X faster (depending on how many candidates are
        # checked for each unchanged cell). But DistanceCellIterator doesn't use
        # PBC currently, so we are sticking with get_contacts for now.

        clashes = self.getClashes(cell, avdw_scale, struct2=candidate, pbc=pbc)
        if clashes:
            return clashes

        # Find ring spears where the candidate spears a cell ring
        clashes = self.findRingSpears(cell,
                                      spear_struct=candidate,
                                      rings=cell_rings,
                                      ring_based=False,
                                      pbc=pbc)
        if clashes:
            return clashes

        # Find ring spears where the cell spears a candidate ring
        clashes = self.findRingSpears(candidate,
                                      spear_struct=cell,
                                      rings=candidate_rings,
                                      pbc=pbc)
        return clashes

[docs]    def getPBCSide(self, cell):
        """
        Get the length of one side of the periodic boundary condition

        :type cell: `schrodinger.structure.Structure`
        :param cell: The current cell with structures that have already been
            placed

        :rtype: float
        :return: The length of one side of the periodic boundary condition. 0 is
            returned if no PBC has been set on the cell
        """

        return cell.property.get('r_pdb_PDB_CRYST1_a', 0.0)

[docs]    def getNewCellStructure(self, volume):
        """
        Get a new, empty structure with the periodic boundary condition set

        :type volume: float
        :param volume: The desired volume of the periodic boundary condition

        :rtype: `schrodinger.structure.Structure`
        :return: The structure to add the properties to
        """

        cell = self.scaffold.getNewCell()
        self.scaffold.addPBCProperties(cell, volume)
        cell.title = 'amorphous %s' % self.title
        return cell

[docs]    def getDesiredVolume(self, structures, density, num_per_chain=None):
        """
        Get the volume of a cube that will have the given density when the
        given structures are placed entirely inside it

        :type structures: list
        :param structures: The list of structures to be placed into the
            box

        :type density: float
        :param density: The desired density (g/cm3)

        :type num_per_chain: list of int
        :param num_per_chain: The number of each structure. If provided,
            this list must be of the same length as the above structure list

        :rtype: float
        :return: The desired box side
        """

        if num_per_chain:
            mass = sum([
                struct.total_weight * num
                for struct, num in zip(structures, num_per_chain)
            ])
        else:
            # It's possible that not all structures have the same mass
            mass = sum([x.total_weight for x in structures])
        volume = AMU_PER_A3_TO_G_PER_CM3 * mass / density
        volume = volume + self.scaffold.getExcludedVolume()
        return volume


[docs]class EnergySurface(object):
    """
    Calculate energy surface, and dihedral angle probability distrubtion.
    """

[docs]    def __init__(self, orig_dihe_values):
        """
        Save original dihedral values and initialize energy.

        :type orig_dihe_values: list of float
        :param orig_dihe_values: The original dihedral values
        """

        self.dihe_num = len(orig_dihe_values)
        self.dihe_values = numpy.array(orig_dihe_values)
        self.energy = numpy.zeros(self.dihe_num)

[docs]    def setUniformDistribution(self):
        """
        Set uniform distribution.
        """

        self.torsion_probability = numpy.full(self.dihe_num,
                                              old_div(1., self.dihe_num))

[docs]    def setBoltzmannDistribution(self, struct, dihedral, forcefield_num,
                                 force_constant, beta):
        """
        Calculate torsion energy surface. If any energies on the surface are invalid,
        set uniform distribution. If all are valid, set Boltzmann distribution

        :type struct: {schrodinger.Structure.structure}
        :param struct: the local structure for dihedral angle energy surface

        :type dihedral: list of int
        :param dihedral: the 4 ints are the atom ids of target dihedral angle

        :type forcefield_num: int
        :param forcefield_num: the mmffld integer for the given forcefield

        :type force_constant: float
        :param force_constant: the energy constant for torsion restraints

        :type beta: float
        :param beta: 1/kT to get Boltzmann factor (E / kT)
        """

        # minimize, rotate, restrained minimize...  as L{BondRotator}
        # Canonicalize positions so that Minimizer works on reasonable initial
        # struct
        try:
            canonicalizer = fast3d.Canonicalizer()
            canonicalizer.canonicalizePositions(struct)
        except RuntimeError:
            # 'fragmentation failed' may happen as in canonicalizer_test.py
            self.setUniformDistribution()
            return

        with minimize.minimizer_context(
                struct=struct, no_zob_restrain=True,
                ffld_version=forcefield_num) as minimizr:
            for idx, dihe_value in enumerate(self.dihe_values):
                struct.adjust(dihe_value, *dihedral)
                minimizr.addTorsionRestraint(*(dihedral +
                                               [force_constant, dihe_value]))
                min_res = minimizr.minimize()
                self.dihe_values[idx] = minimizr.getStructure().measure(
                    *dihedral)
                self.energy[idx] = min_res.potential_energy
                minimizr.deleteAllRestraints()

        if any(numpy.isinf(self.energy)):
            # After minimization, E = inf
            self.setUniformDistribution()
            return

        # Calculate Boltzmann distribution
        # Shift the energy minimum to zero so that all energy values are positive
        # and the max probability value is 1
        self.energy -= min(self.energy)
        self.torsion_probability = numpy.exp(-self.energy * beta)
        # No zero probability values, and thus numpy.random.choice always generate
        # a full list of dihedral angle values
        self.torsion_probability[self.torsion_probability < 10**-10] = 10**-10
        p_sum = numpy.sum(self.torsion_probability)
        self.torsion_probability /= p_sum


[docs]class RotationError(Exception):
    pass


[docs]class PolymerFragments(BuilderWithClashDetection):
    """
    Save the struct and break it into fragments.
    """

    LETTER_NUM = 26

[docs]    def __init__(self,
                 struct,
                 num_struct,
                 dihedral_min=0.,
                 dihedral_max=360.,
                 dihedral_num=DIHEDRAL_NUM,
                 dihedral_exclude=False,
                 only_backbone=False,
                 vdw_scale=1.,
                 forcefield=OPLS2005,
                 temperature=None,
                 pop_local_clash=True,
                 logger=None,
                 indexing='0'):
        """
        Prepare infomation for in-cell grow of one single polymer chain.

        :type struct: `schrodinger.structure.Structure`
        :param struct: the structure to be breaked into fragments

        :type num_struct: int
        :param num_struct: copy the struct by num_struct times when
            placing them into the cell

        :type dihedral_min: float
        :param dihedral_min: lower limit of dihedral angle

        :type dihedral_max: float
        :param dihedral_max: upper limit of dihedral angle

        :type dihedral_num: int
        :param dihedral_num: the number of dihedral values

        :type dihedral_exclude: bool
        :param dihedral_exclude: (0, dihedral_min) and (dihedral_max, 360)
            are used as dihedral angle range, if True.

        :type only_backbone: bool
        :param only_backbone: only the rotatable bonds in backbone is considered

        :type vdw_scale: float
        :param vdw_scale: VDW scale factor to use

        :type forcefield: str
        :param forcefield: The name of the force field to use. The default is
            'OPLS_2005'.

        :type temperature: float, or None
        :param temperature: the temperature to calculate dihedral angle distribution
            using boltzmann factor. If None, a uniform distribution is used.

        :type pop_local_clash: bool
        :param pop_local_clash: whether remove dihedral angle values that lead to
            local clashes

        :type logger: `logging.Logger`
        :param logger: logger for output info

        :type indexing: `str`
        :param indexing: the indexing based on compositions and molecules
        """

        self.original_polymer = struct
        self.num_struct = num_struct
        self.dihedral_min = dihedral_min
        self.dihedral_max = dihedral_max
        self.dihedral_num = dihedral_num
        self.dihedral_exclude = dihedral_exclude
        self.only_backbone = only_backbone
        self.vdw_scale = vdw_scale
        self.forcefield = forcefield
        self.temperature = temperature
        self.logger = logger
        self.debug = schrodinger.in_dev_env()
        self.backend = jobcontrol.get_backend()
        super(PolymerFragments, self).__init__(vdw_scale=vdw_scale,
                                               logger=self.logger)
        self.template_polymer = struct.copy()
        self.moieties = None
        # template_polymer will be broken into fragments, and some of them are
        # further instantiated as Monomer objects. setupTacticity method in
        # Monomer constructor throws warnings when get_chiral_atoms method
        # analyzes struct with 's_st_Chirality_' property
        msutils.remove_properties(self.template_polymer,
                                  matches=['s_st_Chirality_'])
        self.template_polymer_rings = self.template_polymer.find_rings(
            sort=True)
        self.ringnum = len(self.template_polymer_rings)
        self.has_rings = bool(self.ringnum)
        self.torsion_energy_surf = None
        # Only enable pop_local_clash for long chains with large vdw scale
        self.pop_local_clash = (pop_local_clash and
                                self.template_polymer.atom_total > 1000 and
                                self.vdw_scale > 0.6)
        self.setDiheInit()
        self.debug_file = indexing + '.mae'

[docs]    def setDiheInit(self):
        """
        Set descrete dihedral values.
        """

        if self.dihedral_exclude:
            self.dihe_init = []
            for dihe in numpy.linspace(self.dihedral_max - 360,
                                       self.dihedral_min, self.dihedral_num):
                if dihe <= 0:
                    dihe += 360.0
                self.dihe_init.append(dihe)
        else:
            self.dihe_init = list(
                numpy.linspace(self.dihedral_min, self.dihedral_max,
                               self.dihedral_num))
            if self.dihedral_min == 0.0:
                # Since dihedral range is (0, 360], convert 0 to 360
                self.dihe_init.remove(self.dihedral_min)
                if self.dihedral_max != 360.0:
                    self.dihe_init.append(360.0)

[docs]    def breakIntoFragments(self, is_rigid=False, restore_template=True):
        """
        Break polymer into fragments.

        :type is_rigid: bool
        :param is_rigid: set the molecule as one fragment.

        :type restore_template: bool
        :param restore_template: restore the template polymer to the original one.
        """

        if not self.template_polymer:
            return

        if not is_rigid and self.checkAndmarkPolymerProp():
            self.setAllfragments()
        else:
            # Rigid body found
            self.setAsOneFragment()
            return

        if self.pop_local_clash or self.temperature:
            self.setLocalStructs()
        if self.pop_local_clash:
            self.popLocalClashes()
        if self.temperature:
            self.torsionEnergySurf(temperature=self.temperature)
        # Overwrite any changes to template_polymer beyond xyz
        self.original_polymer.setXYZ(self.template_polymer.getXYZ())
        if restore_template:
            self.template_polymer = self.original_polymer
        self.first_frag.template_polymer = self.template_polymer
        for atom in self.template_polymer.atom:
            # MATSCI-11822: remove alt_xyz so that no bonds look broken
            atom.alt_xyz = None
            atom.property.pop('r_m_pdb_occupancy', None)

[docs]    def checkAndmarkPolymerProp(self):
        """
        Check the struct and mark with necessary polymer properties.

        :rtype: bool
        :return: True, if the structure is successfully marked with polymer
                properties.
        """

        # Coarsegrain structures check s_m_atom_name instead
        if msutils.is_coarse_grain(self.template_polymer):
            is_polymer = self.isCgPolymer()
        else:
            is_polymer = self.isAllAtomPolymer()

        if not is_polymer:
            # Polymer residue information is assigned to non-polymer struct,
            # if the non-polymer struct has rotatable bonds.
            is_polymer = self.setAndMarkResidues()

        if is_polymer:
            self.extractMoieties()
            self.updateOrigAtomIdx()
            self.markMoietiesProps()
            try:
                self.moieties = Moieties('',
                                         structs=self.moiety_structs.values(),
                                         one_letter=False)
            except MoietyError:
                return False

        return is_polymer

[docs]    def setLocalStructs(self):
        """
        Set the local structures.
        """

        self.findNeighborFrags()
        self.setLocalStructAtomIds()
        self.saveLocalStructs()

[docs]    def setAsOneFragment(self):
        """
        Set the whole molecule as one fragment.
        """

        self.first_frag = PolymerFragment(
            0, 0, template_polymer=self.template_polymer)
        self.first_frag.atom_ids = list(
            range(1, self.template_polymer.atom_total + 1))
        # MATSCI-5503: rigid molecules should be centered
        self.structToOriginByFrag()

[docs]    def setAllfragments(self):
        """
        Set all fragments.
        """

        if not self.only_backbone:
            # Break the monomer moieties into more small moieties
            # Update monomer, all_moieties and residue (template polymer)
            for moiety in list(self.moieties.monomers.values()):
                moiety.setAsFragment()
                moiety.template_polymer = self.template_polymer
                for sub_moiety in moiety.breakTemplatePolymer():
                    moiety_pdbre = sub_moiety.pdbres.rstrip()
                    self.moieties.monomers[moiety_pdbre] = sub_moiety
                    self.moieties.all_moieties.append(sub_moiety)

        for moiety in self.moieties.all_moieties:
            moiety.setAsFragment()

        # Renumber resnum so that no two residues use the same resnum
        self.renumberResGetM2PMap()
        self.updateAllAtomIndexMaps()
        try:
            self.createFragForInitiator()
        except KeyError:
            # MATSCI-8369: renumberResGetM2PMap() modified the self.atom_idx_map_m2p
            # in a way that createFragForInitiator() gives a traceback when residue_num = 3.
            # The issue is narrowed down to the conflict between MONOMER_ORIG_ATOM_IDX_PROP
            # modified by breakTemplatePolymer() and empty return from getDelNextResnumByResnum()
            # However, I am not able to identify the exact bug and this is only a workaround
            # to ignore some rotatable bonds and make the builder job successful
            self.renumberResGetM2PMap(
                monomer_orig_idx_prop=MONOMER_ORIG_IDX_PROP2)
            self.updateAllAtomIndexMaps()
            self.createFragForInitiator()

        self.finishPolymerFrags()
        self.appendMissedFrags()
        self.assignRingAtomIds()
        self.first_frag.markBranchingFrag()
        self.first_frag.setDihedrals(self.dihe_init)
        self.first_frag.markSidegroupFrag()
        self.structToOriginByFrag()

[docs]    def saveLocalStructs(self):
        """
        Save local structs and update dihedrals according to the atom ids in
        newly saved local structs.
        """

        self.local_structs = {}
        is_coarse_grain = msutils.is_coarse_grain(self.template_polymer)
        capper_terminator = self.getCapperTerminator(is_coarse_grain)
        smiles_generator = smiles.SmilesGenerator(wantAllH=not is_coarse_grain)
        for frag in self.first_frag.fragments(include_self=False):
            # atom ids change due to struct extraction
            struct, atom_id_map = self.extractWithMap(
                self.template_polymer, frag.local_struct_atom_ids)
            grow_markers = []
            for atom in struct.atom:
                atom.property[ORIG_ATOM_IDX_PROP] = atom.index
            # cap atom in broken bonds with H or C atoms
            for end_atom_id in frag.end_atom_ids:
                grow_marker = atom_id_map[end_atom_id]
                grow_markers.append(grow_marker)
                grower = next(struct.atom[grow_marker].bonded_atoms).index
                capper_terminator.addToChain(struct,
                                             grower,
                                             grow_marker,
                                             orientation=HEADFIRST,
                                             remove_marker=False)
                # copy marker ORIG_ATOM_IDX_PROP to H/C capper
                struct.atom[struct.atom_total].property[ORIG_ATOM_IDX_PROP] = \
                struct.atom[grow_marker].property[ORIG_ATOM_IDX_PROP]
            struct.deleteAtoms(grow_markers)
            # atom ids change due to adding capper atoms
            atom_id_map2 = {}
            for atom in struct.atom:
                orig_atom_id = atom.property.get(ORIG_ATOM_IDX_PROP)
                atom_id_map2[orig_atom_id] = atom.index
            # atom ids change due to reorder_atoms
            smile_str, smile_map = smiles_generator.getSmilesAndMap(struct)
            if smile_str not in self.local_structs:
                try:
                    struct = build.reorder_atoms(struct, smile_map)
                    self.local_structs[smile_str] = struct
                except:
                    # Some invalid structs (e.g. delete some H atoms from a valid struct)
                    # cause Exception: The 'new_order' list must have xx elements.
                    self.local_structs[smile_str] = None
                    frag.local_struct_smiles = smile_str
                    continue
            # three-step map: struct extraction, capping with H/C; smile_map reorder_atoms
            dihe_atom_ids = [
                smile_map.index(atom_id_map2[atom_id_map[x]]) + 1
                for x in frag.dihedral
            ]
            mapped_local_struct_side_dihedrals = []
            for local_struct_side_dihedral in frag.local_struct_side_dihedrals:
                mapped_local_struct_side_dihedrals.append([
                    smile_map.index(atom_id_map2[atom_id_map[x]]) + 1
                    for x in local_struct_side_dihedral
                ])
            frag.local_struct_dihedral = dihe_atom_ids
            frag.local_struct_side_dihedrals = mapped_local_struct_side_dihedrals
            frag.local_struct_smiles = smile_str
            frag.torsion_pattern = ' '.join(
                [smile_str, ','.join(map(str, dihe_atom_ids))])

[docs]    def popLocalClashes(self):
        """
        Try dihedral value one by one for the target torsion in local structs,
        and eliminate those leading to inevitable local clashes by incomplete
        conformer search of local structs.
        """

        no_clash_dihe = {}
        for frag in self.first_frag.fragments(include_self=False):
            if frag.torsion_pattern in no_clash_dihe:
                # This dihedral type has been calculated previously
                frag.dihe_rand = no_clash_dihe[frag.torsion_pattern][:]
                frag.pool = frag.dihe_rand[:]
                continue
            # Calculate new no-clash dihedral candidates for new type of dihedral
            local_struct = self.local_structs[frag.local_struct_smiles]
            if not local_struct:
                no_clash_dihe[frag.torsion_pattern] = frag.dihe_rand[:]
                continue
            dihedral = frag.local_struct_dihedral
            dihe_range = frag.dihe_rand[:]
            side_dihe_num = len(frag.local_struct_side_dihedrals)
            rings = local_struct.ring
            self.has_rings = bool(rings)
            # Remove dihedral values leading to unavoidable local clashes
            # 1. Copy the local structure around one center dihedral
            # 2. Set the center dihedral to one possible candidate value
            # 3. Randomly rotate side dihedrals in the local structure to
            #    generate MAX_HIT_NUM conformers
            # 4. If all of the conformers have intra clashes, remove that
            #    dihedral value from the center dihedral candinates
            for dihe_value in dihe_range:
                struct = local_struct.copy()
                struct.adjust(dihe_value, *dihedral)
                local_clash = self.checkForIntraStructureClashes(struct,
                                                                 rings=rings)
                if not local_clash:
                    continue
                # FIXME: this functions is to slow to do full conformational search
                # If local struct has 5 side rotatable bonds, 73^5 generates too many
                # conformers, and the checkForIntraStructureClashes is too slow.
                # Currently randomly pick some out of all conformers
                for conformer_idx in range(MAX_HIT_NUM * side_dihe_num):
                    for side_dihe_idx, side_dihe_value in enumerate(
                            numpy.random.choice(dihe_range, side_dihe_num)):
                        struct.adjust(
                            side_dihe_value,
                            *frag.local_struct_side_dihedrals[side_dihe_idx])
                    if not self.checkForIntraStructureClashes(struct,
                                                              rings=rings):
                        local_clash = False
                        break
                if local_clash:
                    # If MAX_HIT_NUM * side_dihe_num conformers all have clashes,
                    # remove this value from dihedral angle candidates
                    frag.dihe_rand.remove(dihe_value)
            no_clash_dihe[frag.torsion_pattern] = frag.dihe_rand[:]
            frag.dihe_pool = frag.dihe_rand[:]
            if not frag.dihe_rand:
                msg = ("No clash-free rotation can be found for atoms %s, "
                       "please use a smaller VDW scale factor." %
                       '-'.join(map(str, frag.dihedral)))
                raise RotationError(msg)

[docs]    @common.mmffld_environment()
    def torsionEnergySurf(self, force_constant=10., temperature=300.):
        """
        Sample torsion energy surface for local structs, and initialize the
        dihedral angle distribution using Boltzmann distribution.

        :type force_constant: float
        :param force_constant: the energy constant for torsion restraints

        :type temperature: float, or None
        :param temperature: the temperature to calculate dihedral angle distribution
        """

        self.torsion_energy_surf = {}
        beta = old_div(1.0, (BOLTZMANN * temperature))
        forcefield_num = mm.opls_name_to_version(self.forcefield)
        for frag in self.first_frag.fragments(include_self=False):
            if frag.torsion_pattern in self.torsion_energy_surf:
                # This torsion energy surf has been calculated previously
                frag.torsion_probability = self.torsion_energy_surf[
                    frag.torsion_pattern].torsion_probability.copy()
                # randomly sample according to the Boltzmann distribution
                frag.dihe_pool = list(
                    numpy.random.choice(frag.dihe_rand,
                                        len(frag.dihe_rand),
                                        replace=False,
                                        p=frag.torsion_probability))
                continue
            # generate energy surface for this new torsion_pattern
            local_struct = self.local_structs[frag.local_struct_smiles]
            energy_surf = EnergySurface(frag.dihe_rand)
            # if invalid struct, assign uniform distribution
            if not local_struct or desmondutils.find_forcefield_invalid_structures(
                [local_struct], forcefield_num):
                energy_surf.setUniformDistribution()
            else:
                struct = local_struct.copy()
                dihedral = frag.local_struct_dihedral
                energy_surf.setBoltzmannDistribution(struct, dihedral,
                                                     forcefield_num,
                                                     force_constant, beta)
                self.torsion_energy_surf[frag.torsion_pattern] = energy_surf
                frag.torsion_probability = energy_surf.torsion_probability.copy(
                )
            # randomly sample according to the Boltzmann distribution
            frag.dihe_pool = list(
                numpy.random.choice(frag.dihe_rand,
                                    len(frag.dihe_rand),
                                    replace=False,
                                    p=frag.torsion_probability))

[docs]    def extractWithMap(self, struct, indices, copy_props=False):
        """
        Return a new structure object which contains the atoms of
        the current structure that appear in the specified list and
        a dict which maps atom ids in current structure to those in
        newly fextracted substructure.

        :type struct: `structure.Structure`
        :param struct: the structure to extract substructs from

        :type indices: list of int
        :param indices: the ids of atoms to be extracted

        :type copy_props: bool
        :param copy_props: whether to copy the structure level property

        :rtype: `structure.Structure`, dict
        :return: substructure, dict mapping to atom ids in substructure
        """

        for atom_id in indices:
            struct.atom[atom_id].property[ORIG_ATOM_IDX_PROP] = atom_id
        sub_struct = struct.extract(indices, copy_props=copy_props)
        map_atom_id = {
            atom.property[ORIG_ATOM_IDX_PROP]: atom.index
            for atom in sub_struct.atom
        }

        return sub_struct, map_atom_id

[docs]    def getCapperTerminator(self, is_coarse_grain):
        """
        Get a terminator moiety to cap the atom in a broken bond.

        :type is_coarse_grain: bool
        :param is_coarse_grain: whether the structure is coarse grain

        :rtype: `Terminator`
        :return: terminator moiety of one single H or C atom with Br marker.
        """

        struct = get_generic_end_structure(is_coarse_grain=is_coarse_grain)
        return Terminator(struct)

[docs]    def findNeighborFrags(self, separated_bond_num=2):
        """
        Search for and save neighboring fragments for each fragment.
        For any fragments, the bond between dihe 2nd and 3rd atoms is rotatable.
        If any other fragments has any atoms that are within separated_bond_num bonds
        away from the dihe 2nd and 3rd atoms of one certain fragment, they are
        considered neighbor fragments of that fragment.

        :type separated_bond_num: int
        :param separated_bond_num: criteria for defining neighbor fragments
        """

        graph = analyze.create_nx_graph(self.template_polymer)

        for cur_frag in self.first_frag.fragments(include_self=False):
            pre_frag = cur_frag.pre_frag
            # Search for the most faraway parent fragment
            while pre_frag.pre_frag:
                if networkx.shortest_path_length(
                        graph,
                        source=cur_frag.dihedral[2],
                        target=pre_frag.dihedral[2]) <= separated_bond_num:
                    pre_frag = pre_frag.pre_frag
                else:
                    break
            # Add the the most faraway parent fragment whose dihe 3rd is outside the
            # searching range, but dihe 4th is inside the searching range
            cur_frag.neighbor_frags.append(pre_frag)
            next_frags = pre_frag.next_frags[:]
            # search and save other parent fragments, itself, and child fragments
            while next_frags:
                frag = next_frags.pop()
                bond_num_to_dihe_2nd = networkx.shortest_path_length(
                    graph, source=cur_frag.dihedral[1], target=frag.dihedral[2])
                bond_num_to_dihe_3rd = networkx.shortest_path_length(
                    graph, source=cur_frag.dihedral[2], target=frag.dihedral[2])
                if min([bond_num_to_dihe_3rd, bond_num_to_dihe_2nd
                       ]) <= separated_bond_num:
                    cur_frag.neighbor_frags.append(frag)
                    next_frags += frag.next_frags[:]

[docs]    def setLocalStructAtomIds(self):
        """
        Set the local struct atom ids and end atom ids defined by neighbor fragments.
        """

        for cur_frag in self.first_frag.fragments(include_self=False):
            ini_as_neighbor = False
            # add atom ids in neighbor_frags to local_struct_atom_ids
            for frag in cur_frag.neighbor_frags:
                cur_frag.local_struct_atom_ids.update(frag.atom_ids)
                if not frag.pre_frag:
                    ini_as_neighbor = True
                    skip_ini_end_atom_id = set()
            # fine tune local_struct_atom_ids for those atoms in rotatable bonds
            excluded_side_frags = {cur_frag.fragnum}
            neighbor_frags = set(cur_frag.neighbor_frags)
            for frag in cur_frag.neighbor_frags:
                if not frag.pre_frag:
                    for next_frag in frag.next_frags:
                        if next_frag in neighbor_frags and \
                        next_frag.fragnum not in excluded_side_frags:
                            # Bond between INI frag and next_frag is not broken
                            # save side rotatable bonds for conformer search
                            cur_frag.local_struct_side_dihedrals.append(
                                next_frag.dihedral[:])
                            excluded_side_frags.add(next_frag.fragnum)
                    continue
                # this is not INI frag
                if frag.dihedral[2] not in cur_frag.local_struct_atom_ids:
                    # cur_frag is the most faraway parent fragment other than INI
                    cur_frag.local_struct_atom_ids.update(frag.dihedral[1:3])
                    cur_frag.end_atom_ids.append(frag.dihedral[1])
                # useful when INI has multiple R groups
                if not frag.pre_frag.pre_frag:
                    # This is the dihe 3rd atom saved in a frag connected to INI
                    skip_ini_end_atom_id.add(frag.dihedral[2])
                for next_frag in frag.next_frags:
                    if next_frag.dihedral[
                            3] not in cur_frag.local_struct_atom_ids:
                        # frag is one of the most faraway child fragments
                        cur_frag.end_atom_ids.append(next_frag.dihedral[2])
                    elif next_frag.fragnum not in excluded_side_frags:
                        # Bond between frag and next_frag is not broken
                        # save side rotatable bonds for conformer search
                        cur_frag.local_struct_side_dihedrals.append(
                            next_frag.dihedral[:])
                        excluded_side_frags.add(next_frag.fragnum)

            if ini_as_neighbor:
                for atom_id in self.first_frag.bonded_atom_ids:
                    if atom_id not in skip_ini_end_atom_id:
                        cur_frag.end_atom_ids.append(atom_id)

[docs]    def isCgPolymer(self):
        """
        Coarse grain particles of one type are considerred as one type
        of polymer residue. If one cg structure has INI, TRM, and others
        as residues, consider it built from polymer builder, assuming
        all bonds between particles are rotatable.

        :rtype: bool
        :return: True, if the cg structure is originally from polymer builder
            and prepared properly.
        """

        int_trm_set = {INI, TRM}
        residue_name = {}
        residue_number = 0
        monomer_idx = 0
        for atom in self.template_polymer.atom:
            residue_number += 1
            if atom.name not in residue_name:
                if atom.name in int_trm_set:
                    # INT and TRM do not change pdbres
                    residue_name[atom.name] = atom.name
                else:
                    # Monomers have A-Z as pdbres, which means 0-25 for
                    # monomer_idx.
                    if monomer_idx == 26:
                        # This is not a Cg polymer as the number of monomer
                        # types exceeds 26
                        return False
                    atom_name = string.ascii_uppercase[monomer_idx].ljust(4)
                    residue_name[atom.name] = atom_name
                    monomer_idx += 1
            atom.pdbres = residue_name[atom.name]
            atom.resnum = residue_number
            atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = 1
            atom.property[HEAD_ATOM_PROP] = True
            # Original Monomers have TAIL_ATOM_PROP
            # For newly broken ones from side groups, extra markers cause no trouble
            atom.property[TAIL_ATOM_PROP] = True
            if atom.bond_total > 2:
                atom.property[BRANCH_ATOM_PROP] = True

        return len(residue_name) > 2 and (INI
                                          in residue_name) and (TRM
                                                                in residue_name)

[docs]    def renumberResGetM2PMap(
            self, monomer_orig_idx_prop=msprops.MONOMER_ORIG_ATOM_IDX_PROP):
        """
        Loop over all the residues and create mapping from atom id in residue
        to that in polymer.

        :param monomer_orig_idx_prop: The atom property to define the original
            atom index
        :type monomer_orig_idx_prop: str
        """

        self.connected_resnums = {}
        self.atom_idx_map_m2p = {}
        for residue_num, residue in enumerate(self.template_polymer.residue, 1):
            residue.resnum = residue_num
            self.atom_idx_map_m2p[residue_num] = {}
            for atom in residue.atom:
                # Create mapping between atom index to connected resnum
                self.connected_resnums.setdefault(atom.index, [])
                # Create map from atom id in monomer to that in polymer
                # xx_atom_id_monomer refers to the atom id in monomer
                self.atom_idx_map_m2p[residue_num][
                    atom.property[monomer_orig_idx_prop]] = atom.index

[docs]    def updateAllAtomIndexMaps(self):
        """
        Get information from previous polymer build for polymer grow in cell.
        """

        self.residue_connectivity = {}
        self.backbone_side_atoms = {}
        self.backbone = {}
        self.backbone_to_marked_end = {}
        self.rotatable_bonds = {}
        self.setInitiatorResidue()
        for residue in self.template_polymer.residue:
            residue_type = residue.pdbres.rstrip()
            moiety = self.moieties.getMoiety(residue_type)
            residue_num = residue.resnum
            self.setResidueConnectivity(moiety, residue_num)
            self.setBackboneAndSideGroup(moiety, residue_num)
            if residue_type == INI or residue_type == TRM:
                continue
            self.setPathtoBranchingAtom(moiety.backbone_to_marked_end,
                                        residue_num)
            self.setRotatableBond(moiety.rotatable_bonds, residue_num)

[docs]    def setInitiatorResidue(self):
        """
        Save the residue containing the polymer initiator.
        """

        for residue in self.template_polymer.residue:
            if residue.pdbres.rstrip() == INI:
                self.initiator_res = residue
                return

[docs]    def setResidueConnectivity(self, moiety, residue_num):
        """
        Create mapping from connected resnum pair to connected atom pair.

        :type moiety: `schrodinger.structure.Structure`
        :param moiety: one moiety used to construct part of the polymer

        :type residue_num: int
        :param residue_num: residue num
        """

        # residue_connectivity[residue num][connected resnum] =
        # [atom id in residue, connected atom in connected resnum]
        self.residue_connectivity[residue_num] = {}
        for marker_id in moiety.markers:
            # atom in current residue bonded to another residue
            atom_id = self.atom_idx_map_m2p[residue_num][next(
                moiety.raw_struct.atom[marker_id].bonded_atoms).index]
            for neighbor in self.template_polymer.atom[atom_id].bonded_atoms:
                if neighbor.resnum != residue_num:
                    self.residue_connectivity[residue_num][neighbor.resnum] = [
                        atom_id, neighbor.index
                    ]
                    if neighbor.resnum not in self.connected_resnums[atom_id]:
                        self.connected_resnums[atom_id].append(neighbor.resnum)

[docs]    def setBackboneAndSideGroup(self, moiety, residue_num):
        """
        Update backbone and side group maps.

        :type moiety: `schrodinger.structure.Structure`
        :param moiety: one moiety used to construct part of the polymer

        :type residue_num: int
        :param residue_num: residue num
        """

        self.backbone_side_atoms[residue_num] = OrderedDict()
        # Backbone atom id in monomer (bb_atm_id_m)
        for bb_atm_id_m in moiety.backbone_path_indexes:
            # Backbone atom id in polymer (bb_atm_id_p)
            bb_atm_id_p = self.atom_idx_map_m2p[residue_num][bb_atm_id_m]
            self.backbone_side_atoms[residue_num][bb_atm_id_p] = []
            # Side atom id in monomer (side_atm_id_m)
            for side_atm_id_m in moiety.backbone_side_atoms[bb_atm_id_m]:
                side_atm_id_p = self.atom_idx_map_m2p[residue_num][
                    side_atm_id_m]
                # [residue_num][backbone_atom_id] = [side_group_atom_ids]
                self.backbone_side_atoms[residue_num][bb_atm_id_p].append(
                    side_atm_id_p)

[docs]    def setPathtoBranchingAtom(self, moiety_bb_to_marked_end, residue_num):
        """
        Update maps for the path between backbone atom to atom branching point.

        :type moiety_bb_to_marked_end: dict
        :param moiety_bb_to_marked_end: Keys are backbone atom indexes, values are
            dictionaries with keys being the branching atom and values being the path
            between backbone atom and branching atom

        :type residue_num: int
        :param residue_num: residue num
        """

        for backbone_atom_id, branching_iterm in moiety_bb_to_marked_end.items(
        ):
            polymer_backbone_atom_id = self.atom_idx_map_m2p[residue_num][
                backbone_atom_id]
            self.backbone_to_marked_end[polymer_backbone_atom_id] = {}
            for branching_atom_id, path_with_ends in branching_iterm.items():
                new_path_with_ends = []
                for atom_id in path_with_ends:
                    new_path_with_ends.append(
                        self.atom_idx_map_m2p[residue_num][atom_id])
                bonded_to_marker = self.atom_idx_map_m2p[residue_num][
                    branching_atom_id]
                self.backbone_to_marked_end[polymer_backbone_atom_id][
                    bonded_to_marker] = new_path_with_ends

[docs]    def setRotatableBond(self, moiety_rotatable_bonds, residue_num):
        """
        Update maps for the path between backbone atom to atom branching point.

        :type moiety_rotatable_bonds: list of tuple
        :param moiety_rotatable_bonds: (atom 1, atom 2) between which bond is rotatable

        :type residue_num: int
        :param residue_num: residue num
        """

        rotatable_bonds = []
        for atom0, atom1 in moiety_rotatable_bonds:
            rotatable_bonds.append({
                self.atom_idx_map_m2p[residue_num][atom0],
                self.atom_idx_map_m2p[residue_num][atom1]
            })
        # Rotatable_bonds[residue_type] = (set(atom1, atom2)...)
        # and the bond between atom1 and atom2 is rotatable
        self.rotatable_bonds[residue_num] = tuple(rotatable_bonds)

[docs]    def createFragForInitiator(self):
        """
        Create the the first polymer fragment from the initiator and
        append the following children of it.
        """

        cur_resnum = self.initiator_res.resnum
        self.first_frag = PolymerFragment(
            cur_resnum, 0, template_polymer=self.template_polymer)
        for atom in self.initiator_res.atom:
            self.first_frag.atom_ids.append(atom.index)
        # Multiple Rx groups in INI
        next_resnum = list(self.getDelNextResnumByResnum(cur_resnum))
        for next_resnum, [atom_id, next_atom_id] in next_resnum:
            sub_backbone = []
            # Try to grab another atom in INI as dihedral[0]
            atom_id_ini0 = self.bondedAtomIdInSameRes(atom_id)
            if atom_id_ini0:
                # Add the atom connected to the terminating atom in INI
                sub_backbone.append(atom_id_ini0)
                self.first_frag.pre_backbone.append(atom_id_ini0)
                self.backbone_to_marked_end[atom_id_ini0] = {}
            # Add terminating atom in INI to sub_backbone and pre_backbone
            sub_backbone.append(atom_id)
            self.first_frag.pre_backbone.append(atom_id)
            backbone_atom_ids = self.resBackbone(next_resnum, next_atom_id)
            for idx, backbone_atom_id in enumerate(backbone_atom_ids):
                # Add first atom in first monomer
                sub_backbone.append(backbone_atom_id)
                self.first_frag.atom_ids.append(backbone_atom_id)
                self.first_frag.pre_backbone.append(backbone_atom_id)
                if len(sub_backbone) > 2 and (set(
                        sub_backbone[-2::]) in self.rotatable_bonds[next_resnum]
                                              or idx == 0):
                    # dihe 2nd-3rd is found rotatable
                    try:
                        dihe_4th_atom_id = backbone_atom_ids[idx + 1]
                    except IndexError:
                        # dihe 3rd is the last atom in monomer
                        dihe_4th_atom_id = None
                    self.first_frag.resnum = next_resnum
                    # Rotatable bond found before M-M bond
                    self.createEndSubFragment(self.first_frag, next_resnum,
                                              sub_backbone, next_resnum,
                                              dihe_4th_atom_id)
                    break
                else:
                    # dihe 1st-2rd or non-rotatable dihe 2nd-3rd
                    # Add side atoms and continue to add more backbone atom
                    self.first_frag.atom_ids += self.backbone_side_atoms[
                        next_resnum][backbone_atom_id]
                    continue
            else:
                # When no rotatable bonds in M or struct H-C (H-M as polymer)
                cur_resnum = next_resnum
                # This for loop is to tackle branching at dihe 2nd
                for next_resnum, [atom_id, next_atom_id
                                 ] in self.getDelNextResnumByResnum(cur_resnum):
                    self.first_frag.atom_ids.append(next_atom_id)
                    backbone_atom_ids = self.resBackbone(
                        next_resnum, next_atom_id)
                    dihe_4th_atom_id = None if len(
                        backbone_atom_ids) == 1 else backbone_atom_ids[1]
                    self.first_frag.resnum = next_resnum
                    # Rotatable M-M bond
                    ordered_backbone = self.getInitiatorSubBackbone(
                        sub_backbone, next_atom_id)
                    self.createEndSubFragment(self.first_frag, next_resnum,
                                              ordered_backbone + [next_atom_id],
                                              next_resnum, dihe_4th_atom_id)
        for frag in self.first_frag.next_frags:
            self.addBranchingAtoms(frag)

        # save the end atom ids for ini fragment
        self.first_frag.bonded_atom_ids = []
        for frag in self.first_frag.next_frags:
            self.first_frag.bonded_atom_ids.append(frag.dihedral[2])

[docs]    def getInitiatorSubBackbone(self, sub_backbone, dihe_3rd):
        """
        Return a list of atom ids: the last two atoms are bonded and the last
        bonds with the dihe 3rd atom.

        :type sub_backbone: list
        :param sub_backbone: the last two atoms are bound and the last one binds
            with the dihe 3rd atom.

        :type dihe_3rd: int
        :param dihe_3rd: the 3rd atom in a dihedral
        """

        # Dihedral angle atoms are bonded: dihe_1st - dihe_2nd - dihe_3rd
        # By default sub_backbone[-2:] = [dihe_1st, dihe_2nd]
        if self.template_polymer.areBound(sub_backbone[-1], dihe_3rd):
            # No modification needed, since dihe_2nd and dihe_3rd are bonded
            return sub_backbone

        # Redefine dihe_2nd by searching the atoms in first_frag
        for dihe_2nd in self.first_frag.atom_ids:
            if self.template_polymer.areBound(dihe_2nd, dihe_3rd):
                # The dihe_2nd atom is in first_frag and bonds with dihe_3rd
                break

        for dihe_1st_atom in self.template_polymer.atom[dihe_2nd].bonded_atoms:
            if dihe_1st_atom.index != dihe_3rd:
                # The dihe_1st_atom binds with dihe_2nd and is not dihe_3rd
                break

        return [dihe_1st_atom.index, dihe_2nd]

[docs]    def finishPolymerFrags(self):
        """
        Generate all polymer fragments from the first generation fragment
        containing INT and the following generation fragments.
        """

        fragments = [self.first_frag] + self.first_frag.next_frags[:]
        while fragments:
            frag = fragments.pop(0)
            # dihe_4th_atom_ids saves extra branching sites
            for dihe_4th_atom_id, dihe_4th_resnum, sub_backbone in frag.dihe_4th_atom_ids:
                backbone_atom_ids = self.continueResBackbone(dihe_4th_atom_id)
                new_frag = None
                # Along backbone_atom_ids to find rotatable a bond in each branch
                for idx, backbone_atom_id in enumerate(backbone_atom_ids):
                    sub_backbone.append(backbone_atom_id)
                    if backbone_atom_id not in frag.atom_ids:
                        frag.atom_ids.append(backbone_atom_id)
                    rotatable = set(
                        sub_backbone[-2::]) in self.rotatable_bonds[frag.resnum]
                    if rotatable or dihe_4th_resnum != frag.resnum:
                        dihe_4th_atom_id = None if backbone_atom_id == backbone_atom_ids[
                            -1] else backbone_atom_ids[idx + 1]
                        new_frag = self.createEndSubFragment(
                            frag, dihe_4th_resnum, sub_backbone,
                            dihe_4th_resnum, dihe_4th_atom_id)
                        if new_frag:
                            self.addBranchingAtoms(new_frag)
                            fragments.append(new_frag)
                        break
                    else:
                        # The dihe 2-3rd bond is not a rotatable bond within
                        # residue and is not a cross-residue bond either
                        frag.atom_ids += self.backbone_side_atoms[
                            dihe_4th_resnum][backbone_atom_id]
                        frag.pre_backbone.append(backbone_atom_id)
                        continue
                else:
                    # No 2-3 rotatable bonds found, but reach the end of the residue
                    for next_resnum, [
                            atom_id, next_atom_id
                    ] in self.getDelNextResnumByAtomID(backbone_atom_id):
                        connected_backbone_atom_ids = self.resBackbone(
                            next_resnum, next_atom_id)
                        if len(connected_backbone_atom_ids) > 1:
                            dihe_4th_atom_id = connected_backbone_atom_ids[1]
                        else:
                            dihe_4th_atom_id = None
                        frag.atom_ids.append(next_atom_id)
                        new_frag = self.createEndSubFragment(
                            frag, next_resnum, sub_backbone + [next_atom_id],
                            next_resnum, dihe_4th_atom_id)
                        if new_frag:
                            self.addBranchingAtoms(new_frag)
                            fragments.append(new_frag)
                        resnum = self.template_polymer.atom[
                            backbone_atom_id].resnum
                        if not self.residue_connectivity.get(resnum):
                            # MATSCI-10158: the in-place modification on self.residue_connectivity
                            # empties the content, which causes tracebacks in the next loop.
                            break
                    else:
                        self.addBranchingAtoms(frag)

[docs]    def appendMissedFrags(self):
        """
        When polymer fragments don't find all the atoms in the original structure,
        this methods first groups them by bond connections and appends each group
        to a found fragment.
        """

        # This is a temporary workaround that misses the rotatable bonds between
        # the missed atom groups and the found fragments.
        # This also ignores all rotatable bond within the missed atom group.
        # Collect the edge cases for a general fix all-togather: MATSCI-7009
        all_atom_ids = set(self.template_polymer.getAtomIndices())
        self.first_frag.adjustAtomIds(0)
        all_missed_atom_ids = all_atom_ids.difference(
            set(self.first_frag.atom_ids_in_mol))
        if not all_missed_atom_ids:
            return

        missed_atom_id_groups = get_missed_atom_id_groups(
            self.template_polymer, all_missed_atom_ids)
        connection_infos = self.getConnectionInfos(missed_atom_id_groups,
                                                   all_missed_atom_ids)
        msg = (
            f"{connection_infos[0].connecting_atom_ids} in found fragments connect "
            f"missed_atom_id_groups {connection_infos[0].missed_atom_ids}")
        if self.logger:
            self.logger.debug(msg)
        for connection_info in connection_infos:
            for frag in self.first_frag.fragments():
                intersection_ids = connection_info.connecting_atom_ids.intersection(
                    frag.atom_ids)
                if intersection_ids:
                    # This assumes that the linking between the missed frag
                    # and the found one is not rotatable and all bonds within
                    # the missed atoms are not rotatable
                    frag.atom_ids += connection_info.missed_atom_ids

[docs]    def getConnectionInfos(self, missed_atom_id_groups, all_missed_atom_ids):
        """
        Return the connecting atoms, which belong to the atoms in the found
        fragment and connect to the missed atoms.

        :type missed_atom_id_groups: list of list
        :param missed_atom_id_groups: each sublist has missed atoms connected by
            covalent bonds.

        :type all_missed_atom_ids: list of int
        :param all_missed_atom_ids: a flat list of all missed atom indexes

        :rtype: list of set
        :return: each set has the connecting atoms, which belong to the found
            atoms in the fragment and are connected to the missed atoms.
        """

        connection_infos = []
        for missed_atom_id_group in missed_atom_id_groups:
            connecting_atom_group = []
            for atom_id in missed_atom_id_group:
                connecting_atom_group += [
                    at.index
                    for at in self.template_polymer.atom[atom_id].bonded_atoms
                ]
            connecting_atom_group = set(connecting_atom_group)
            connecting_atom_group = connecting_atom_group.difference(
                all_missed_atom_ids)
            connection_info = ConnectionInfo(
                missed_atom_ids=missed_atom_id_group,
                connecting_atom_ids=connecting_atom_group)
            connection_infos.append(connection_info)

        return connection_infos

[docs]    def assignRingAtomIds(self):
        """
        Assign ring atom indexes to each fragment.
        """

        # FIXME: rings_atom_ids shoud be searched in monomers and mapped to polymer
        for frag in self.first_frag.fragments():
            for ring in self.template_polymer.ring:
                rings_atom_ids = ring.getAtomIndices()
                shared_atom_ids = set(
                    frag.atom_ids).intersection(rings_atom_ids)
                if len(shared_atom_ids) > 1:
                    frag.rings_atom_ids.append(rings_atom_ids[:])

[docs]    def structToOriginByFrag(self):
        """
        Move the structure so the centroid of the fragment
        is at the origin.
        """

        center = self.getFragCentroid(self.template_polymer, self.first_frag)
        self.template_polymer.setXYZ(self.template_polymer.getXYZ() - center)

[docs]    def getFragCentroid(self, struct, frag):
        """
        Calculate the centroid of polymer fragment.

        :type frag: `schrodinger.structure.Structure`
        :param frag: polymer structure to get atom positions from

        :type frag: `Polymerfragment`
        :param frag: The polymer fragment to get atom ids from

        :rtype: list
        :return: the centroid of polymer fragment
        """

        return transform.get_centroid(struct, atom_list=frag.atom_ids)[:3]

[docs]    def getDelNextResnumByResnum(self, cur_resnum):
        """
        Given resnum, find all the connected residues and atoms.
        Delete the connectivity from current residue to next residue
        and the connectivity from next residue to current residue.

        :type cur_resnum: int
        :param cur_resnum: the residue number which the atom id belongs to

        :type del_connection: bool
        :param del_connection: del the connection from current atom and residue
            to the connected atom and residue

        :rtype: iterator to generate int, [int, int]
        :return: the connected residue number, [atom id, connected atom id]
        """

        for next_resnum, [atom_id, next_atom_id] in list(
                self.residue_connectivity[cur_resnum].items()):
            self.delConnectivity(cur_resnum, next_resnum, atom_id, next_atom_id)
            yield next_resnum, [atom_id, next_atom_id]

[docs]    def delConnectivity(self, cur_resnum, next_resnum, atom_id, next_atom_id):
        """
        Delete the connectivity from current residue to next residue
        and the connectivity from next residue to current residue.

        :type cur_resnum: int
        :param cur_resnum: the residue number which the current atom id belongs to

        :type next_resnum: int
        :param next_resnum: the residue number which the connected atom id belongs to

        :type atom_id: int
        :param atom_id: the atom id from which other connections are searched

        :type next_atom_id: int
        :param next_atom_id: one connected atom id
        """

        self.residue_connectivity[next_resnum].pop(cur_resnum)
        self.connected_resnums[next_atom_id].remove(cur_resnum)
        self.residue_connectivity[cur_resnum].pop(next_resnum)
        self.connected_resnums[atom_id].remove(next_resnum)

[docs]    def createEndSubFragment(self,
                             cur_frag,
                             dihe_3rd_resnum,
                             sub_backbone,
                             dihe_4th_resnum,
                             dihe_4th_atom_id,
                             append_to_frag=None,
                             last_created_frag=False):
        """
        Create one child PolymerFragment or append atoms to append_to_frag.

        :type cur_frag: `PolymerFragment`
        :param cur_frag: current polymer fragment

        :type dihe_3rd_resnum: int
        :param dihe_3rd_resnum: the resnum of the 3rd atom in dihedral

        :type sub_backbone: list of int
        :param sub_backbone: the backbone whose last atom (dihedral 3rd) will
            be connected to new fragment

        :type dihe_4th_resnum: int
        :param dihe_4th_resnum: a guess of the resnum of the 4th atom in res

        :type dihe_4th_atom_id: int or None
        :param dihe_4th_atom_id: the 4th atom of dihedral, if int; None, if
            the 4th atom is in the adjacent res

        :type append_to_frag: PolymerFragment or None
        :param append_to_frag: if None, the found 'branching' is new polymer
            fragment, and new PolymerFragment is created with dihedral_4th_atom.
            if not None, the found 'branching' is part of the old polymer fragment
            and no new PolymerFragment is created. Add dihedral_4th_atom to old
            polymer fragment.

        :type last_created_frag: bool
        :param last_created_frag: if True, will not return fragment

        :rtype: `PolymerFragment` or None
        :return: the newly created `PolymerFragment`, or None if atoms are
            appended to append_to_frag fragment
        """

        dihe_3rd_atom_id = sub_backbone[-1]
        if not dihe_4th_atom_id:
            # Dihedral 3rd and 4th atoms in the different residues
            # in other words, bond connects 2 residues
            # dihe_4th_resnum and dihe_4th_atom_id are assigned by for loop
            for dihe_4th_resnum, [
                    atom_id, dihe_4th_atom_id
            ] in self.getDelNextResnumByAtomID(dihe_3rd_atom_id):
                # At the end of the residue, dihedral 3rd may connect to
                # multiple residues, get one as the connecting residue
                break
            else:
                # dihedral 3rd is the termination atom of current moiety
                dihe_4th_atom_id = self.bondedAtomIdInSameRes(dihe_3rd_atom_id)
                # FIXME: moiety broken from side group chain has no attached TRM
                if dihe_4th_atom_id and self.template_polymer.atom[
                        dihe_4th_atom_id].pdbres.rstrip() == TRM:
                    # The bond between TRM and the rest of a polymer is considered
                    # rotatable, as long as TRM has more than one atom.
                    last_created_frag = True
                else:
                    cur_frag.atom_ids += self.backbone_side_atoms[
                        dihe_3rd_resnum][dihe_3rd_atom_id]
                    return None

        if not append_to_frag:
            new_frag = PolymerFragment(dihe_3rd_resnum,
                                       cur_frag.generation + 1,
                                       pre_frag=cur_frag,
                                       pre_backbone=sub_backbone)
            new_frag.dihedral = sub_backbone[-3::] + [dihe_4th_atom_id]
            new_frag.dihe_4th_resnum = dihe_4th_resnum
            new_frag.atom_ids += self.backbone_side_atoms[dihe_3rd_resnum][
                dihe_3rd_atom_id]
            cur_frag.next_frags.append(new_frag)
        else:
            new_frag = append_to_frag
        # Save branching dihe 4th atoms and related pre_backbone
        new_frag.dihe_4th_atom_ids.append(
            tuple([dihe_4th_atom_id, dihe_4th_resnum, sub_backbone[-2::]]))
        return None if (append_to_frag or last_created_frag) else new_frag

[docs]    def addBranchingAtoms(self, cur_frag):
        """
        Append extra dihedral 4th atoms for branching. Add to the parent of cur_frag
        instead of cur_frag, if the current fragment and new branch share the 1st,
        2nd, and 3rd atoms in a dihedral angle.

        :type cur_frag: `PolymerFragment`
        :param cur_frag: branching points will be added to the this fragment
        """

        # cur_frag.dihedral[2] is the dihe 3rd atom in cur_frag
        dihe_3rd_and_all = [cur_frag.dihedral[2]] + cur_frag.atom_ids
        for idx, atom_id in enumerate(cur_frag.pre_backbone):
            if atom_id not in self.backbone_to_marked_end:
                # 1 atom in INI could hit this
                continue

            for bonded_to_marker, path_with_ends in self.backbone_to_marked_end[
                    atom_id].items():
                branching_atom_id = path_with_ends[-1]
                sub_backbone = cur_frag.pre_backbone[:idx] + path_with_ends
                for dihe_4th_resnum, [
                        cur_atom_id, dihe_4th_atom_id
                ] in self.getDelNextResnumByAtomID(branching_atom_id):
                    if branching_atom_id not in dihe_3rd_and_all:
                        frag = cur_frag.pre_frag
                    else:
                        frag = cur_frag
                    frag.dihe_4th_atom_ids.append(
                        tuple([
                            dihe_4th_atom_id, dihe_4th_resnum,
                            sub_backbone[-2::]
                        ]))

[docs]    def resBackbone(self, resnum, atom_id):
        """
        Given residue number and starting atom id, return the short
        backbone path (from the starting atom id to the end) and set
        head/tail using dictionary.

        :type resnum: int
        :param resnum: the residue number

        :type atom_id: int
        :param atom_id: backbone path starts from this atom id

        :rtype: list of int
        :return: the atom ids of backbone from atom_id to the other end
        """

        backbone = list(self.backbone_side_atoms[resnum])
        # Monomer may be connected as headfirst or tailfirst
        is_headfirst = backbone[0] == atom_id
        if is_headfirst:
            self.backbone[resnum] = backbone
        else:
            self.backbone[resnum] = backbone[::-1]
        return self.backbone[resnum]

[docs]    def continueResBackbone(self, atom_id):
        """
        Return rest of the backbone path continuing from the atom_id.

        :type atom_id: int
        :param atom_id: atom id of the starting point

        :rtype: list
        :return: the atom ids of backbone from atom_id to the other end
        """

        resnum = self.template_polymer.atom[atom_id].resnum
        try:
            backbone = self.backbone[resnum]
        except KeyError:
            # set backbone by head-tail and return backbone
            return self.resBackbone(resnum, atom_id)
        index = backbone.index(atom_id)
        return backbone[index:]

[docs]    def bondedAtomIdInSameRes(self, atom_id):
        """
        Find the atom id of a neighbor atom within the same residue.

        :type atom_id: int
        :param atom_id: atom id

        :rtype: int or None
        :return: the atom id of a neighbor atom in the same residue
        """

        atom = self.template_polymer.atom[atom_id]
        atom_resnum = atom.resnum
        for neighbor in atom.bonded_atoms:
            if neighbor.resnum == atom_resnum:
                return neighbor.index
        return None

[docs]    def getDelNextResnumByAtomID(self, atom_id):
        """
        Given atom id, find all the connected residues and atoms.
        Delete the connectivity from current residue to next residue
        and the connectivity from next residue to current residue.

        :type atom_id: int
        :param atom_id: the atom id from which all connections are returned

        :type del_connection: bool
        :param del_connection: del the connection from current atom and residue
            to the connected atom and residue

        :rtype: iterator to generate int, [int, int]
        :return: the connected residue number, [atom id, connected atom id]
        """

        cur_resnum = self.template_polymer.atom[atom_id].resnum
        for next_resnum in self.connected_resnums[atom_id][:]:
            next_atom_id = self.residue_connectivity[cur_resnum][next_resnum][1]
            self.delConnectivity(cur_resnum, next_resnum, atom_id, next_atom_id)
            yield next_resnum, [atom_id, next_atom_id]

[docs]    def isAllAtomPolymer(self):
        """
        Each polymer built by polymer has one INI, TRM, and at least one MOMONOMER;
        each atom in polymer has MONOMER_ORIG_ATOM_IDX_PROP property; residues from
        the same moiety should have same number of atoms.

        :rtype: bool
        :return: True, if the structure is built using polymer builder.
        """

        residue_names = {}
        for residue in self.template_polymer.residue:
            for atom in residue.atom:
                if not atom.property.get(msprops.MONOMER_ORIG_ATOM_IDX_PROP):
                    # manually added atoms do not have MONOMER_ORIG_ATOM_IDX_PROP
                    # when users add some atoms to one polymer
                    return False
            residue_type = residue.pdbres.rstrip()
            res_appeared_before = residue_type in residue_names
            if residue_type == INI:
                if res_appeared_before:
                    # one single INI for each polymer
                    return False
            elif res_appeared_before:
                if len(residue.getAtomIndices()) != residue_names[residue_type]:
                    # residues from the same moiety should have same number of atoms
                    # when users del some atoms from one polymer
                    return False
                continue
            residue_names[residue_type] = len(residue.getAtomIndices())

        return (INI in residue_names) and (TRM in residue_names) and (
            len(residue_names) > 2)

[docs]    def breakIntoMols(self):
        """
        Make a copy of the template polymer and break the structure into
        molecules serving as INI, TRM, and Monomer.

        :rtype: {schrodinger.Structure.structure}, int
        :return: the structure copy broken into molecules serving as a moiety,
            atom id of one atom in INI
        """

        head_atom_id, _ = self.findHeadTail(self.template_polymer)
        side_atom_ids = self.template_polymer.getAtomIndices()
        side_atom_ids.remove(head_atom_id)
        nr_bonds = nearest_rotatable_bonds(self.template_polymer,
                                           self.template_polymer_rings,
                                           head_atom_id,
                                           side_atom_ids,
                                           pre_atom_prop=HEAD_ATOM_PROP,
                                           atom_prop=HEAD_ATOM_PROP)
        log_debug(f"breakIntoMols: head atom id: {head_atom_id}")
        log_debug(f"breakIntoMols: nearest rotatable bonds: {nr_bonds}")
        if self.debug:
            # Mark the bond to be break for debugging mode
            for atom1, atom2 in nr_bonds:
                bond = self.template_polymer.getBond(atom2, atom1)
                bond.setStyle(structure.ATOM_BALLNSTICK)

        struct_copy = self.template_polymer.copy()
        for atom_ids in nr_bonds:
            struct_copy.deleteBond(*atom_ids)

        head_atom_ids = [atom_ids[1] for atom_ids in nr_bonds]
        for mol in struct_copy.molecule:
            mol_atom_ids = set(mol.getAtomIndices())
            if head_atom_id in mol_atom_ids:
                # this mol is INI
                continue
            # Longest simple path with end marked as tail atom
            mono_head_atom_id = mol_atom_ids.intersection(head_atom_ids).pop()
            # Mark the backbone for debugging mode
            _, mono_tail_atom_id = self.findHeadTail(self.template_polymer,
                                                     atom_ids=mol_atom_ids,
                                                     source=mono_head_atom_id,
                                                     set_style=self.debug)
            mono_tail_atom = self.template_polymer.atom[mono_tail_atom_id]
            mono_tail_atom.property[TAIL_ATOM_PROP] = True

        if self.debug:
            # Mark the rings and write out intermediate st for debugging mode
            for idx, rings in enumerate(self.template_polymer_rings):
                color = get_color(idx)
                for atom_id in rings:
                    self.template_polymer.atom[atom_id].color = color
            filename = 'break_into_mols_' + self.debug_file
            self.template_polymer.write(filename)
            jobutils.add_outfile_to_backend(filename, backend=self.backend)
            log_debug(f"breakIntoMols: {filename} has nearest rotatable bonds"
                      " and atoms in rings marked.")

        return struct_copy, head_atom_id

[docs]    @staticmethod
    def findHeadTail(struct, atom_ids=None, source=None, set_style=False):
        """
        Find the head and tail atoms of the longest path. If multiple longest
        paths are found, rank those longest paths by the head atom ids and then
        tail atom ids. Choose the longest path with the smallest head/tail atom
        ids.

        :param struct: the structure to find head and tail
        :type struct: "schrodinger.structure.Structure"

        :param atom_ids: list of int or None
        :type atom_ids: atom ids of the target molecule. If None, all the atoms
            in the struct are used.

        :param source: node (head atom index), Starting node for path.
            If not specified, compute shortest path lengths using all nodes as
            source nodes.
        :type source: int

        :param set_style: Mark the head, tail, and backbone atoms
        :type source: bool

        :rtype: int, int
        :return: atom ids for head and tail atoms
        """

        if not atom_ids:
            atom_ids = struct.getAtomIndices()

        graph = analyze.create_nx_graph(struct, atoms=atom_ids)

        smallest_atom_id = min(atom_ids)
        head_tail_pairs = collections.defaultdict(list)
        # In case there is only one atom
        head_tail_pairs[smallest_atom_id].append(smallest_atom_id)
        max_path_length = 0
        shortest_path_length = networkx.shortest_path_length(graph,
                                                             source=source)
        if source:
            shortest_path_length = [(source, shortest_path_length)]
        for tmp_head_atom_id, tail_and_path_length in shortest_path_length:
            for tmp_tail_atom_id, path_length in tail_and_path_length.items():
                if max_path_length < path_length:
                    # A longer path found: clear the stored head / tail pairs
                    max_path_length = path_length
                    head_tail_pairs = collections.defaultdict(list)
                if max_path_length == path_length:
                    # Multiple longest paths found: append new head / tail pair
                    head_tail_pairs[tmp_head_atom_id].append(tmp_tail_atom_id)
        # Get the minimum of the head and tail atom id sets so that the pair is
        # unique for one structure
        head_atom_id = min(head_tail_pairs.keys())
        tail_atom_id = min(head_tail_pairs[head_atom_id])
        log_debug(
            f"shortest_path: {networkx.shortest_path(graph, source=head_atom_id, target=tail_atom_id)}"
        )
        if set_style:
            spath = networkx.shortest_path(graph,
                                           source=head_atom_id,
                                           target=tail_atom_id)
            for atom1, atom2 in zip(spath[:-1], spath[1:]):
                bond12 = struct.getBond(atom1, atom2)
                bond12.setStyle(structure.ATOM_CPK)
                bond21 = struct.getBond(atom2, atom1)
                bond21.setStyle(structure.ATOM_CPK)
            h_atom = struct.atom[spath[0]]
            h_atom.style = structure.ATOM_BALLNSTICK
            t_atom = struct.atom[spath[-1]]
            t_atom.style = structure.ATOM_BALLNSTICK
        return head_atom_id, tail_atom_id

[docs]    def setAndMarkResidues(self):
        """
        Break the molecule into fragments by rotatable bonds and set each fragment
        as one residue.

        :rtype: bool
        :return: True, if struct_copy has more than one molecule, serving as
            INI, TRM, and Monomers
        """

        if self.template_polymer.atom_total < 4:
            # Structures with less than 4 atoms don't have rotatable bonds
            return False

        # non-polymer struct may be obtained by modifying polymer struct
        for atom in self.template_polymer.atom:
            for prop in ATOM_PROP_MARKER:
                atom.property[prop] = False

        struct_copy, head_atom_id = self.breakIntoMols()
        if len(struct_copy.molecule) == 1:
            return False

        if len(struct_copy.molecule) > self.LETTER_NUM**2:
            return False

        for residue_number, mol in enumerate(struct_copy.molecule, 1):
            atom_indexes = set(mol.getAtomIndices())
            if head_atom_id in atom_indexes:
                residue_name = INI
            else:
                floor_div = residue_number // self.LETTER_NUM
                residue_name = chr(65 + floor_div) if floor_div else ''
                residue_name += chr(65 + residue_number % 26)
                residue_name = residue_name.ljust(4)
            self.setResProperties(residue_name, atom_indexes, residue_number)

        if self.debug:
            for residue_number, mol in enumerate(struct_copy.molecule, 1):
                color = get_color(residue_number)
                atom_indexes = set(mol.getAtomIndices())
                for atom_index in atom_indexes:
                    self.template_polymer.atom[atom_index].color = color
            # Residues of different colors and additional validation for debug mode
            filename = 'set_and_mark_residues_' + self.debug_file
            self.template_polymer.write(filename)
            jobutils.add_outfile_to_backend(filename, backend=self.backend)
            log_debug(
                f"setAndMarkResidues: {filename} has residues in different colors."
            )
            assert len(struct_copy.molecule) == len(
                self.template_polymer.residue)
        return True

[docs]    def setResProperties(self, residue_name, atom_indexes, residue_number):
        """
        Reassign atom properties so that atoms of atom_indexes are treated
        as one residue with proper MONOMER_ORIG_ATOM_IDX_PROP.

        NOTE: MONOMER_ORIG_ATOM_IDX_PROP MODIFIED BASED ON ATOMS IN RESIDUE

        :type residue_name: str
        :param residue_name: the residue name for atoms in this residue

        :type atom_indexes: list of int
        :param atom_indexes: the atom ids for atoms in one residue

        :type residue_number: int
        :param residue_number: the residue number for atom_indexes
        """

        for orig_atom_id, atom_id in enumerate(atom_indexes, 1):
            atom = self.template_polymer.atom[atom_id]
            # All atoms in the same residue must have the same residue number,
            # residue name, chain and insertion code
            atom.resnum = residue_number
            atom.pdbres = residue_name
            atom.chain = " "
            atom.inscode = " "
            atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = orig_atom_id
            atom.property[MONOMER_ORIG_IDX_PROP2] = orig_atom_id
            atom.property.pop(msprops.BACKBONE_ADJOINING_ATOM_PROP, None)
            atom.property.pop(msprops.BACKBONE_ATOM_PROP, None)
            atom.property.pop(msprops.SIDE_GROUP_ATOM_PROP, None)

[docs]    def extractMoieties(self):
        """
        Extract residue structures in a polymer and save one for each residue type.
        """

        self.moiety_structs = {}
        for residue in self.template_polymer.residue:
            residue_type = residue.pdbres.rstrip()
            if residue_type in self.moiety_structs:
                continue
            residue_struct = residue.extractStructure(copy_props=True)
            if residue_type == INI:
                moiety_prop = INITIATOR.capitalize()
            elif residue_type == TRM:
                moiety_prop = TERMINATOR.capitalize()
            elif residue_type == CAS:
                moiety_prop = CASCADER.capitalize()
            else:
                moiety_prop = 'Monomer_%s' % residue_type
            residue_struct.property[MOIETY_PROP] = moiety_prop
            polym = self.template_polymer if self.debug else None
            assign_markers(residue_struct, polym=polym)
            self.moiety_structs[residue_type] = residue_struct

[docs]    def updateOrigAtomIdx(self):
        """
        Update the MONOMER_ORIG_ATOM_IDX_PROP property in each polymer atom
        with the atom index of atoms in moiety_structs set by setResProperties().
        """

        orig_prop = msprops.MONOMER_ORIG_ATOM_IDX_PROP
        if self.debug:
            ids_moiety = []
            for pdbres, struct in self.moiety_structs.items():
                orig_ids = [x.property.get(orig_prop) for x in struct.atom]
                ids_moiety += [x for x in orig_ids if x is not None]
                # moiety_structs has one INI and multiple monomers (CAS and TRM
                # are optional)
                if pdbres == TRM:
                    # TRM is directly connected to one monomer
                    assert 1 == orig_ids.count(None)
                elif pdbres == INI:
                    # INI is not directly connected to TRM and CAS
                    snum = 1
                    if TRM in self.moiety_structs:
                        snum += 1
                    if CAS in self.moiety_structs:
                        snum += 1
                    none_num = len(self.moiety_structs) - snum
                    # '<' for Multi-site, '=' for single-site INI
                    assert none_num <= orig_ids.count(None)
                elif pdbres == CAS:
                    # CAS is directly connected to multiple moieties
                    assert 1 < orig_ids.count(None)
                else:
                    marker_num = [[x.property.get(y)
                                   for y in ATOM_PROP_MARKER]
                                  for x in struct.atom]
                    none_num = sum([x.count(True) for x in marker_num])
                    # Monomer may be connected to one type of moiety multiple times
                    assert none_num <= orig_ids.count(None)
            ids = [
                x.property.get(orig_prop) for x in self.template_polymer.atom
            ]
            # polymer may have multiple residues with the same pdbres name
            assert set(ids) == set(ids_moiety)

        for residue in self.template_polymer.residue:
            residue_type = residue.pdbres.rstrip()
            for res_atom in residue.atom:
                atoms = [
                    x for x in self.moiety_structs[residue_type].atom
                    if x.property.get(orig_prop) == res_atom.property[orig_prop]
                ]
                res_atom.property[orig_prop] = atoms[0].index

[docs]    def markMoietiesProps(self):
        """
        Copy and add additional properties to moiety_structs to pass
        the validations in `Moieties`. However, these properties are
        not used, since polymer is already built.

        :type polymer: `schrodinger.structure.Structure`
        :param polymer: structure of a polymer
        """

        for residue_struct in self.moiety_structs.values():
            for key, value in self.template_polymer.property.items():
                if not residue_struct.property.get(MOIETY_PROP):
                    residue_struct.property[key] = value
            if residue_struct.property[MOIETY_PROP].startswith('Monomer_'):
                residue_struct.property[TACTICITY_PROP] = TACTICITY_ISO
                residue_struct.property[BRANCH_PCT_PROP] = 0.5
                residue_struct.property[BRANCH_MAX_PROP] = 0.8
                residue_struct.property[BBTRANS_PROP] = True
            elif residue_struct.property[MOIETY_PROP].startswith('Cascader'):
                residue_struct.property[CASCADE_GEN_PROP] = 2
            if self.debug:
                # Write out all moieties with Br atoms
                filename = 'mark_moieties_props_' + self.debug_file
                residue_struct.append(filename)
                jobutils.add_outfile_to_backend(filename, backend=self.backend)
                log_debug(f"markMoietiesProps: {filename} has all moieties.")


[docs]class PolymerFragment(object):

[docs]    def __init__(self,
                 resnum,
                 generation,
                 pre_frag=None,
                 is_branching=False,
                 pre_backbone=None,
                 template_frag=None,
                 molnum=None,
                 template_polymer=None):
        """
        PolymerFragment class stores the connectivity, dihedral angle, and
        anthoer other information for tangled chain method.

        :type resnum: int
        :param resnum: the resnum of the 3rd atom in dihedral angle

        :type generation: int
        :param generation: the generation of the fragment

        :type pre_frag: `PolymerFragment`
        :param pre_frag: The parent fragment

        :type is_branching: bool
        :param is_branching: True, if current fragment has more than children

        :type pre_backbone: list of int
        :param pre_backbone: the last atom in pre_backbone is the 3rd dihedral atom

        :type template_frag: `PolymerFragment` or None
        :param template_frag: the polymer fragment to be copied

        :type molnum: int
        :param molnum: molecule number

        :type template_polymer: `schrodinger.structure.Structure`
        :param template_polymer: the structure of one polymer chain
        """

        self.resnum = resnum
        self.generation = generation
        self.in_sidegroup = False
        self.atom_ids = []
        self.dihedral = []
        self.dihe_4th_atom_ids = []
        self.orig_dihedral = []
        self.next_frags = []
        self.pre_backbone = pre_backbone or []
        self.pre_frag = pre_frag
        self.is_branching = is_branching
        self.dihe_pool = []
        self.dihe_rand = []
        self.dihe_value = None
        self.hitted_num = 0
        self.max_hit_num = MAX_HIT_NUM
        self.molnum = None
        self.rings = []
        self.rings_atom_ids = []
        self.spear_rings = []
        self.template_polymer = template_polymer
        self.neighbor_frags = []
        self.local_struct_atom_ids = set()
        self.end_atom_ids = []
        self.local_struct_side_dihedrals = []
        self.torsion_pattern = None
        self.torsion_probability = None
        self.tail_atom = None
        self.debug = schrodinger.in_dev_env()
        if template_frag:
            self.preDeepCopy(template_frag, molnum)

[docs]    def preDeepCopy(self, original_frag, molnum):
        """
        Deep copy the fragment attributes except those linking
        different fragments.

        :type original_frag: `PolymerFragment`
        :param original_frag: Polymerfragment object to be copied

        :type molnum: int
        :param molnum: molecule number
        """

        self.molnum = molnum
        self.generation = original_frag.generation
        self.in_sidegroup = original_frag.in_sidegroup
        self.is_branching = original_frag.is_branching
        self.torsion_pattern = original_frag.torsion_pattern
        self.atom_ids = original_frag.atom_ids[:]
        self.dihedral = original_frag.dihedral[:]
        self.dihe_pool = original_frag.dihe_pool[:]
        self.dihe_rand = original_frag.dihe_rand[:]
        # [[atom ids in first ring],[atom ids in second ring],..]
        self.rings_atom_ids = copy.deepcopy(original_frag.rings_atom_ids)
        self.next_frags = original_frag.next_frags[:]
        self.template_polymer = original_frag.template_polymer
        self.torsion_probability = original_frag.torsion_probability
        self.tail_atom = original_frag.tail_atom

[docs]    def deepCopy(self, molnum):
        """
        Deep copy the fragment class from the INI fragment.

        :type molnum: int
        :param molnum: molecule number

        :rtype: `PolymerFragment`
        :return: new INI fragment directing to all copied fragments
        """

        first_frag = PolymerFragment(self.resnum,
                                     self.generation,
                                     template_frag=self)
        first_frag.molnum = molnum
        next_frags = [first_frag]
        while next_frags:
            frag = next_frags.pop(0)
            for next_original_frag in frag.next_frags[:]:
                frag_copied = PolymerFragment(next_original_frag.resnum,
                                              next_original_frag.generation,
                                              pre_frag=frag,
                                              template_frag=next_original_frag,
                                              molnum=molnum)
                next_frags.append(frag_copied)
                frag.next_frags.remove(next_original_frag)
                frag.next_frags.append(frag_copied)
        return first_frag

[docs]    def setDihedrals(self, dihe_init):
        """
        Set the dihedral angle pool and rand for each fragment.
        dihedral angle pool and rand are lists of possible dihedral
        angles. When the polymer chain grows, move to next polymer fragment
        and set the dihedral angle using a value randomly picked from rand;
        When the polymer growing site dies, move to previous polymer fragment,
        pop up the used dihedral angle from the pool, and randomly
        pick a new one from the left dihedral values in pool.

        :type dihe_init: list of floats
        :param dihe_init: list of possible dihedral angles
        """

        self.dihe_init = dihe_init
        fragnum = 1
        self.fragnum = fragnum
        for frag in self.fragments(include_self=False):
            fragnum += 1
            frag.fragnum = fragnum
            frag.dihe_rand = dihe_init[:]
            frag.dihe_pool = dihe_init[:]
            random.shuffle(frag.dihe_pool)
            frag.max_hit_num = MAX_HIT_NUM

[docs]    def markSidegroupFrag(self):
        """
        Mark the side group polymer fragment, if the 3rd atom in the dihedral
        angle is in side group (not backbone atom).
        """

        for frag in self.fragments(include_self=False):
            atom_id = frag.dihedral[2]
            if self.template_polymer.atom[atom_id].pdbres.rstrip()[-1] != '0':
                frag.in_sidegroup = True

[docs]    def resetFragDihedralPool(self):
        """
        Reset the dihedral angle pool of all following fragment according to
        rand in each fragment.
        """

        for frag in self.fragments(include_self=False):
            frag.dihe_pool = frag.dihe_rand[:]
            random.shuffle(frag.dihe_pool)
            frag.hitted_num = 0

[docs]    def fragments(self, include_self=True):
        """
        Yield the first fragment of next_frags and append the children of
        that fragment to next_frags for recursion.

        :type include_self: bool
        :param include_self: If True, loop over itself and all child fragments.
            If False, only loop over its child polymer fragments.

        :rtype: iterator to generate `PolymerFragment`
        :return: one child polymer fragment
        """

        next_frags = [self] if include_self else self.next_frags[:]

        while next_frags:
            this_frag = next_frags.pop(0)
            yield this_frag
            next_frags += this_frag.next_frags[:]

[docs]    def adjustAtomIds(self, pre_atomnum):
        """
        Adjust atom ids in PolymerFragment when multipe chains are
        added into the cell.

        :type pre_atomnum: int
        :param pre_atomnum: number of atoms before adding current molecule
        """

        mol_atom_ids = []
        for frag in self.fragments():
            frag.atom_ids = [x + pre_atomnum for x in frag.atom_ids]
            mol_atom_ids += frag.atom_ids
            for findex, ring_indexes in enumerate(frag.rings_atom_ids):
                frag.rings_atom_ids[findex] = [
                    x + pre_atomnum for x in ring_indexes
                ]
            frag.dihedral = [x + pre_atomnum for x in frag.dihedral]
            if frag.tail_atom:
                frag.tail_atom += pre_atomnum
        self.atom_ids_in_mol = sorted(mol_atom_ids)
        self.atom_gids_in_mol = [
            atom_ids - 1 for atom_ids in self.atom_ids_in_mol
        ]

[docs]    def markBranchingFrag(self):
        """
        Mark the branching fragments.
        """

        for frag in self.fragments():
            if len(frag.next_frags) > 1:
                frag.is_branching = True

[docs]    def getDiheValues(self):
        """
        Get the possbile dihedral values of current fragment.

        :rtype: list
        :return: all polymer dihedral vaules
        """

        if self.continue_grow:
            if self.torsion_probability is not None:
                return numpy.random.choice(self.dihe_rand,
                                           len(self.dihe_rand),
                                           replace=False,
                                           p=self.torsion_probability)
            dihe_values = self.dihe_rand[:]
            random.shuffle(dihe_values)
        elif self.hitted_num < self.max_hit_num:
            dihe_values = self.dihe_pool[:]
        else:
            dihe_values = []
        return dihe_values


[docs]def assign_markers(residue_struct, polym=None):
    """
    Find and add marker atoms to the extracted structures.

    :type residue_struct: `schrodinger.structure.Structure`
    :param residue_struct: The structure to add the markers to

    :type polym: `schrodinger.structure.Structure`
    :param polym: If provided, the corresponding head/tail atoms are located in
        this reference structure, and the position of Br is determined by bonding
        and residue informations
    """

    marker_ids = {0: [], 1: [], 2: []}
    for atom in residue_struct.atom:
        m_types = [y for x, y in ATOM_PROP_MARKER_IDX if atom.property.get(x)]
        if not m_types:
            continue
        if polym:
            patoms = [x for x in polym.atom if x.pdbres == atom.pdbres]
            patoms = [
                x for x in patoms
                if x.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] ==
                atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP]
            ]
            patom = patoms[0]
            bonded_atoms = [x for x in patom.bonded_atoms]
            batoms = [x for x in bonded_atoms if x.pdbres != atom.pdbres]
            #
            m_types += [m_types[-1]] * (len(batoms) - len(m_types))
        for index, marker_type in enumerate(m_types):
            # Br atoms are used to mark head, tail, branch, and cas atoms
            # As the polymer chain is already built, the Br position is
            # not used. Putting them near the head and tail atoms helps
            # to visualize which atoms are marked.
            if polym:
                try:
                    batom = batoms[index]
                except IndexError:
                    # Br placed away from the residue (no bonds between residues)
                    batom = bonded_atoms[0]
                    xyz = numpy.array(patom.xyz) * 2 - numpy.array(batom.xyz)
                else:
                    xyz = (numpy.array(patom.xyz) + numpy.array(batom.xyz)) / 2
                residue_struct.addAtom('Br', *xyz)
            else:
                residue_struct.addAtom('Br', atom.x + 1., atom.y + 1.,
                                       atom.z + 1.)
            residue_struct.addBond(atom.index, residue_struct.atom_total, 1)
            marker_ids[marker_type].append(residue_struct.atom_total)

    marker_id_list = []
    for marker_type, atom_ids in marker_ids.items():
        marker_id_list += atom_ids
    residue_struct.property[MARKER_PROP] = ','.join(map(str, marker_id_list))


[docs]class VolumeMemoryError(Exception):
    pass


[docs]class LowMemoryVolGraph(object):
    """
    This class mimics a networkx graph for the XYZVolumeGraph class. It has only
    minimal networkx functionality. It is necessary because networkx graphs
    are giant memory hogs and can't be used for even moderately-sized
    sparse grid graphs. This class takes a tiny fraction - a few percent or so -
    of the memory for an equivalent networkx graph.
    """

[docs]    def __init__(self):
        """
        Create a LowMemoryVolGraph instance
        """

        self._all_nodes = set()
        # Keys of _all_edges are nodes, values are a set of nodes connected to
        # that node
        self._all_edges = defaultdict(set)
        # Blobs are groups of connected nodes - the equivalent of
        # networkx.connected_components. Blobs are somewhat expensive timewise
        # to find, so they are cached. Each time a change is made to the number
        # of nodes or connectivity of the nodes the blob cache is erased.
        self._all_blobs = []
        self.machine_memory_mb = psutil.virtual_memory().total / float(2**20)

[docs]    def __len__(self):
        """
        The length of the graph is just the number of nodes

        :rtype: int
        :return: The number of nodes in the graph
        """

        return len(self._all_nodes)

[docs]    def checkMemoryLimit(self):
        """
        Check that the amount of memory used is not approaching the total memory
        available

        :raise VolumeMemoryError: If the amount of memory used is too large
            compared to the total system memory
        """

        mymemory = jobutils.memory_usage_psutil()
        # Use 0.80 as some jobs fail at 84% memory usage
        if mymemory / self.machine_memory_mb > 0.80:
            raise VolumeMemoryError('The size of the system combined with the '
                                    'free space in the system is too large to '
                                    'fit in memory on this machine.')

[docs]    def addNode(self, node):
        """
        Add a new node to the graph.

        :type node: tuple (or hashable, sortable object)
        :param node: The node to add
        """

        self._all_nodes.add(node)
        self._all_blobs = []

[docs]    def addEdge(self, node1, node2):
        """
        Add a new edge (a connection between two nodes) to the graph

        :type node1: tuple (or hashable, sortable object)
        :param node1: One of the nodes to create the edge between. Node order
            does not matter.

        :type node2: tuple (or hashable, sortable object)
        :param node2: The other node to create the edge between. Node order
            does not matter.

        :raise KeyError: If one of the nodes is not found in the graph
        """

        for anode in (node1, node2):
            if anode not in self._all_nodes:
                raise KeyError('Node %s is not in graph' % str(anode))
        self._all_edges[node1].add(node2)
        self._all_edges[node2].add(node1)
        self._all_blobs = []

[docs]    def removeNode(self, node):
        """
        Remove a node from the graph. Also removes all edges attached to this
        node

        :type node: tuple (or hashable, sortable object)
        :param node: The node to remove

        :raise KeyError: If one of the nodes is not found in the graph
        """

        self._all_nodes.remove(node)
        for bonded in self._all_edges[node]:
            self._all_edges[bonded].remove(node)
        del self._all_edges[node]
        self._all_blobs = []

[docs]    def removeEdge(self, node1, node2):
        """
        Remove an edge (a connection between two nodes) from the graph. The
        nodes are not removed.

        :type node1: tuple (or hashable, sortable object)
        :param node1: One of the nodes for the existing edge. Node order
            does not matter.

        :type node2: tuple (or hashable, sortable object)
        :param node2: The other node to for the existing edge. Node order
            does not matter.

        :raise KeyError: If one of the nodes is not found in the graph
        """

        self._all_edges[node1].remove(node2)
        self._all_edges[node2].remove(node1)
        self._all_blobs = []

[docs]    def nodes(self):
        """
        A generator over all the nodes in the graph

        :rtype: node type
        :return: Iterates over each node in the graph
        """

        for node in self._all_nodes:
            yield node

[docs]    def edges(self):
        """
        A generator over all edges in the graph

        :rtype: (node, node)
        :return: Iterates over each pair of connected nodes. Nodes are returned
            in sort order.
        """

        for node, bonded in self._all_edges.items():
            for bonder in bonded:
                if bonder > node:
                    yield (node, bonder)

    def _gatherBlobs(self, force=False):
        """
        Find all the blobs in the system. A blob is a group of connected nodes.

        :type force: bool
        :param force: Recomputed the blobs even if the blob cache exists

        :raise VolumeMemoryError: If memory usage grows too large
        """

        if self._all_blobs:
            if force:
                self._all_blobs = []
            else:
                return

        # We start with a random node. We iterate through all its connected
        # neighbors, adding them to the blob. Then we iterate through the
        # connected neighbors of each neighbor, etc. recursive, etc.
        unblobbed_nodes = self._all_nodes.copy()
        self.checkMemoryLimit()
        count = 0
        while unblobbed_nodes:
            count += 1
            node = unblobbed_nodes.pop()
            # "added" contains all nodes added to the blob that we haven't yet
            # checked the neighbors of
            added = [node]
            this_blob = set(added)
            while added:
                # Grab a new node to check
                check_node = added.pop()
                for next_node in self._all_edges[check_node]:
                    # Add any neighbors that we haven't already added
                    if next_node not in this_blob:
                        count += 1
                        # Add to the list of nodes whose neighbors we must check
                        added.append(next_node)
                        # Add to the blob
                        this_blob.add(next_node)
                        # Remove from the set of unblobbed nodes
                        unblobbed_nodes.remove(next_node)
                if not count % 10000:
                    self.checkMemoryLimit()
            self._all_blobs.append(this_blob)

        # Sort the blob cache so that the largest blob is first
        self._all_blobs.sort(key=len, reverse=True)

[docs]    def blobs(self):
        """
        A generator for all the blobs (groups of connected nodes) in the graph

        :rtype: set
        :return: Iterates over blobs. Blobs are returned in order of largest to
            smallest
        """

        if not self._all_blobs:
            self._gatherBlobs()
        for blob in self._all_blobs:
            yield blob

[docs]    def degree(self, node):
        """
        Return the number of edges for the node

        :type node: tuple (or hashable, sortable object)
        :param node: The node to check

        :rtype: int
        :return: The number of edges for a node

        :raise KeyError: If the node is not found in the graph
        """

        if node not in self._all_edges:
            # The edges dict is a defaultdict so must raise error manually
            raise KeyError('Node %s not found in graph' % str(node))
        return len(self._all_edges[node])


[docs]class XYZVolumeGraph(object):
    """
    Create a networkx graph to search the voids in structure.
    """

[docs]    def __init__(self, struct, spacing=2.0, scaffold=None):
        """
        Create networkx graph based on the structure PBC or coordinates.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to compute the graph over

        :type spacing: float
        :param spacing: The approximate spacing (Angstroms) between graph nodes.
            The actual grid spacing will be adjusted in each direction to ensure
            uniform grid point distribution, and the actual spacing used in each
            direction can be found in the self.xyz_spacings list

        :type scaffold: `Scaffold` or None
        :param scaffold: a structure that occupies space as a cluster of
            molecules.
        """

        self.struct = struct
        self.origin = [0.0, 0.0, 0.0]
        self.box_lengths = [0.0, 0.0, 0.0]

        try:
            # May find incorrect largest void for interface scaffold
            self.pbc = clusterstruct.create_pbc(self.struct)
        except ValueError:
            # no PBC
            self.pbc = None

        # Determine X, Y and Z lengths; origin may be redefined
        if scaffold and scaffold.scaffold:
            self.defineBoxFromScaffold(scaffold)
        elif self.pbc:
            self.defineBoxFromPBC()
        else:
            self.defineBoxFromNonPBCStruct()

        self.num_xyz = [
            int(math.ceil(old_div(size, spacing))) for size in self.box_lengths
        ]
        self.xyz_spacings = numpy.array([
            old_div(length, num)
            for num, length in zip(self.num_xyz, self.box_lengths)
        ])
        self.shifted_origin = self.origin + self.xyz_spacings * 0.5

        self.total_points = 0

[docs]    def defineBoxFromScaffold(self, scaffold):
        """
        Define the grid box as the scaffold box size

        :type scaffold: `Scaffold`
        :param scaffold: a structure that occupies space as a cluster of
            molecules.
        """

        for axis in range(3):
            [region_min, region_max] = scaffold.box.getMinMax(axis)
            self.box_lengths[axis] = region_max - region_min
            self.origin[axis] = region_min

        self.hull = None

[docs]    def defineBoxFromPBC(self):
        """
        Define the box from the PBC. This method works for triclinic cells by
        virtue of creating a hull that can be used to eliminate grid points
        outside the arbitrary-shaped PBC region.
        """

        # Create the 8 vertices of the PBC
        vecs = self.pbc.getBoxVectors()
        # The first two for statements loop over every combination of 0, 1, 2 or
        # 3 vectors - 8 in total
        vertices = []
        # Use anywhere from 0 (gives origin) to 3 vectors
        for numvecs in range(0, 4):
            # Select all possible combinations of numvecs vectors - the [0, 1,
            # 2] list provides the indexes for the x, y and z vectors
            for selection in itertools.combinations([0, 1, 2], numvecs):
                vertices.append(numpy.zeros(3))
                for vecdex in selection:
                    vertices[-1] += vecs[vecdex]
        vertices = numpy.array(vertices)
        self.box_lengths = vertices.ptp(0)

        # Determine the origin by finding the center of the structure and then
        # going "back" -1/2 each box length. This will center the box on the
        # structure.
        coords = self.struct.getXYZ()
        mins = numpy.min(coords, 0)
        maxes = numpy.max(coords, 0)
        center = old_div((mins + maxes), 2.0)
        self.origin = [
            center[a] - old_div(self.box_lengths[a], 2) for a in range(3)
        ]

        # Create a convex hull that encloses the 8 vertices
        vertices += self.origin
        self.hull = spatial.Delaunay(vertices)

[docs]    def defineBoxFromNonPBCStruct(self):
        """
        Define a box that encompasses the structure, including the VDW radii of
        the extreme atoms
        """

        xyz = self.struct.getXYZ()
        min_gids = numpy.argmin(xyz, axis=0)
        for dimens, gid in enumerate(min_gids):
            self.origin[dimens] = (xyz[gid][dimens] -
                                   self.struct.atom[gid + 1].radius)

        max_gids = numpy.argmax(xyz, axis=0)
        for dimens, gid in enumerate(max_gids):
            self.box_lengths[dimens] = (xyz[gid][dimens] +
                                        self.struct.atom[gid + 1].radius -
                                        self.origin[dimens])

        self.hull = None

[docs]    def getNodeXYZ(self, node):
        """
        Convert networkx node (tuple of X, Y, Z index) to XYZ coordinates

        :type node: tuple
        :param node: the node to convert

        :rtype: tuple
        :return: A tuple of x, y z coordinates in Angstroms
        """

        return tuple(self.shifted_origin +
                     self.xyz_spacings * numpy.array(node))

[docs]    @staticmethod
    def getDistanceCellDistance(struct, probe):
        """
        Get the distance that will be used as the cutoff for atoms close to a
        point

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure that will be checked

        :type probe: float
        :param probe: The probe radius that will be used

        :rtype: float
        :return: The distance cutoff that will be used
        """

        maxrad = max(x.radius for x in struct.atom)
        return maxrad + 0.1 + probe

[docs]    def locateVoids(self, atom_ids=None, vdw_scale=1.0, probe=0.0, logger=None):
        """
        Remove all nodes that overlap a bitset-on atom (all atoms by default)
        in current structure. And connect all grid points that are part of the
        same void.

        :type atom_ids: set or None
        :param atom_ids: if not None, only check clashes for atoms in this set

        :type vdw_scale: float
        :param vdw_scale: VDW scale factor to use

        :type probe: float
        :param probe: A radius to add to each atom's vdw radius in order to
            determine whether the atom encompasses a point or not. This is the
            thought equivalent of rolling a ball around the vdw surface of the
            atoms and tracing out the center of the ball.

        :type logger: `Logger`
        :param logger: If given, progress will be logged using this object

        :raise VolumeMemoryError: If memory usage grows too large
        """

        # Create an empty graph with no nodes
        self.graph = LowMemoryVolGraph()

        dcell_distance = self.getDistanceCellDistance(self.struct, probe)
        # Distance cells are expensive, only create it once
        self.distance_cell, self.distance_pbc = \
                clusterstruct.create_distance_cell(
                    self.struct, dcell_distance, pbc=self.pbc)

        # For each potential point in the graph, we only add it if a) it is
        # inside the box hull (important for triclinic cells that are not cubic
        # like the grid is) and b) it is not encompassed by an atom. It is much
        # better memory-wise to only put valid points in the graph than it is to
        # put all points in the graph and then remove the invalid ones.
        total_points = self.num_xyz[0] * self.num_xyz[1] * self.num_xyz[2]
        per_ten = old_div(total_points, 10)
        since_last_log = 0
        for xval in range(self.num_xyz[0]):
            for yval in range(self.num_xyz[1]):
                for zval in range(self.num_xyz[2]):
                    node = (xval, yval, zval)

                    xyz = self.getNodeXYZ(node)
                    if self.hull and self.hull.find_simplex(xyz) < 0.0:
                        # Point is outside the PBC box
                        continue

                    # Track the total number of possible points in the box not
                    # counting those outside the hull
                    self.total_points += 1

                    # Get list of atoms close to xyz point using distance cell
                    all_matches = self.distance_cell.query_atoms(*xyz)
                    overlap = False
                    for match in all_matches:
                        atom_id = match.getIndex()
                        # Use vdw_scale & VDW radius of each atom to determine
                        # if it overlaps
                        if atom_ids and atom_id not in atom_ids:
                            continue
                        vdw = self.struct.atom[atom_id].radius
                        size = vdw * vdw_scale + probe
                        if match.getDistanceSquared() <= (size)**2:
                            overlap = True
                            break
                    if overlap:
                        continue

                    # This is a node inside the box but outside any atom
                    self.graph.addNode(node)
                    if not self.total_points % 10000:
                        self.graph.checkMemoryLimit()

            if logger:
                # Log progress about every 10% of steps
                since_last_log += self.num_xyz[1] * self.num_xyz[2]
                if since_last_log > per_ten:
                    now = (xval + 1) * self.num_xyz[1] * self.num_xyz[2]
                    pct = 100.0 * now / total_points
                    logger.info('Processed %.1f%% of %d gridpoints' %
                                (pct, total_points))
                    since_last_log = 0

        # Now create edges between neighboring nodes
        self.bondRemainingNodes()

[docs]    def bondRemainingNodes(self):
        """
        Connect each node with its neighbor nodes that remain in the graph.
        Nodes that differ by 1 in 1 coordinate are considered neighbors. Any PBC
        is also accounted for so that neighboring nodes on the opposite side of
        the cell are also bonded.

        :raise VolumeMemoryError: If memory usage grows too large
        """

        # FIXME: Does not work for neighbors on the boundary of triclinic cells
        for count, node in enumerate(self.graph.nodes()):
            for neighbor in self.gridNeighbors(node, self.num_xyz, self.pbc):
                try:
                    self.graph.addEdge(node, neighbor)
                except KeyError:
                    # Neighbor is not in graph, that's fine
                    pass
            # Check memory every 10000 nodes
            if not count % 10000:
                self.graph.checkMemoryLimit()

[docs]    @staticmethod
    def gridNeighbors(node, num_xyz, pbc):
        """
        A generator over all potential node values for the neighbors of the
        given node. The nodes may or may not exist in the graph. Neighbors are
        considered to have one of x, y or z values that differs by exactly 1 and
        both other values are the same. The PBC is used to connect neighbors on
        opposite edges of the cell.

        :type node: tuple
        :param node: the node to get the neighbors for

        :type num_xyz: list
        :param num_xyz: The number of grid points in the x, y and z directions

        :type pbc: `schrodinger.infra.structure.PBC` or None
        :param pbc: The PBC if one exists

        :rtype: tuple
        :return: Iterates over potential (x, y, z) neighbors of the given node.

        """

        # FIXME: Does not work for neighbors on the boundary of triclinic cells
        for coord in range(3):
            for delta in (-1, 1):
                newval = XYZVolumeGraph.getNeighborCoordVal(
                    node, delta, coord, num_xyz[coord], pbc)
                if newval is None:
                    continue
                other = list(node)
                other[coord] = newval
                other = tuple(other)
                yield other

[docs]    @staticmethod
    def getNeighborCoordVal(node, delta, coord, numpts, pbc):
        """
        Get the (x, y, z) tuple for a neighbor of node that differs by delta in
        coord. (x, y, z) is modified based on the PBC if necessary

        :type node: tuple
        :param node: the node to convert

        :type delta: int
        :param delta: How much the coord value differs from the given node.
            Typically 1 or -1 to give a neighboring node.

        :type coord: int
        :param coord: 0, 1 or 2 for the x, y or z direction

        :type numpts: int
        :param numpts: The number of grid points in the coord direction

        :type pbc: `schrodinger.infra.structure.PBC` or None
        :param pbc: The PBC if one exists

        :rtype: tuple or None
        :return: The (x, y, z) tuple formed by modifying coord of node by delta,
            or None if there is no pbc and the modified coord would be outside
            0:numpts-1
        """

        val = node[coord] + delta
        if val < 0:
            if pbc:
                return numpts - 1
            else:
                return None
        elif val == numpts:
            if pbc:
                return 0
            else:
                return None
        return val

[docs]    def voids(self):
        """
        A generator that returns groups of connected nodes that define voids in
        the structure.

        :rtype: tuple
        :return: Each returned value is a set of nodes that are all connected
            and define a void blob. The sets are returned in order of size, largest
            to smallest
        """

        for blob in self.graph.blobs():
            yield blob

[docs]    def getLargestVoid(self):
        """
        Get the largest void.

        :rtype: tuple
        :return: The set of nodes that form the largest void in the structure
        """

        for blob in self.graph.blobs():
            return blob

[docs]    def getSurfaceNodes(self, blob):
        """
        Get a list of nodes in blob that have fewer than 6 connections

        :type blob: set
        :param blob: The set of nodes to search for surface nodes in

        :rtype: list
        :return: Each item of the list is a node that has fewer than 6
            connections
        """

        # FIXME: May not work for triclinic cells
        return [node for node in blob if self.graph.degree(node) != 6]

[docs]    def getBuriedNode(self, blob):
        """
        A generator that returns nodes that is buried inside the blob of
        connected nodes.  Buried nodes have lots of connections to other nodes.

        :type blob: a set of tuples
        :param blob: Each item of the set is a node

        :rtype: tuple
        :return: The buried node
        """

        nodes = [node for node in blob]
        # Shuffle to get a randomized order
        random.shuffle(nodes)
        # Sort shuffled nodes by number of connections
        for node in sorted(nodes, key=self.graph.degree, reverse=True):
            yield node

[docs]    def getBuriedXYZ(self, blob):
        """
        Like getBuriedNode but return the xyz coordinates.

        :type blob: a set of tuples
        :param blob: Each item of the set is a node

        :rtype: tuple
        :return: A tuple of x, y z coordinates in Angstroms
        """

        for node in self.getBuriedNode(blob):
            yield self.getNodeXYZ(node)


[docs]def find_attached_atom_index(struct, mark_index, prop=None, propval=True):
    """
    Find the index of the atom attached to the atom with the given index,
    optionally setting a property/value on that atom

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to search for the atom

    :type mark_index: int
    :param mark_index: The index of the atom that we want to find an atom
        attached to.

    :type prop: str
    :param prop: An atom property name

    :param propval: The value to set prop to on the found attached atom. The
        type of this object depends on the type of property being set

    :rtype: int
    :return: The index of an atom attached to the mark_index atom. If more than
        one atom is attached to mark_index atom, the first one is returned.
    """

    mark_atom = struct.atom[mark_index]
    for neighbor in mark_atom.bonded_atoms:
        # Only one neighbor allowed for marker atoms
        break
    else:
        raise ValueError('Expected marker atom %d to be bound to another atom '
                         'but it is not')
    if prop:
        neighbor.property[prop] = propval
    return neighbor.index


[docs]def get_other_item(two_list, item):
    """
    In a two item list, return the item that isn't the one passed in

    :type two_list: list
    :param two_list: A list two items long

    :param item: One of the items from two_list

    :return: The item in two_list that is not the one passed in
    """

    return two_list[abs(two_list.index(item) - 1)]


[docs]def propagate_chain_data_from_atom(struct, chain_atom):
    """
    Set the chain data for all atoms in struct to the same data as found on atom

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to set atom properties on

    :type chain_atom: `schrodinger.structure._StructureAtom`
    :param chain_atom: The atom object to take the chain information from.
        Might not be from the same structure object as struct
    """

    cid = chain_atom.property.get(CHAIN_ID_PROP)
    if cid is not None:
        name = chain_atom.chain
        for atom in struct.atom:
            atom.chain = name
            atom.property[CHAIN_ID_PROP] = cid


[docs]def get_random_nonzero_to_one():
    """
    Get a random float: 0 > x >= 1

    :rtype: float
    :return: A floating point number in the range (0, 1]
    """

    ranval = random.random()
    # ranval is [0, 1), but we really want (0, 1] - the line below
    # accomplishes that
    return 1.0 - ranval


[docs]class TorsionAngler(object):
    """
    Class to handle cycling a structure though a series of torsion values
    """

[docs]    def __init__(self, options, minimum_steps=5, max_stepsize=15.):
        """
        Create a TorsionAngler object

        :type options: `argparse.Namespace`
        :param options: Object with dihedral_min and dihedral_max properties
            that describe the minimum and maximum allowed dihedral angles

        :type minimium_steps: int
        :param minimum_steps: The minimum steps to take.  The actual number of
            steps may be larger if the allowed range of torsions is greather than
            max_stepsize*minimum_steps.

        :type max_stepsize: float
        :param max_stepsize: The largest number of degrees to move the torsion
            per step.
        """

        self.minval = options.dihedral_min
        self.maxval = options.dihedral_max
        span = self.maxval - self.minval
        # We use n+1 in the line below because we don't want to get back to the
        # starting point by the nth step.
        self.delta_angle = min(old_div((span), float(minimum_steps + 1)),
                               max_stepsize)
        self.steps = int(round(old_div(span, self.delta_angle))) - 1

[docs]    def setData(self, struct, torsion_atoms):
        """
        Set the structure to operate on and the 4 atoms involved in the torsion

        :type struct: `schrodinger.structure.Structure`
        :param struct: The struct containing the torsion to rotate

        :type torsion_atoms: list
        :param torsion_atoms: A four int list, each int is the atom index of an
            atom in the torsion, in the order A-B-C-D, where B-C is the bond that
            rotates when the torsion changes.
        """

        self.struct = struct
        self.torsion_atoms = torsion_atoms
        self.torsion = self.getTorsionValueInRange()

[docs]    def getTorsionValueInRange(self):
        """
        Get a value for the torsion that falls within the specified range. The
        user may have supplied 150-210, but the measured value might be -170
        instead of 190. Convert the measured value to fall within the range.

        :rtype: float
        :return: The torsion value converted to fall within the range specified
            for the min/max of the torsion values.
        """

        raw_torsion = self.struct.measure(*self.torsion_atoms)
        if self.minval <= raw_torsion <= self.maxval:
            return raw_torsion

        sign = old_div(raw_torsion, abs(raw_torsion))
        full_circle = -sign * 360.
        new_torsion = full_circle + raw_torsion
        if self.minval <= new_torsion <= self.maxval:
            return new_torsion

        # This should never happen, but punt because it isn't the worst thing in
        # the world
        return raw_torsion

[docs]    def rotomers(self):
        """
        A generator for structures that step the torsion through the desired
        values.

        :rtype: `schrodinger.structure.Structure`
        :return: Each yield gives the structure with the specified torsion
            marched through the specified values.
        """

        for step in range(self.steps):
            self.incrementTorsionValue()
            self.struct.adjust(self.torsion, *self.torsion_atoms)
            yield self.struct

[docs]    def incrementTorsionValue(self):
        """
        Sets the torsion property to the next value to try for a dihedral as we
        rotate it through its allowed range.
        """

        self.torsion = self.torsion + self.delta_angle
        if self.torsion > self.maxval:
            self.torsion = self.minval + (self.torsion - self.maxval)


[docs]class MoietyError(Exception):
    """
    Raised if there is an error in creating a Moiety subclass object
    """


[docs]class InitiationError(Exception):
    """
    Raised if there is an error in initiating the amorphous cell.
    """


[docs]class BaseMoiety(object):
    """
    Base class for all polymer units - initiator, terminator, cascader, monomers
    """

    # This is a class variable so that all subclass instances have access to the
    # same cache
    bond_length_cache = {}
    RX_PROP = 'i_matsci_rx'

[docs]    def __init__(self, struct, markers=None, name=None):
        """
        Create a BaseMoiety object

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure of this Moiety

        :type markers: list
        :param markers: A list of the indexes of the Rx atoms. If not supplied,
            information will be read from the structure properties
        """

        if markers:
            self.markers = markers
            self.name = name
        else:
            self.readFromStructure(struct)
        self.raw_struct = struct
        if self.name:
            for atom in self.raw_struct.atom:
                atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP] = atom.index
                atom.property[msprops.MONOMER_NAME_PROP] = self.name
        self.head = None
        # self.rings is a list of sets, each set is the atom indexes for a ring
        self.rings = [set(x) for x in self.raw_struct.find_rings()]
        self.has_rings = bool(self.rings)
        self.is_cg = msutils.is_coarse_grain(struct)
        self.backbone_path_indexes = []

[docs]    def findAttachedAtomIndex(self, mark_index, prop=None, propval=True):
        """
        Find the index of the atom attached to the atom with the given index,
        optionally setting a property/value on that atom

        :type mark_index: int
        :param mark_index: The index of the atom that we want to find an atom
            attached to.

        :type prop: str
        :param prop: An atom property name

        :param propval: The value to set prop to on the found attached atom. The
            type of this object depends on the type of property being set

        :rtype: int
        :return: The index of an atom attached to the mark_index atom. If more
            than one atom is attached to mark_index atom, the first one is returned.
        """

        index = find_attached_atom_index(self.raw_struct,
                                         mark_index,
                                         prop=prop,
                                         propval=propval)
        return index

[docs]    def moveHeadToOrigin(self):
        """
        Move the structure so that the head atom is at the origin
        """

        translate = [-a for a in self.raw_struct.atom[self.head].xyz]
        transform.translate_structure(self.raw_struct, *translate)

[docs]    def alignToVector(self, struct, current_vector, vector):
        """
        Rotate the structure so that the current_vector is aligned to a new
        vector

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to rotate. The coordinates of this
            structure will be modified by this method

        :type current_vector: list
        :param current_vector: The current vector that will be aligned to the
            new vector

        :type vector: list
        :param vector: The new vector that current_vector should be aligned to

        :return: There is no return value, the structure is modified directly
        :rtype: `schrodinger.structure.Structure`

        :raises ValueError: If either vector has zero magnitude
        """

        current_array = numpy.array(current_vector)
        new_array = numpy.array(vector)
        try:
            matrix = transform.get_alignment_matrix(current_array, new_array)
        except ValueError:
            raise ValueError(
                'Unable to align fragments due to overlapped atoms.')
        transform.transform_structure(struct, matrix)

[docs]    def getStructureForCoupling(self,
                                orientation,
                                location=None,
                                vector=None,
                                remove_marker=True,
                                residue_number=None,
                                mark_bond=False):
        """
        Get the appropriate structure to couple to the existing unit

        :type orientation: str
        :param orientation: Should be the HEADFIRST or TAILFIRST constant,
            indicates which end of the moiety will bind

        :type location: list
        :param location: XYZ coordinates that the coupling atom (head or tail
            depending on orientation) should be moved to (moving the rest of this
            moiety with it)

        :type vector: list
        :param vector: The vector that the coupler-coupling marker vector should
            be aligned to

        :type remove_marker: bool
        :param remove_marker: Whether the marker atom should be removed.

        :type residue_number: int or None
        :param residue_number: If not None, the resnum property of every atom in
            the returned structure will be set to this value and the residue name
            will be assigned.

        :type mark_bond: bool
        :param mark_bond: If True, the created bond between two monomers is marked

        :rtype: (`schrodinger.structure.Structure`, `AttachementAtoms`)
        :return: A copy of the structure of this moiety rotated and translated
            as requested so that it can bind in the specified orientation. An
            AttachedAtoms tuple that gives the coupler, grower and marker indexes
            for this structure.
        """

        struct = self.getNextStructure(orientation)
        attachments = self.getCouplerAndGrower(orientation)
        # Mark the grower and coupler atoms for this entity
        struct.atom[attachments.coupler].property[COUPLER_ATOM_PROP] = True
        if attachments.grower:
            struct.atom[attachments.grower].property[GROWER_ATOM_PROP] = True
            if mark_bond:
                self.markHeadTailLabels(struct.atom[attachments.grower],
                                        struct.atom[attachments.coupler])

        if vector:
            current_vector = struct.atom[attachments.coupler_marker].xyz
            self.alignToVector(struct, current_vector, vector)
        if location:
            transform.translate_structure(struct, *location)
        if remove_marker:
            mapping = struct.deleteAtoms([attachments.coupler_marker],
                                         renumber_map=True)
            coupler = mapping[attachments.coupler]
            grower = mapping.get(attachments.grower)
            grow_marker = mapping.get(attachments.grow_marker)
            attachments = AttachmentAtoms(coupler, None, grower, grow_marker)
        # If requested, set the residue name and number
        if residue_number is not None:
            resname = self.getPDBName().ljust(4)
            for atom in struct.atom:
                atom.resnum = residue_number
                atom.pdbres = resname

        return struct, attachments

[docs]    def markHeadTailLabels(self, grower, coupler):
        """
        Mark the atom unique label based on element, monomer lettter, and
        'Head or Tail' orientation.

        :param grower: the atom connecting chain
        :type grower: 'structure.Structure.Atom'

        :param coupler: the atom waiting for other coming monomer
        :type coupler: 'structure.Structure.Atom'
        """

        letter = self.getPDBName()
        if len(letter) != 1:
            # def getLetter in MonomerRow (polymer_builder_gui.py) defines
            # a one letter code, and def getStructureForCoupling assigns the
            # letter to monomer atoms when building the polymer
            # INI, TRM or UNK (three letters )are assigned to initiator, terminators
            # or unknown monmers
            return

        monomer_index = str(ord(letter) - 64)
        # Only the bonds between known monomers are marked
        g_label = grower.element + monomer_index
        c_label = coupler.element + monomer_index

        if not self.directional or g_label != c_label:
            grower.property[UNIQUE_ATOM_PROP] = g_label
            coupler.property[UNIQUE_ATOM_PROP] = c_label
            return

        label = g_label
        for atom in [grower, coupler]:
            for prop, ht_char in zip(HT_ATOM_PROPS, ['H', 'T']):
                if atom.property.get(prop):
                    ht_lb = coarsegrain.CHIRAL_SEPARATOR.join([label, ht_char])
                    atom.property[UNIQUE_ATOM_PROP] = ht_lb
                    break

[docs]    def getBondLength(self, atom1, atom2, cg_bond_factor=0.8):
        """
        Get the desired bond length between the grower atom of the existing
        unit and the coupling atom of this moiety

        :type atom1: `_StructureAtom`
        :param atom1: One of the atoms to form a bond in between.

        :type atom2: `_StructureAtom`
        :param atom2: The other atoms to form a bond in between.

        :type cg_bond_factor: float
        :param cg_bond_factor: The pre-factor to calcuate the bond length based
            on the particle radius.

        :rtype: float
        :return: The desired bond distance
        """

        # Check for cached value
        key = tuple(sorted([atom1.element, atom2.element]))
        try:
            return self.bond_length_cache[key]
        except KeyError:
            pass

        if self.is_cg:
            # This 2.35 is from the 'Use Martini defaults' in sketcherGUIElements.cpp
            radius_sum = sum([atom.radius for atom in [atom1, atom2]])
            self.bond_length_cache[key] = radius_sum * cg_bond_factor
            return self.bond_length_cache[key]

        # Create a structure with both atoms and use mmideal to deliver (what is
        # very probably a very much NOT) ideal bond length (SHARED-3432)
        struct = structure.create_new_structure(0)
        for element in key:
            # Add as 'C' to avoid atom typing warning messages for strange
            # elements
            atom = struct.addAtom('C', 0.0, 0.0, 0.0)
            buildcomplex.transmute_atom(atom, element)
        struct.addBond(1, 2, 1)
        build.add_hydrogens(struct)
        struct.retype()
        # Note that this ideal bond order doesn't account for the hybridization
        # of the atoms, but this is a secondary effect that will easily be
        # cleaned up by simulation/QM calculation.
        self.bond_length_cache[key] = mm.mmideal_get_stretch(struct, [1, 2])
        return self.bond_length_cache[key]

[docs]    def determineCouplerLocation(self, xyz, vector, bond_length):
        """
        Figure out the XYZ coordinates where the coupler atom should go

        :type xyz: list
        :param xyz: The XYZ coordinates of the grower atom

        :type vector: list
        :param vector: The desired bond vector for coupling (goes from
            grower->coupler)

        :type bond_length: float
        :param bond_length: the bond length between grower and coupler

        :rtype: list
        :return: The XYZ coordinates where the coupler atom should be placed
        """

        normalv = transform.get_normalized_vector(numpy.array(vector))
        scaledv = [a * bond_length for a in normalv]
        location = [a + b for a, b in zip(xyz, scaledv)]
        return location

[docs]    def addToChain(self,
                   chain,
                   grower,
                   grow_marker,
                   orientation,
                   remove_marker=True,
                   set_residue_data=False,
                   propagate_chain_data=False,
                   chain_namer=None,
                   mark_bond=False):
        r"""
        Bind this moiety to an existing chain

        The grower, grow_marker, coupler and coupler_marker are defined so that
        the binding occurs like this:

            chain-grower                   coupler_marker
                        \                                \
                         grow_marker                      coupler-this_moiety

            goes to:

            chain-grower
                        \
                         coupler-this_moiety


        :type chain: `schrodinger.structure.Structure`
        :param chain: The existing chain to add this moiety to

        :type grower: int
        :param grower: The index of the atom in the existing chain that this
            moiety should bond with

        :type grow_marker: int
        :param grow_marker: The index of the Rx atom that marks the attachment
            point - the grower->grow_marker bond vector indicates the desired
            grower->coupler bond vector.

        :type orientation: str
        :param orientation: Whether this moiety should add head first or tail
            first. Should be a module HEADFIRST or TAILFIRST constant.

        :type remove_marker: bool
        :param remove_marker: Whether the chains grower_marker atom should be
            removed. Set to False if adding multiple moieties and you don't want
            existing atom numbers to change during the process.

        :type set_residue_data: bool
        :param set_residue_data: Whether to set the residue number property
            for all atoms added by this method to 1 greater than the residue number
            of the grow atom on the passed in chain. If False, no residue numbers
            will be set. A residue name is also assigned if this value is True.

        :type propagate_chain_data: bool
        :param propagate_chain_data: Whether to set the chain data
            for all atoms added by this method to the same chain information as
            the grow atom on the passed in chain. If False or the grower atom
            contains no chain information, this will not be done.
            This parameter is used if the added moiety should have the same chain
            information as the existing moiety. See also the chain_namer parameter.

        :type chain_namer: function
        :param chain_namer: A function that should be called with the structure
            that will be added to chain. The purpose of this function should be to
            add chain name data to the added structure. Supply this function if the
            moiety to be added is a full chain. If the moiety to be added is just
            another monomer, terminator, etc during the building of a single chain,
            do not supply this function. This parameter is used if the added moiety
            should have different chain information from the existing moiety, and it
            is up to the chain_namer function to set that data.
            See also the propagate_chain_data paramter.

        :type mark_bond: bool
        :param mark_bond: If True, the created bond between two monomers is marked

        :rtype: tuple(AttachmentAtoms, dict)
        :return: First item is the updated attachment atoms with coupler atom,
            the new grower atom and the new grow marker. Second item is
            dictionary with updated atom indices after removing markers where
            keys are atom numbers before deleting, and value for each is the new
            atom number, or None if that atom was deleted.

        """

        num_existing_atoms = chain.atom_total

        # Figure out the location to place the coupling atom for the proper bond
        # angle and length to the chain
        coupler_atom = self.getCouplerAtom(orientation)
        grower_atom = chain.atom[grower]
        grower_xyz = grower_atom.xyz
        grow_marker_xyz = chain.atom[grow_marker].xyz
        if set_residue_data:
            residue_number = grower_atom.resnum + 1
        else:
            residue_number = None
        # coupling_vector runs in the direction from my coupling atom to the
        # chain grower atom. Grow vector runs in the other direction
        coupling_vector = [a - b for a, b in zip(grower_xyz, grow_marker_xyz)]
        grow_vector = [-a for a in coupling_vector]
        bond_length = self.getBondLength(coupler_atom, grower_atom)
        location = self.determineCouplerLocation(grower_xyz, grow_vector,
                                                 bond_length)

        # Attach a copy of this monomer
        struct, attachments = self.getStructureForCoupling(
            orientation,
            location=location,
            vector=coupling_vector,
            residue_number=residue_number,
            mark_bond=mark_bond)
        # Set any requested chain data
        if chain_namer:
            chain_namer(struct)
        elif propagate_chain_data:
            propagate_chain_data_from_atom(struct, grower_atom)

        # Different residues should have different residue numbers
        residue_number = len(chain.residue)
        for increment, residue in enumerate(struct.residue, 1):
            residue.resnum = residue_number + increment

        chain.extend(struct)

        # Create the bond
        coupler = attachments.coupler + num_existing_atoms
        chain.addBond(grower, coupler, 1)
        if mark_bond:
            self.markBond(chain, grower, coupler)

        if attachments.grower:
            new_grower = attachments.grower + num_existing_atoms
            new_grow_marker = attachments.grow_marker + num_existing_atoms
        else:
            # This is an end group, no backbone/grower atom
            new_grower = None
            new_grow_marker = None

        # We no longer need the chain's marker atom - delete it and get the new
        # atom numbers
        mapping = {}
        if remove_marker:
            mapping = chain.deleteAtoms([grow_marker], renumber_map=True)
            coupler = mapping[coupler]
            if new_grower:
                new_grower = mapping[new_grower]
                new_grow_marker = mapping[new_grow_marker]

        new_attachment_atoms = AttachmentAtoms(coupler, None, new_grower,
                                               new_grow_marker)

        return new_attachment_atoms, mapping

[docs]    def markBond(self, chain, grower, coupler):
        """
        Mark the directional bond from grower to coupler.

        :param chain: the structure whose bond is to be marked
        :type chain: 'structure.Structure'

        :param grower: the core atom connecting to the just added monomer
        :type grower: int

        :param coupler: the just added monomer's atom connecting to the core
        :type coupler: int
        """

        bond = chain.getBond(grower, coupler)
        for prop, index in {
                coarsegrain.CG_UNIQUE_BOND_PARTICLE_1_LABEL_KEY: grower,
                coarsegrain.CG_UNIQUE_BOND_PARTICLE_2_LABEL_KEY: coupler
        }.items():
            label = chain.atom[index].property.pop(UNIQUE_ATOM_PROP, None)
            if label:
                bond.property[prop] = label

[docs]    def findBackbone2BranchingPoint(self, exclude_markers):
        """
        Find the branching point and the shortest path to the nearest atom
        in backbone_path_indexes. The braching point may have multiple branches
        connected to, but only one single pair of [backbone atom][branching atom]
        = [path with ends] is recorded.

        :type exclude_markers: list of int
        :param exclude_markers: the atom ids of markers to be excluded
        """

        branching_markers = list(set(self.markers) - set(exclude_markers))
        self.backbone_to_marked_end = {}
        for atom_index in self.backbone_path_indexes:
            self.backbone_to_marked_end[atom_index] = {}
            for marker in branching_markers[:]:
                bonded_atom = next(self.raw_struct.atom[marker].bonded_atoms)
                bonded_to_marker = bonded_atom.index
                path_with_ends = None
                if bonded_to_marker in self.backbone_side_atoms[atom_index]:
                    # the branching point is in the side group
                    path_with_ends = analyze.find_shortest_bond_path(
                        self.raw_struct, atom_index, bonded_to_marker)
                elif bonded_to_marker == atom_index:
                    # branching from backbone
                    path_with_ends = [bonded_to_marker]
                if path_with_ends:
                    branching_markers.remove(marker)
                    self.backbone_to_marked_end[atom_index][
                        bonded_to_marker] = path_with_ends

[docs]    @classmethod
    def write(cls, filename, struct, markers, name, classname=None):
        """
        Write the structure out to a file, tagging it with properties in such a
        way that the driver will recognize when reading the structure in

        :type filename: str
        :param filename: The path to the file

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to write

        :type markers: list
        :param markers: A list of the indexes of the Rx atoms.

        :type name: str
        :param name: The name to use for the title of the structure

        :type classname: str
        :param classname: The chemical class of this moiety. If not supplied,
            the actual python class name will be used since those are typically
            the same as the chemical class.
        """

        if not classname:
            classname = cls.__name__
        struct.property[MOIETY_PROP] = classname
        struct.property[MARKER_PROP] = ','.join([str(x) for x in markers])
        struct.title = name
        struct.append(filename)

[docs]    def checkRequiredProperties(self, struct, propexps):
        """
        Check to see if the structure has the desired properties set on it

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to check

        :type propexps: list of tuple
        :param propexps: (property string, explanation of what the property is)
            for each structure property to check
        """

        for prop, explanation in propexps:
            if prop not in struct.property:
                raise MoietyError('Structure %s is missing %s which specifies '
                                  '%s.' % (struct.title, prop, explanation))

[docs]    def readFromStructure(self, struct, propexps=None):
        """
        Read the data for this moiety from the structure object and check to
        make sure all the required properties are set

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to check

        :type propexps: list of tuple
        :param propexps: (property string, explanation of what the property is)
            for each structure property to check
        """

        if not propexps:
            propexps = []
        # Check that all required properties are available
        propexps.append(
            (MARKER_PROP, 'comma-separated list of Rx atom indexes'))
        self.checkRequiredProperties(struct, propexps)

        # Convert the structure properties to instance data
        try:
            markers = [int(x) for x in struct.property[MARKER_PROP].split(',')]
        except ValueError:
            raise MoietyError('The Rx atom indexes in %s must be a comma-'
                              'separated list of integers. Got "%s" instead.' %
                              (MARKER_PROP, struct.property[MARKER_PROP]))
        self.markers = markers
        for index, atom_id in enumerate(self.markers, start=1):
            struct.atom[atom_id].property[self.RX_PROP] = index
        self.name = struct.title

[docs]    def findSideGroup(self):
        """
        Find and mark the side group atoms.  The side group is any heavy atom
        not part of the backbone and any hydrogen attached to one of those
        heavy atoms.  Note that IUPAC specifically states that a "sidechain" is
        an oligomer or polymer, while a "side group" is a non-oligomer/polymer.
        So monomers have side groups, not sidechains.
        """

        # Find the indexes of all backbone atoms, including those rings fused to
        # the backbone
        backbone_atoms = {
            x for x in self.raw_struct.atom
            if x.property.get(msprops.BACKBONE_ATOM_PROP)
        }
        marked_indexes = {x.index for x in backbone_atoms}
        # In a four-atom dihedral angle list, bond rotation (2nd-3rd) changes
        # the xyz of side atoms attached to the 3rd atom and the xyz of the
        # 4th atom.
        self.backbone_side_atoms = {}
        num_groups = 0
        adjoining_prop = msprops.BACKBONE_ADJOINING_ATOM_PROP
        all_side_atom_ids = set()

        for atom in backbone_atoms:
            self.backbone_side_atoms[atom.index] = []
            for neighbor in atom.bonded_atoms:
                if not neighbor.property.get(msprops.BACKBONE_ATOM_PROP):
                    # This is a non-backbone atom bound to the backbone
                    if neighbor.index in self.markers:
                        # This is just a temporary marker atom, ignore it
                        marked_indexes.add(neighbor.index)
                        continue
                    if neighbor.element == 'H':
                        # Hydrogens are just adjoining atoms and not side groups
                        neighbor.property[adjoining_prop] = True
                        marked_indexes.add(neighbor.index)
                        self.backbone_side_atoms[atom.index] += [neighbor.index]
                    else:
                        # This may be a single atom or multi atom group
                        try:
                            group = self.raw_struct.getMovingAtoms(
                                atom, neighbor)
                        except ValueError:
                            # This is a sidegroup atom in the same ring as a
                            # backbone atom. This can happen if rings share at
                            # least one atom with a ring that is part of the
                            # backbone.
                            group = self.findRingSideGroup(
                                self.raw_struct, marked_indexes, neighbor.index)
                        if len(group) == 1:
                            if neighbor.property.get(
                                    msprops.SIDE_GROUP_ATOM_PROP):
                                # This is an atom in a side ring.
                                if neighbor.index not in all_side_atom_ids:
                                    # This atom has not been added to side atoms
                                    self.backbone_side_atoms[atom.index] += [
                                        neighbor.index
                                    ]
                                    # Register this atom and prevent double
                                    # counting from the other end.
                                    all_side_atom_ids.add(neighbor.index)
                                continue
                            else:
                                # This is a non-backbone atom bound to the
                                # backbone and not yet part of any side group
                                neighbor.property[adjoining_prop] = True
                        # Mark all atoms in this side group with the group name
                        num_groups += 1
                        gst = self.raw_struct.extract(group)
                        formula = analyze.generate_molecular_formula(gst)
                        group_name = '%s_%d' % (formula, num_groups)
                        for gindex in group:
                            gatom = self.raw_struct.atom[gindex]
                            gatom.property[
                                msprops.SIDE_GROUP_ATOM_PROP] = group_name
                            marked_indexes.add(gindex)
                            if gindex not in self.markers:
                                self.backbone_side_atoms[atom.index] += [gindex]
                    all_side_atom_ids.update(
                        self.backbone_side_atoms[atom.index])

[docs]    def finalizeSideGroup(self):
        """
        Find atoms in backbone (marked as BACKBONE_ATOM_PROP) but not in the
        shortest path (backbone_path_indexes) and then treat these atoms along
        with their side groups as the side groups of certain atom in shortest
        path (backbone_path_indexes)
        """

        backbone_path_indexes = set(self.backbone_path_indexes)
        atom_ids_to_move = set()
        for atom_id_to_move in self.backbone_side_atoms:
            if atom_id_to_move not in backbone_path_indexes:
                atom_ids_to_move.add(atom_id_to_move)

        if not atom_ids_to_move:
            # all atoms marked as BACKBONE_ATOM_PROP are in
            # backbone_path_indexes
            return

        for backbone_atom_id in self.backbone_path_indexes:
            # search along backbone_path_indexes to find neighbor atoms marked
            # as BACKBONE_ATOM_PROP but not in backbone_path_indexes
            next_indexes = [backbone_atom_id]
            while next_indexes:
                current_index = next_indexes.pop(0)
                for next_atom in self.raw_struct.atom[
                        current_index].bonded_atoms:
                    next_index = next_atom.index
                    if next_index in atom_ids_to_move:
                        # Find a neighbor atom marked as BACKBONE_ATOM_PROP,
                        # but not in backbone_path_indexes.
                        self.backbone_side_atoms[
                            backbone_atom_id] += self.backbone_side_atoms[
                                next_index]
                        self.backbone_side_atoms[backbone_atom_id].append(
                            next_index)
                        self.backbone_side_atoms.pop(next_index)
                        atom_ids_to_move.remove(next_index)
                        next_indexes.append(next_index)

[docs]    def markBackboneAtom(self, index):
        """
        Mark the atom as a backbone atom and make sure that if the atom is in a
        ring that all atoms in the ring are marked as backbone

        :type index: int
        :param index: The index of the atom to mark
        """

        self.raw_struct.atom[index].property[msprops.BACKBONE_ATOM_PROP] = True
        for ring_indexes in self.rings:
            if index in ring_indexes:
                for rindex in ring_indexes:
                    ratom = self.raw_struct.atom[rindex]
                    ratom.property[msprops.BACKBONE_ATOM_PROP] = True

[docs]    def setAsFragment(self):
        """
        Set backbone path and side atoms for Initiator and Terminator.
        Only the atom connected to R1 group is treated as backbone path;
        all the rest real atoms are treated as side groups.
        """

        # Use the atom connected to the first R1 marker as the head atom
        head = next(self.raw_struct.atom[self.markers[0]].bonded_atoms)
        self.backbone_path_indexes = [head.index]
        self.backbone_side_atoms = {head.index: []}
        excluded_indexes = set([head.index] + self.markers)
        for atom in self.raw_struct.atom:
            if atom.index in excluded_indexes:
                continue
            self.backbone_side_atoms[head.index].append(atom.index)

[docs]    def findRotatableBonds(self):
        """
        Find rotatable bond along the backbone path.
        """

        self.rotatable_bonds = []
        backbone = self.backbone_path_indexes
        atom_idx0 = backbone[0]
        for atom_idx1 in backbone[1:]:
            if analyze.is_bond_rotatable(self.raw_struct.getBond(
                    atom_idx0, atom_idx1),
                                         allow_methyl=True):
                self.rotatable_bonds.append({atom_idx0, atom_idx1})
            atom_idx0 = atom_idx1


[docs]class Terminator(BaseMoiety):
    """
    A unit that terminates a chain

    Note that the concept of an "Terminator" can be a bit fungible. It can
    either be the actual terminator moiety that ends a polymer chain, or it may
    be an already built chain that will be tacked on to some initiation point.
    """

[docs]    def __init__(self, struct, markers=None, name=None):
        """
        Create a Terminator object

        Terminators drawn by the user as:
            R1-stuff
        And stored as:
            marker-head-stuff

        where marker is the R1 atom, head is the atom it is bound to, and stuff
        is everything else.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure of this Moiety

        :type markers: list
        :param markers: A list of the indexes of the Rx atoms

        :type name: str
        :param name: The name of this moiety - gets added to each atom as an
            atom property
        """

        BaseMoiety.__init__(self, struct, markers=markers, name=name)
        self.findHead()
        self.moveHeadToOrigin()

[docs]    def findHead(self):
        """
        Find and store the index of the head atom
        """

        self.head = self.findAttachedAtomIndex(self.markers[0], HEAD_ATOM_PROP)

[docs]    def getNextStructure(self, *args):
        """
        Get a copy of this structure to add to a chain
        """

        return self.raw_struct.copy()

[docs]    def getCouplerAtom(self, *args):
        """
        Get the coupler (head) atom
        """

        return self.raw_struct.atom[self.head]

[docs]    def getCouplerAndGrower(self, *args):
        """
        Get the coupler and grower atoms and markers. The latter is None because
        Terminators have no grow points

        :rtype: `AttachmentAtoms`
        :return: The atoms indexes of the coupler, grower and marker atoms
        """

        coupler = self.head
        coupler_marker = self.markers[0]
        return AttachmentAtoms(coupler, coupler_marker, None, None)

[docs]    def getPDBName(self):
        """
        Get a PDB residue-like name for this moiety
        """

        # This overrides the base class return value
        return TRM


[docs]class Cascader(Terminator):
    """
    A group that ends a chain but starts multiple new chains

    Cascaders are drawn as::

            R2
            |
        R1-stuff-R2

    and stored as::

                  cascader
                      |
        marker-head-stuff-cascader

    where cascader is an atom property added to the R2 marker atoms
    """

[docs]    def __init__(self, *args, **kwargs):
        Terminator.__init__(self, *args, **kwargs)
        self.findBackbone()

[docs]    def findBackbone(self):
        """
        Find the set of atoms that creates the shortest distance (number of
        bonds) between the head and each cascade points. These are considered
        backbone atoms.

        Backbone atoms are marked with a backbone atom property.
        """

        all_backbone = []
        for atom in self.raw_struct.atom:
            if atom.property.get(CASCADER_ATOM_PROP):
                path = analyze.find_shortest_bond_path(self.raw_struct,
                                                       self.head, atom.index)
                all_backbone.extend(path)
                if len(path) > len(self.backbone_path_indexes):
                    self.backbone_path_indexes = path
        # Set the backbone property
        for index in all_backbone:
            self.markBackboneAtom(index)

[docs]    def findHead(self):
        """
        Find the head atom, but also mark cascader atoms that will start new
        points
        """

        Terminator.findHead(self)
        for index in self.markers[1:]:
            self.raw_struct.atom[index].property[
                CASCADER_MARKER_ATOM_PROP] = True
            next(self.raw_struct.atom[index].bonded_atoms
                ).property[CASCADER_ATOM_PROP] = True

[docs]    def readFromStructure(self, struct):
        """
        Overrides the parent method to add and read Cascader-specific
        properties.

        See the parent class method for documentation
        """

        # Check all required properties are available
        propexps = [(CASCADE_GEN_PROP, 'the number of cascade generations')]
        BaseMoiety.readFromStructure(self, struct, propexps=propexps)

        # Convert the structure properties to instance data
        self.generations = struct.property[CASCADE_GEN_PROP]

[docs]    @classmethod
    def write(cls, generations, filename, struct, *args):
        """
        Over-ride the parent class method to add generations as a property to
        the structure.

        :type generations: int
        :param generations: The number of cascade generations allowed

        See the parent class method for documentation
        """

        struct.property[CASCADE_GEN_PROP] = generations
        Terminator.write(filename, struct, *args, classname='Cascader')

[docs]    def getPDBName(self):
        """
        Get a PDB residue-like Name for this moiety
        """

        return CAS

[docs]    def setAsFragment(self):
        """
        Prepare Cascader information for in-cell grow.
        """

        self.findSideGroup()
        self.finalizeSideGroup()
        self.findBackbone2BranchingPoint([self.markers[0]])
        self.findRotatableBonds()


[docs]class Initiator(BaseMoiety):
    """
    A group that starts the whole polymer off - can have multiple grow points,
    which means that multiple chains will radiate from this group (a dendrimer).

    Note that the concept of an "Initiator" can be a bit fungible. It can either
    be the actual intiator moiety that starts the whole polymer, or it may be a
    partially built polymer that still needs additional chains added to it.
    """

[docs]    def __init__(self, *args, **kwargs):
        """
        Overrides the parent init method to allow the parial_polymer keyword
        parameter.

        :type partial_polymer: bool
        :keyword partial_polymer: If True, this Initiator is actually a
            partially built polymer that is going to just act like an initiator.
        """

        self.partial_polymer = kwargs.pop('partial_polymer', False)
        BaseMoiety.__init__(self, *args, **kwargs)
        for grow_marker in self.markers:
            self.findAttachedAtomIndex(grow_marker, HEAD_ATOM_PROP)
        if not self.partial_polymer:
            for atom in self.raw_struct.atom:
                atom.resnum = 1
                atom.pdbres = self.getPDBName()

[docs]    def completePolymer(self, chain_terminator, chain_namer, clash_fixer):
        """
        Tack on an existing chain to each grow point

        :type chain_terminator: `Terminator`
        :param chain_terminator: A Terminator object that stores the existing
            chain to add

        :type chain_namer: function
        :param chain_namer: A function that should be called on each chain
            that will be added to the existing polymer. The purpose of this
            function should be to add chain name data to the added structure.

        :type clash_fixer: function
        :param clash_fixer: A function to call on the polymer when any new chain
            is added in order to fix any clashes caused by the new chain
        """

        polymer = self.raw_struct.copy()
        if not self.partial_polymer:
            # This is a true initiator so we need to define chain information
            chain_namer(polymer)
            propagate_chain_data = len(self.markers) == 1
            if propagate_chain_data:
                # This has a single initiation point so the chain attached to
                # it will share the same chain ID
                chain_namer = False
        else:
            # This is a partial polymer, so chain data is already set on it
            propagate_chain_data = False
        for grow_marker in self.markers:
            grower = self.findAttachedAtomIndex(grow_marker, HEAD_ATOM_PROP)
            polymer.atom[grower].property[GROWER_ATOM_PROP] = True
            chain_terminator.addToChain(
                polymer,
                grower,
                grow_marker,
                orientation=HEADFIRST,
                remove_marker=False,
                propagate_chain_data=propagate_chain_data,
                chain_namer=chain_namer)
            if not propagate_chain_data:
                # We've added a whole new chain to an existing chain, so need to
                # fix clashes
                polymer = clash_fixer(polymer)

        # We wait until all chains are added to remove any marker atoms because
        # we don't want atom indexes to change during this process
        polymer.deleteAtoms(self.markers)
        return polymer

[docs]    def fillBranchPoints(self, chain_terminator, chain_branch_points,
                         chain_namer, clash_fixer):
        """
        Tack on an existing chain to each of our branch points - each point has
        a percent chance of getting a chain added to it as long as its
        generation is less than the generation limit

        :type chain_terminator: `Terminator`
        :param chain_terminator: A Terminator object that stores the existing
            chain to add

        :type chain_branch_points: list
        :param chain_branch_points: A list of atom indexes in the
            chain_terminator structure that are branch points

        :type chain_namer: function
        :param chain_namer: A function that should be called on each chain
            that will be added to the existing polymer. The purpose of this
            function should be to add chain name data to the added structure.

        :type clash_fixer: function
        :param clash_fixer: A function to call on the polymer when any new chain
            is added in order to fix any clashes caused by the new chain

        :rtype: (`schrodinger.structure.Structure`, int)
        :return: The new polymer structure with additional branching, plus the
            number of new branches that were added
        """

        polymer = self.raw_struct.copy()
        markers_to_delete = []
        for grow_marker in self.markers:
            grower = self.findAttachedAtomIndex(grow_marker)
            markatom = polymer.atom[grow_marker]
            generation = markatom.property[BRANCH_GEN_ATOM_PROP]
            if generation == markatom.property[BRANCH_MAXGEN_ATOM_PROP]:
                # We've reached the maximum number of branch generations
                markatom.property[BRANCH_PCT_ATOM_PROP] = 0
                continue
            chance = get_random_nonzero_to_one() * 100.
            if chance > markatom.property[BRANCH_PCT_ATOM_PROP]:
                # Failed the chance to branch at this atom
                markatom.property[BRANCH_PCT_ATOM_PROP] = 0
                continue
            # All tests have passed, we'll branch at this point
            polymer.atom[grower].property[BRANCH_ATOM_PROP] = True
            num_existing_atoms = polymer.atom_total
            chain_terminator.addToChain(polymer,
                                        grower,
                                        grow_marker,
                                        orientation=HEADFIRST,
                                        remove_marker=False,
                                        chain_namer=chain_namer)
            polymer = clash_fixer(polymer)
            # Mark the correct generation on all the newly-added branching atoms
            for index in chain_branch_points:
                batom = polymer.atom[num_existing_atoms + index - 1]
                batom.property[BRANCH_GEN_ATOM_PROP] = generation + 1
            markers_to_delete.append(grow_marker)
        # Wait to delete all marker atoms until the end so that atom indexes do
        # not change
        polymer.deleteAtoms(markers_to_delete)
        return polymer, len(markers_to_delete)

[docs]    def getPDBName(self):
        """
        Get a PDB residue-like Name for this moiety
        """

        return INI


[docs]class Monomer(BaseMoiety):
    """
    The repeating unit that makes up the polymer chain
    """

[docs]    def __init__(self, struct):
        """
        Create a Monomer object

        Monomers drawn by the user as:
            R1-stuff-R2
        And stored as:
            marker1-head-stuff-tail-marker2

        where marker1 is the R1 atom, head is the atom it is bound to, marker2
        is the R2 atom and tail is the atom it is bound to, and stuff
        is everything else.
        """

        BaseMoiety.__init__(self, struct)
        # 4 structures are stored for each instance based on orientation and
        # chirality - HEADFIRST/R, HEADFIRST/S, TAILFIRST/R, TAILFIRST/S
        # precalculating these saves time when building the polymer. To access
        # the HEADFIRST, R structure, use
        # self.structures[HEADFIRST][msprops.CHIRAL_R]
        self.structures = {HEADFIRST: {}, TAILFIRST: {}}
        self.tail = None
        self.tactical_index = None
        # backbone_path_indexes may differ from the set of atoms marked with the
        # backbone atom property. The former contains only the atoms that form
        # the shortest path while the latter will additionally contain any
        # atoms that are in the same ring as an atom in backbone_path_indexes
        self.default_chirality = None
        self.previous_chirality = None
        self.directional = False
        self.findHeadTailBranch()
        self.findBackbone()
        self.findSideGroup()
        if self.all_trans:
            self.setAllTransBackbone()
        self.alignOnXAxis()
        self.setupTacticity()
        self.setDirecional()
        self.flipTailFirst()
        self.rings = self.raw_struct.find_rings(sort=True)
        self.is_monosaccharide = self.getMonosaccharideType() is not None

[docs]    def setAllTransBackbone(self):
        """
        Set all the backbone dihedrals to 180
        """

        atomlist = [self.markers[0]]
        atomlist += self.backbone_path_indexes
        atomlist += [self.markers[1]]
        for index in range(len(atomlist) - 3):
            indexes = atomlist[index:index + 4]
            try:
                self.raw_struct.adjust(180., *indexes)
            except structure.AtomsInRingError:
                # Center two atoms are in a ring; dihedral can't be adjusted.
                pass

[docs]    def hasChirality(self):
        """
        Check if this monomer has a chiral backbone

        :rtype: bool
        :return: Whether the backbone has a chiral atom
        """

        return bool(self.tactical_index)

[docs]    def findHeadTailBranch(self):
        """
        Find and mark the head, tail and any branching atoms
        """

        self.head = self.findAttachedAtomIndex(self.markers[0], HEAD_ATOM_PROP)
        self.tail = self.findAttachedAtomIndex(self.markers[1], TAIL_ATOM_PROP)
        for marker in self.markers[2:]:
            # This marks the atom attached to marker as a branch atom
            self.findAttachedAtomIndex(marker, BRANCH_ATOM_PROP,
                                       self.branching_percent)
            batom = self.raw_struct.atom[marker]
            batom.property[BRANCH_PCT_ATOM_PROP] = self.branching_percent
            batom.property[BRANCH_GEN_ATOM_PROP] = 1
            batom.property[BRANCH_MAXGEN_ATOM_PROP] = self.branching_max_gen

[docs]    def findBackbone(self):
        """
        Find the set of atoms that creates the shortest distance (number of
        bonds) between the head and tail atoms. This is the monomer backbone.

        Backbone atoms are marked with a backbone atom property
        """

        self.backbone_path_indexes = analyze.find_shortest_bond_path(
            self.raw_struct, self.head, self.tail)
        # Set the backbone property
        for index in self.backbone_path_indexes:
            self.markBackboneAtom(index)

[docs]    def findRingSideGroup(self, struct, terminator_indexes, root_index):
        r"""
        Return a list of atoms bound to root_index (and recursively all atoms
        bound to those atoms, ad infinitum).  terminator_indexes is a set of
        atoms that terminate the group - when a terminator atom is encountered,
        the group stops growing in that direction.  Unlike
        Structure.getMovingAtoms or buildcomplex.find_atoms_to_remove, this
        method gracefully handles cases where terminator atoms and group atoms
        share the same ring.  Imagine naphthalene::

            3   7
           / \ / \
          4   2   8
          |   |   |
          5   1   9
           \ / \ /
            6   10

        If root_index=6 and terminator_indexes={1}, all other atoms will become
        part of the same group as 6 - because there is a path all the way around
        the outer ring that bypasses 1 going clockwise from 6.

        If root_index=6 and terminator_indexes={1,2}, then only atoms 6, 5, 4, 3
        will be members of the group, because both atoms 1 and 2 terminate the
        group.

        root_index is always part of the group

        :type struct: schrodinger.structure.Structure
        :param struct: The structure to use

        :type terminator_indexes: set of int
        :param terminator_indexes: The indexes of the atoms that terminate the
            group.

        :type root_index: int
        :param root_index: The index of the first atom in the group. All
            neighbors of this atom that are not terminator atoms will be added
            to the list.

        :rtype: list
        :return: A list of all atoms recursively bound to the root atom,
            including root_index itself but not including any atom in
            terminator_indexes
        """

        indexes_not_yet_checked = {root_index}
        group_indexes = set()
        while indexes_not_yet_checked:
            check_index = indexes_not_yet_checked.pop()
            group_indexes.add(check_index)
            for atom in struct.atom[check_index].bonded_atoms:
                adex = atom.index
                if adex in terminator_indexes:
                    # Stop going in this direction if this is a terminator atom
                    pass
                elif adex not in group_indexes and \
                    adex not in indexes_not_yet_checked:
                    # We haven't encountered this atom before
                    indexes_not_yet_checked.add(adex)
        return list(group_indexes)

[docs]    def alignOnXAxis(self):
        """
        Align the monomer so the head is at the origin and the backbone is
        aligned to the +X axis
        """

        self.moveHeadToOrigin()

        if len(self.backbone_path_indexes) < 2:
            # Nothing to align - single atom backbone
            return

        # Now rotate the backbone vector to align with the +X axis.  The
        # backbone vector points from the head (origin) to the tail
        backbone_vector = self.raw_struct.atom[self.tail].xyz
        self.alignToVector(self.raw_struct, backbone_vector, transform.X_AXIS)

[docs]    def flipTailFirst(self):
        """
        Flip the structure so the tail is on the origin and the backbone is
        aligned to the +X axis
        """

        # matrix for rotation 180 degrees about the Y-axis
        matrix = transform.get_rotation_matrix(transform.Y_AXIS, numpy.pi)

        for chirality, struct in self.structures[HEADFIRST].items():
            scopy = struct.copy()
            # Rotate around the Y-axis
            transform.transform_structure(scopy, matrix)
            # Now move the tail to the origin
            translate = [-coord for coord in scopy.atom[self.tail].xyz]
            transform.translate_structure(scopy, *translate)
            self.structures[TAILFIRST][chirality] = scopy

[docs]    def setDirecional(self):
        """
        Set whether the monomer has directional properties.
        """

        if msutils.is_coarse_grain(self.raw_struct):
            return

        head_capper_elements = set([
            x.element
            for x in self.raw_struct.atom[self.head].bonded_atoms
            if len(x.bond) == 1 and x.index != self.markers[0]
        ])
        tail_capper_elements = set([
            x.element
            for x in self.raw_struct.atom[self.tail].bonded_atoms
            if len(x.bond) == 1 and x.index != self.markers[1]
        ])

        elements = set(['H', 'F', 'Cl', 'Br', 'I', 'At'])
        hc_elements = elements.difference(head_capper_elements)
        tc_elements = elements.difference(tail_capper_elements)
        if len(hc_elements) == 1:
            tc_elements = tc_elements.difference(hc_elements)
        elif len(tc_elements) == 1:
            hc_elements = hc_elements.difference(tc_elements)

        st_orig = self.raw_struct.copy()
        for ele in hc_elements:
            st_orig.atom[self.markers[0]].element = ele
            try:
                tc_elements.remove(ele)
            except KeyError:
                pass
            break
        for ele in tc_elements:
            st_orig.atom[self.markers[1]].element = ele
            break

        st_flipped = self.raw_struct.copy()
        st_flipped.atom[self.markers[1]].element = st_orig.atom[
            self.markers[0]].element
        st_flipped.atom[self.markers[0]].element = st_orig.atom[
            self.markers[1]].element
        self.directional = not st_orig.isEquivalent(st_flipped)

[docs]    def setupTacticity(self):
        """
        Store both R and S structures for the monomer so we don't have to keep
        inverting the chirality when needed.  This takes a miniscule amount of
        effort, since it is only done once per monomer type
        """

        # Find a chiral atom in the backbone
        chiral_indexes = analyze.get_chiral_atoms(self.raw_struct)
        backset = set(self.backbone_path_indexes)
        self.tactical_index = None
        bb_chirality = None
        for index, chirality in chiral_indexes.items():
            if chirality in R_S and index in backset:
                if self.tactical_index:
                    # We only allow tacticity for monomers with 1 chiral
                    # backbone atom
                    self.tactical_index = None
                    bb_chirality = None
                    break
                self.tactical_index = index
                bb_chirality = chirality
        self.default_chirality = bb_chirality

        # Label the atoms by bb chirality
        scopy = self.raw_struct.copy()
        prop = PROP_BY_CHIRALITY[bb_chirality]
        for atom in self.raw_struct.atom:
            atom.property[prop] = True

        self.structures[HEADFIRST][bb_chirality] = self.raw_struct
        if self.tactical_index:
            # Store a structure of the opposite chirality
            tactical_atom = self.raw_struct.atom[self.tactical_index]
            tactical_atom.property[CHIRAL_BB_ATOM_PROP] = True

            # Create and store a structure with the opposite chirality
            fixed_atoms = []
            # Find two backbone neighbors to the chiral atom
            candidates = backset
            candidates.update(self.markers[:2])
            tactical_atom = scopy.atom[self.tactical_index]
            tactical_atom.property[CHIRAL_BB_ATOM_PROP] = True
            for neighbor in tactical_atom.bonded_atoms:
                if neighbor.index in candidates:
                    fixed_atoms.append(neighbor.index)
                    if len(fixed_atoms) == 2:
                        break

            # Convert to the other chirality and store the structure
            mm.mmstereo_atom_invert_chirality(scopy, self.tactical_index,
                                              *fixed_atoms)
            other_chirality = get_other_item(R_S, bb_chirality)
            # Label the atoms by bb chirality
            other_prop = PROP_BY_CHIRALITY[other_chirality]
            for atom in scopy.atom:
                atom.property[other_prop] = True
            self.structures[HEADFIRST][other_chirality] = scopy

[docs]    def getNextStructure(self, orientation):
        """
        Get the next monomer structure with the given orientation. "Next" in
        this case takes into account the chirality of the previous structure -
        for syntactic polymers, the monomers alternate chirality.

        :type orientation: str
        :param orientation: HEADFIRST or TAILFIRST constants

        :rtype: `schrodinger.structure.Structure`
        :return: The next structure to use for this monomer
        """

        if self.default_chirality is None:
            chirality = None
        elif self.tacticity == TACTICITY_ISO:
            chirality = self.default_chirality
        elif self.tacticity == TACTICITY_ATAC:
            chirality = random.choice(R_S)
        else:
            if not self.previous_chirality:
                chirality = self.default_chirality
            else:
                chirality = get_other_item(R_S, self.previous_chirality)
        self.previous_chirality = chirality
        return self.structures[orientation][chirality].copy()

[docs]    def getCouplerAndGrower(self, orientation):
        """
        Get the coupler, coupler_marker, grower and grow_marker atoms for this
        monomer

        :type orientation: str
        :param orientation: HEADFIRST or TAILFIRST constants

        :rtype: `AttachmentAtoms`
        :return: The atoms indexes of the coupler, grower and marker atoms.
            Which one is the head and which is the tail depends on orientation.
        """

        headtail = [self.head, self.tail]
        if orientation == HEADFIRST:
            index = 0
        else:
            index = 1
        coupler = headtail[index]
        coupler_marker = self.markers[index]
        grower = get_other_item(headtail, coupler)
        grow_marker = get_other_item(self.markers, coupler_marker)
        return AttachmentAtoms(coupler, coupler_marker, grower, grow_marker)

[docs]    def getCouplerAtom(self, orientation):
        """
        Get the coupler atom.

        :type orientation: str
        :param orientation: HEADFIRST or TAILFIRST constants

        :rtype: `_StructureAtom`
        :return: The coupler atom
            Whether this is the head and or the tail depends on orientation.
        """

        if orientation == HEADFIRST:
            return self.raw_struct.atom[self.head]
        else:
            return self.raw_struct.atom[self.tail]

[docs]    def isBrancher(self):
        """
        Is this monomer a braching monomer?

        :rtype: bool
        :return: Whether this monomer is a branching monomer
        """

        return len(self.markers) == 3

[docs]    def isLadder(self):
        """
        Is this monomer a braching monomer?

        :rtype: bool
        :return: Whether this monomer is a branching monomer
        """

        return len(self.markers) == 4

[docs]    @staticmethod
    def addPropsToStructure(struct, letter, markers, name, tacticity,
                            branching_percent, branching_max_gen, all_trans):
        """
        Add properties to the structure that the driver will need as input

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to add the properties to

        :type letter: str
        :param letter: The one-character code for this monomer

        :type markers: list of int
        :param markers: Atom indexes of the marker atoms

        :type name: str
        :param name: The title to use

        :type tacticity: str
        :param tacticity: One of the TACTICITY module constants specifying the
            tacticity for this monomer

        :type branching_percent: float
        :param branching_percent: The % chance to branch for this monomer

        :type branching_max_gen: int
        :param branching_max_gen: The maximum branching generations for this
            monomer

        :type all_trans: bool
        :param all_trans: Whether the intra-monomer backbone dihedrals should be
            all set to 180.
        """
        struct.property[MOIETY_PROP] = 'Monomer_%s' % letter
        struct.property[MARKER_PROP] = ','.join([str(x) for x in markers])
        struct.title = name
        struct.property[TACTICITY_PROP] = tacticity
        struct.property[BRANCH_PCT_PROP] = branching_percent
        struct.property[BRANCH_MAX_PROP] = branching_max_gen
        struct.property[BBTRANS_PROP] = all_trans

[docs]    @classmethod
    def write(cls, filename, struct, *args):
        """
        Write out the structure

        :type filename: str
        :param filename: The path to the file

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to write out

        See the addPropsToStructure method for additional argument documentation
        """

        cls.addPropsToStructure(struct, *args)
        struct.append(filename)

[docs]    def readFromStructure(self, struct):
        """
        Overrides the parent method to add and read Monomer-specific
        properties.

        See the parent class method for documentation
        """

        # Check all required properties are available
        propexps = []
        propexps.append(
            (TACTICITY_PROP, 'tacticity, which can have values of '
             '%s, %s or %s' % (TACTICITY_ISO, TACTICITY_SYN, TACTICITY_ATAC)))
        propexps.append((BRANCH_PCT_PROP, 'percent branching probability'))
        propexps.append((BRANCH_MAX_PROP, 'maximum branching generations'))
        propexps.append((BBTRANS_PROP, 'whether the monomer backbone is all '
                         'trans'))
        BaseMoiety.readFromStructure(self, struct, propexps=propexps)

        # Convert the structure properties to instance data
        msg = ('For monomers, the %s property value must be "%s", where x is a '
               'single letter that is different for each monomer. %s instead '
               'has the value "%s"')
        letters = struct.property[MOIETY_PROP].split('_')[-1]
        letter = letters[0]
        letter_up = letter.upper()
        if len(letter) > 1 or letter.upper() not in string.ascii_uppercase:
            raise MoietyError(msg %
                              (MOIETY_PROP, MONOMERX, struct.title, letter))
        # self.letter is letter assigned as the code of this monomer and the
        # first letter in self.letter is a single letter
        self.letter = letter_up + letters[1:]
        self.all_trans = struct.property[BBTRANS_PROP]
        self.tacticity = struct.property[TACTICITY_PROP]
        self.branching_percent = struct.property[BRANCH_PCT_PROP]
        self.branching_max_gen = struct.property[BRANCH_MAX_PROP]

[docs]    def getPDBName(self):
        """
        Get a PDB residue-like Name for this moiety
        """

        # self.letter is the single letter assigned as the code of this monomer
        return self.letter

[docs]    def setAsFragment(self):
        """
        Prepare monomer information for in-cell grow.
        """

        self.finalizeSideGroup()
        self.findBackbone2BranchingPoint([self.markers[0]])
        self.findRotatableBonds()

[docs]    def breakTemplatePolymer(self):
        """
        Copy the original moieties to tmp_moieties and break the monomers
        into small new monomers according to rotatable bonds in side group.

        :rtype: list of {Monomer}
        :return: the small monomer moieties that replace the old large one
        """

        # FIXME: should modify this function to be compatible with CAS
        self.tmp_moieties = [copy.deepcopy(self)]
        self.tmp_moieties[-1].pdbres = self.getPDBName().ljust(4)
        self.finished_moieties = []
        while self.tmp_moieties:
            tmp_moiety = self.tmp_moieties.pop(0)
            bonds_to_break = tmp_moiety.findBondsToBreak()
            struct_copy = tmp_moiety.raw_struct.copy()
            struct_copy.deleteAtoms(tmp_moiety.markers)
            for atom_ids in bonds_to_break:
                struct_copy.deleteBond(*atom_ids)
            self.stripSideStructs(struct_copy, tmp_moiety.pdbres)
        return self.finished_moieties

[docs]    def findBondsToBreak(self):
        """
        Search the side groups of all backbone atoms and find the
        'first neighbor' rotatable bonds.

        :rtype: list of tuple
        :return: 'first neighbor' rotable bonds in side groups for all backbone
            atoms
        """

        bonds_to_break = []

        for backbone_atom_id, side_atom_ids in self.backbone_side_atoms.items():
            bonds_to_break += nearest_rotatable_bonds(self.raw_struct,
                                                      self.rings,
                                                      backbone_atom_id,
                                                      side_atom_ids)
        return bonds_to_break

[docs]    def stripSideStructs(self, struct_copy, pdbres):
        """
        Strip the sub structures in side groups and form new monomers from
        these sub structures.

        :type struct_copy: `schrodinger.structure.Structure`
        :param struct_copy: the structure copy with 'first neighbor' rotatable
            bonds deleted.

        :type pdbres: str
        :param pdbres: the name of the residue to strip side structs from
        """

        for molecule in struct_copy.molecule:
            struct = molecule.extractStructure(copy_props=True)
            for atom in struct.atom:
                if atom.property.get(HEAD_ATOM_PROP):
                    head_atom_id = atom.index
                if atom.property.get(TAIL_ATOM_PROP):
                    # This is the tail atom of parent monomer
                    is_finished_moiety = True
                    break
            else:
                # This is a new struct broken from parent monomer, which
                # does not contain the backbone of parent monomer
                # Find the longest path from root to leaf as new backbone
                is_finished_moiety = False
                graph = analyze.create_nx_graph(struct)
                shortest_path_length = networkx.shortest_path_length(
                    graph, source=head_atom_id)
                tail_atom_id = max(shortest_path_length,
                                   key=shortest_path_length.get)
                struct.atom[tail_atom_id].property[TAIL_ATOM_PROP] = True

            resname = self.getNewResidueName(is_finished_moiety)
            self.updateTemplatePolymerResidue(struct, pdbres, resname)
            assign_markers(struct)
            new_monomer = Monomer(struct)
            new_monomer.pdbres = resname
            if is_finished_moiety:
                self.finished_moieties.append(new_monomer)
            else:
                self.tmp_moieties.append(new_monomer)

[docs]    def getNewResidueName(self, is_finished_moiety):
        """
        Generate the residue name for new sub structure.

        :type is_finished_moiety: bool
        :param is_finished_moiety: whether the new sub structure contains
            the backbone of the parent monomer

        :rtype: str
        :return: residue name for new sub structure
        """

        if is_finished_moiety:
            # new Monomers based on Monomer 'X   ' have resnames from 'X0  '
            # to 'X999'
            pdb_name = self.getPDBName()
            digits = ''.join(x for x in pdb_name if x.isdigit())
            letters = pdb_name.replace(digits, '')
            resname = letters + str(len(self.finished_moieties))
        elif self.tmp_moieties:
            last_resname = self.tmp_moieties[-1].pdbres.rstrip()
            resname = str(int(last_resname) + 1)
        else:
            resname = self.getFirstTmpResname()

        return resname.ljust(4)

[docs]    def getFirstTmpResname(self):
        """
        The resname of the first temporary moiety in self.tmp_moieties list.

        :type resname: str
        :param resname: residue name of a moiety
        """

        tmp_resnames = []
        for residue in self.template_polymer.residue:
            try:
                # Temporary residue pdbres is '0', '1', ...
                # Finished residue pdbres is 'A0', 'A1', ...
                tmp_resnames.append(int(residue.pdbres))
            except ValueError:
                pass

        if tmp_resnames:
            return str(max(tmp_resnames) + 1)
        else:
            return '0'

[docs]    def updateTemplatePolymerResidue(self, struct, pdbres, resname):
        """
        Update the residue number and name for the atoms in template
        polymer for the newly added residue.

        :type struct: `schrodinger.structure.Structure`
        :param struct: the new sub struct to form new monomer

        :type pdbres: str
        :param pdbres: name of the parent residue

        :type resname: str
        :param resname: residue name of the new sub struct
        """

        map_monomer_atom_id = {}
        for atom in struct.atom:
            map_monomer_atom_id[atom.property[
                msprops.MONOMER_ORIG_ATOM_IDX_PROP]] = atom.index

        residue_number = 0
        for residue in self.template_polymer.residue:
            residue_number += 1
            residue.resnum = residue_number
            if residue.pdbres != pdbres:
                continue
            residue_number += 1
            for atom in residue.atom:
                orig_atom_id = atom.property[msprops.MONOMER_ORIG_ATOM_IDX_PROP]
                if orig_atom_id in map_monomer_atom_id:
                    atom.pdbres = resname
                    atom.resnum = residue_number
                    atom.property[
                        msprops.
                        MONOMER_ORIG_ATOM_IDX_PROP] = map_monomer_atom_id[
                            orig_atom_id]

[docs]    def getMonosaccharideType(self):
        """
        Get the monosaccharide type for this monomer, if any

        :rtype: str
        :return: The monosaccharide type, or None if the monomer is not a
            monosaccharide
        """
        return self.raw_struct.property.get(MONOSACC_PROP)


[docs]def remove_stale_props(st):
    """
    Remove stale properties from the given structure.

    :type st: `schrodinger.structure.Structure`
    :param st: the structure from which to remove the stale properties
    """

    # these properties could be present if the structure was read
    # from moiety files prepared by the polymer builder GUI
    atom_props = list(PROP_BY_CHIRALITY.values()) + [CHIRAL_BB_ATOM_PROP]

    for atom in st.atom:
        for key in atom_props:
            atom.property.pop(key, None)


[docs]class Moieties(object):
    """
    A holder and manager for the various moieties that make up the polymer
    """

[docs]    def __init__(self, filename, structs=None, one_letter=True, simple=False):
        """
        Create a Moieties object

        :type filename: str
        :param filename: The path to the file that holds the moiety structures

        :type structs: list
        :param structs: List of structure moiety

        :type one_letter: If True, the monomer letter must a one letter code
        :param one_letter: bool

        :param bool simple: If True, use the simplified input rules -
            initiator/terminator need not be specified, structures not marked
            with MOIETY_PROP are considered monomers, etc.
        """

        self.initiator = None
        self.terminator = None
        self.cascader = None
        self.monomers = {}
        self.one_letter = one_letter
        untyped = 'untyped'

        single_msg = ('There is more than one structure in the input '
                      'file with %s defined. There can be only one.')
        if not structs:
            structs = structure.StructureReader(filename)

        untyped_monomers = []
        for struct in structs:
            remove_stale_props(struct)
            # Each structure has a property defined that tells what kind of
            # moiety it is
            try:
                mtype = struct.property[MOIETY_PROP].lower()
            except KeyError:
                if simple:
                    mtype = untyped
                else:
                    raise MoietyError('Each structure in the input file must '
                                      'have the %s property defined.' %
                                      MOIETY_PROP)
            if mtype == INITIATOR:
                if self.initiator:
                    raise MoietyError(single_msg % INITIATOR)
                self.initiator = Initiator(struct)
            elif mtype == TERMINATOR:
                if self.terminator:
                    raise MoietyError(single_msg % TERMINATOR)
                self.terminator = Terminator(struct)
            elif mtype == CASCADER:
                if self.cascader:
                    raise MoietyError(single_msg % CASCADER)
                self.cascader = Cascader(struct)
            elif mtype == untyped:
                untyped_monomers.append(struct)
            elif (self.one_letter and mtype[:-1] == MONOMERX[:-1]) or (
                    not self.one_letter and mtype.startswith(MONOMERX[:-1])):
                this_monomer = Monomer(struct)
                letter = this_monomer.letter
                if letter in self.monomers:
                    raise MoietyError('There is more than one monomer defined '
                                      'with the code "%s".' % letter)
                self.monomers[letter] = this_monomer
            else:
                raise MoietyError('%s has an undefined value for %s' %
                                  (struct.title, MOIETY_PROP))

        if simple:
            self.handleSimpleInput(untyped_monomers)

        singlets = [
            x for x in [self.initiator, self.terminator, self.cascader] if x
        ]
        self.all_moieties = list(self.monomers.values()) + singlets

[docs]    def handleSimpleInput(self, untyped_monomers):
        """
        Fill out any required moiety information that was not supplied due
        to the simplified input format

        :param list untyped_monomers: Structures that were not typed in the
            input file so need to have monomer codes assigned to them.
        """

        self._createSimpleIT()
        self._assignUntypedMonomers(untyped_monomers)

    def _createSimpleIT(self):
        """ Create simple initiator and/or terminator if required """

        # Create H initiator/terminator if needed
        # Note - we don't have to specify if we want an AA or CG
        # end group because CG initiator/teminators are deleted after the
        # polymer is built, so it does not matter if they are AA end groups.
        # CG just needs "something" to put there temporarily.
        if not self.initiator:
            struct = get_generic_end_structure()
            struct.title = 'aH'
            self.initiator = Initiator(struct)
        if not self.terminator:
            struct = get_generic_end_structure()
            struct.title = 'wH'
            self.terminator = Terminator(struct)

    def _assignUntypedMonomers(self, untyped_monomers):
        """
        Assign unused codes to monomers that didn't have input codes assigned.
        Also assign necessary structure properties
        """

        available_codes = [
            x for x in string.ascii_uppercase if x not in self.monomers
        ]
        for struct in untyped_monomers:
            try:
                good_code = available_codes.pop(0)
            except IndexError:
                raise MoietyError('Too many (> 26) unique monomers requested')
            marker_string = struct.property.get(MARKER_PROP, None)
            if marker_string:
                markers = [int(x) for x in marker_string.split(',')]
            else:
                markers = self.findSimpleHeadTailMarkers(struct)
            name = struct.title
            tacticity = struct.property.get(TACTICITY_PROP, TACTICITY_ATAC)
            branch_pct = struct.property.get(BRANCH_PCT_PROP, 0.0)
            branch_maxg = struct.property.get(BRANCH_MAX_PROP, 0)
            all_trans = struct.property.get(BBTRANS_PROP, False)
            Monomer.addPropsToStructure(struct, good_code, markers, name,
                                        tacticity, branch_pct, branch_maxg,
                                        all_trans)
            self.monomers[good_code] = Monomer(struct)

[docs]    def findSimpleHeadTailMarkers(self, struct):
        """
        Find the atoms that mark the head and tail atoms for monomers that
        did not have these marked on input. The "marker" atom is the one that is
        bonded to the head or tail and gets deleted when joining monomers.

        For now, a simple algorithm is used that finds the longest bond path
        in the structure and uses that, preferrably one terminated with H
        atoms in case of ties.

        :param `schrodinger.structure.Structure` struct: The monomer structure

        :rtype: list
        :return: First item is the index of the head atom marker, the second
            item is the index of the tail atom marker
        """

        # Create the graph of the structure
        st_graph = analyze.create_nx_graph(struct)

        # We choose backbones that start and end with H preferentially
        hydrogens = {x.index for x in struct.atom if x.element == 'H'}
        backbone = graph.find_backbone(st_graph, prefer_indexes=hydrogens)
        head_marker = backbone[0]
        tail_marker = backbone[-1]
        return [head_marker, tail_marker]

[docs]    def hasCascader(self):
        """
        Has a cascader been defined?

        :rtype: bool
        :return: Whether a cascader was defined
        """

        return bool(self.cascader)

[docs]    def hasBrancher(self):
        """
        Has a brancher been defined?

        :rtype: bool
        :return: Whether a brancher was defined
        """

        return any([x.isBrancher() for x in self.monomers.values()])

[docs]    def hasLadder(self):
        """
        Return True if ladder monomers are found.

        :rtype: bool
        :return: Ladder monomers exist
        """

        return any([x.isLadder() for x in self.monomers.values()])

[docs]    def singleR1Atom(self, end_moieties=None):
        """
        Whether each end moiety has one single R1 atom.

        :param end_moieties: the end moieties to check against.
        :type end_moieties: list

        :return: True if each end moiety has one single R1 atom
        :rtype: bool
        """

        if end_moieties is None:
            end_moieties = [self.initiator, self.terminator]
        return all(len(x.markers) == 1 for x in end_moieties)

[docs]    def hasRings(self):
        """
        Do any moieties have rings?

        :rtype: bool
        :return: Whether any of the moieties have rings
        """

        return any(x.has_rings for x in self.all_moieties)

[docs]    def getMonomer(self, letter):
        """
        Get the monomer with the given one-letter code

        :type letter: str
        :param letter: The one-letter code for the desired monomer

        :rtype: `Monomer`
        :return: The monomer with the given one-letter code
        """

        return self.monomers[letter]

[docs]    def getMoiety(self, letter):
        """
        Get the get one single moiety based on 'INI', 'TRM', 'CAS' or
        monomer one-letter code.

        :type letter: str
        :param letter: The one-letter code for the desired monomer

        :rtype: `Monomer`, `Initiator`, `Cascader`, or `Terminator`
        :return: the initiator, cascader, terminat or one nonomer find by
            the one letter code
        """

        if letter == INI:
            return self.initiator
        elif letter == TRM:
            return self.terminator
        elif letter == CAS:
            return self.cascader
        else:
            return self.monomers[letter]

[docs]    def getOneLetterCodes(self):
        """
        Get all the allowed one-letter codes

        :rtype: list
        :return: All the defined one-letter codes
        """

        return list(self.monomers)

[docs]    def checkValidOneLetterCodes(self, codes):
        """
        Check that all the codes in the given string are valid

        :type codes: str
        :param codes: A string with a series of one-letter codes

        :rtype: bool
        :return: Whether all the one-letter codes are associated with a Monomer
        """

        for letter in codes:
            if letter == WILDCARD:
                continue
            try:
                self.monomers[letter]
            except KeyError:
                return False
        return True

[docs]    def checkValidWeights(self, weights):
        """
        Validate the given weights

        :type weights: dict
        :param weights: Keys are one-letter codes, values are integer weights
            for that monomer in a random copolymer

        :rtype: bool
        :return: Whether all the one-letter codes in weights are valid
        """

        for letter in self.monomers.keys():
            if letter not in weights:
                weights[letter] = 1
        codestring = "".join(weights.keys())
        return self.checkValidOneLetterCodes(codestring)

[docs]    def getCascadeGenerations(self):
        """
        Get the number of cascade generations

        :rtype: int
        :return: The number of cascade generations (0 if none)
        """

        if not self.cascader:
            return 0
        return self.cascader.generations

[docs]    def getAllMoietyNames(self):
        """
        Get the names of all Moieties - initiator, terminator, monomers, etc.

        :rtype: list
        :return: A list of all moiety names. Monomers first, then initiator,
            terminator and cascader (if defined)
        """

        names = [x.name for x in self.all_moieties]
        return names