Source code for schrodinger.application.matsci.cg_mapping

"""
Utilities for mapping CG structures to AA

Copyright Schrodinger, LLC. All rights reserved.
"""

import itertools
from collections import OrderedDict
from collections import defaultdict
from collections import namedtuple

import networkx
import numpy

from schrodinger import structure
from schrodinger.application.matsci import clusterstruct
from schrodinger.application.matsci import coarsegrain
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import msprops
from schrodinger.application.matsci import smartsutils
from schrodinger.application.matsci.nano import xtal
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.structutils import transform

# just create 130 to avoid potential future issues
MOLECULE_COLORS = 10 * [
    (255, 128, 0),  # orange
    (0, 0, 255),  # blue
    (255, 255, 0),  # yellow
    (0, 255, 0),  # green
    (0, 255, 255),  # aqua
    (127, 0, 255),  # purple
    (255, 0, 255),  # magenta
    (153, 76, 0),  # brown
    (128, 128, 128),  # gray
    (255, 0, 127),  # pink
    (76, 153, 0),  # dark green
    (0, 76, 153),  # royal blue
    (255, 255, 255)  # white
]

# unique bond keys
SMARTS_UNIQUE_BOND_LABEL = 'smarts'
SMARTS_UNIQUE_BOND_KEY = smartsutils.GROUP_ATOM_INDEXES_PROP
POLYMER_UNIQUE_BOND_LABEL = 'polymer'
POLYMER_UNIQUE_BOND_KEY = msprops.MONOMER_ORIG_ATOM_IDX_PROP
RESIDUE_UNIQUE_BOND_LABEL = 'residue'
RESIDUE_UNIQUE_BOND_KEY = msprops.PDB_ATOM_NAME_KEY
UNIQUE_BOND_DICT = OrderedDict([
    (SMARTS_UNIQUE_BOND_LABEL, SMARTS_UNIQUE_BOND_KEY),
    (POLYMER_UNIQUE_BOND_LABEL, POLYMER_UNIQUE_BOND_KEY),
    (RESIDUE_UNIQUE_BOND_LABEL, RESIDUE_UNIQUE_BOND_KEY)
])

# chiral keys, CHIRAL_SYM_DICT maps pair tuples of bonded
# coarse grain particle chiralities according to symmetry
# equivalence, for example ('S', 'R') maps to ('R', 'S')
# and ('S', 'S') maps to ('R', 'R'), any tuple containing
# an achiral particle, i.e. 'A', maps to a pair of achiral
# particles, for example ('R', 'A') maps to ('A', 'A')
CHIRAL_PROP_DICT = {
    msprops.CHIRAL_R_MONOMER_PROP: msprops.CHIRAL_R,
    msprops.CHIRAL_S_MONOMER_PROP: msprops.CHIRAL_S
}
CHIRAL_A = 'A'
get_chiral_hash = lambda x: x if CHIRAL_A not in x else 2 * (CHIRAL_A,)
get_chiral_pair = lambda x: (x, tuple(sorted(get_chiral_hash(x))))
ARS = CHIRAL_A + msprops.CHIRAL_R + msprops.CHIRAL_S
CHIRAL_SYM_DICT = dict(map(get_chiral_pair, itertools.product(ARS, repeat=2)))
CHIRAL_RR = 2 * (msprops.CHIRAL_R,)
CHIRAL_SS = 2 * (msprops.CHIRAL_S,)
CHIRAL_SYM_DICT[CHIRAL_SS] = CHIRAL_RR
CHIRAL_SEPARATOR = coarsegrain.CHIRAL_SEPARATOR

RESIDUE_NAME_KEY = 's_m_pdb_residue_name'
RESIDUE_NUMBER_KEY = 'i_m_residue_number'
DEFINITION_NAME_KEYS = [smartsutils.GROUP_NAME_PROP, \
    smartsutils.GROUP_NAMES_PROP, msprops.MONOMER_NAME_PROP, RESIDUE_NAME_KEY]
DEFINITION_NUMBER_KEYS = [smartsutils.GROUP_NUMBER_PROP, \
    smartsutils.GROUP_NUMBERS_PROP, RESIDUE_NUMBER_KEY, RESIDUE_NUMBER_KEY]
DEFINITION_KEYS = list(zip(DEFINITION_NAME_KEYS, DEFINITION_NUMBER_KEYS))

PDB_ATOM_NAME_KEY = msprops.PDB_ATOM_NAME_KEY
POLYMER_BACKBONE_TAG = '-backbone'
POLYMER_BACKBONE_AND_ADJOINING_TAG = '-backbone-and-adjoining'
POLYMER_SIDEGROUP_TAG = '-sidegroup-'
PROTEIN_BACKBONE_TAG = '-backbone'
PROTEIN_SIDECHAIN_TAG = '-sidechain'
GENERAL_TAG = 'molecule-'

# PRO is allowed to have a side-chain
RESIDUES_NO_SIDECHAIN = ['ACE', 'GLY', 'NMA']
BACKBONE_PDB_ATOM_NAMES = ['H', 'N', 'CA', 'HA', 'C', 'O']

NonContiguousBondingMsg = ('Particles containing atoms that are '
                           'not contiguously bonded are currently not '
                           'supported.')

CG_BOND_ORDER = 1
CG_PARTICLE_INDICES = 's_matsci_cg_particle_indices'
CG_TAG = '_cg'

# CG model will inherit the following properties from
# its parent structure
INHERITABLE_PROPERTIES = [
    xtal.Crystal.SPACE_GROUP_KEY, xtal.Crystal.A_KEY, xtal.Crystal.B_KEY,
    xtal.Crystal.C_KEY, xtal.Crystal.ALPHA_KEY, xtal.Crystal.BETA_KEY,
    xtal.Crystal.GAMMA_KEY, xtal.Crystal.CHORUS_BOX_AX_KEY,
    xtal.Crystal.CHORUS_BOX_AY_KEY, xtal.Crystal.CHORUS_BOX_AZ_KEY,
    xtal.Crystal.CHORUS_BOX_BX_KEY, xtal.Crystal.CHORUS_BOX_BY_KEY,
    xtal.Crystal.CHORUS_BOX_BZ_KEY, xtal.Crystal.CHORUS_BOX_CX_KEY,
    xtal.Crystal.CHORUS_BOX_CY_KEY, xtal.Crystal.CHORUS_BOX_CZ_KEY
]

# prop_name is the atom property name associated with the atom definition, idx
# is the index of the sought definition (used for sorting available
# definitions), name is the name of the definition, and numberstr is the molecule
# number (n), chain name (c), and match index (m) combined as 'n-c-m'
# for example ('s_m_pdb_residue_name', 2, GLY, 4-B-7) means that this atom
# has with residue name as (s_m_pdb_residue_name) GLY matches the 3rd sought
# definition (0 indexed) and is in the 4th molecule, chain B, and 7th of such
# residues
ATOM_DEFINITION = namedtuple('ATOM_DEFINITION',
                             ['prop_name', 'idx', 'name', 'numberstr'])


[docs]def get_cg_particle_indices(atom):
    """
    Return a list of CG particle indices for the given atom.

    :type atom: schrodinger.structure._StructureAtom
    :param atom: the atom

    :rtype: list
    :return: CG particle indices
    """

    tokens = atom.property.get(CG_PARTICLE_INDICES)
    if tokens:
        return list(map(int, tokens.split(smartsutils.ATOM_PROP_DELIM)))
    return []


[docs]class NonContiguousBondingError(Exception):
    """
    Exception to handle non contiguous bonding in CG
    """

[docs]    def __init__(self, msg=NonContiguousBondingMsg, *args, **kwargs):
        """
        Exception with default error message
        """
        super().__init__(msg, *args, **kwargs)


[docs]def extract_and_contract(struct, idxs=None, pbc=None, fast=False):
    """
    From the given indices and structure extract a structure and
    contract it according to the PBC.

    :type struct: structure.Structure
    :param struct: the structure from which to extract

    :type idxs: set or None
    :param idxs: a set of integer atom indices defining
        the structure to be extracted or None in which case
        all indices will be used

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: A pre-computed PBC for the structure

    :type fast: bool
    :param fast: True if the computation should be done on a throw-away
        structure that will not be used for more than a center of mass calculation,
        False if the extracted structure must preserve properties such as atom
        formal charge and structure properties.

    :rtype: structure.Structure
    :return: the extracted and contracted structure
    """

    # no need to pass in anchors to clusterstruct.contract_by_molecule
    # as it is guaranteed that at least a single atom will not move
    # and therefore that the contraction doesn't suffer from drift,
    # it is possible that the stationary atom is always the first atom

    if idxs is None:
        idxs = set(range(1, struct.atom_total + 1))

    if fast:
        # extract is a very expensive action if the parent structure is large,
        # so just creating a new structure with atoms at the same positions is
        # vastly faster
        sub_st = structure.create_new_structure()
        for index in idxs:
            atom = struct.atom[index]
            # Creating as 'C' avoids mmat warnings for some elements
            newat = sub_st.addAtom('C', *atom.xyz)
            newat.element = atom.element
            # see MATSCI-10422 - where parent atom mass depends
            # on this property
            numbers = atom.property.get(smartsutils.GROUP_NUMBERS_PROP)
            if numbers:
                newat.property[smartsutils.GROUP_NUMBERS_PROP] = numbers
        # We have to bond all atoms in the molecule so that it is a single
        # molecule, however, the actualy bonding *does not matter* as long as
        # there is a continuous bond path connecting all atoms, so just fake
        # it rather than try to reproduce the original bonding
        for index in range(1, sub_st.atom_total):
            sub_st.addBond(index, index + 1, 1)
    else:
        sub_st = struct.extract(idxs, copy_props=True)
        synced_st = xtal.sync_pbc(sub_st)
        if synced_st:
            sub_st = synced_st
            if not pbc:
                try:
                    pbc = clusterstruct.create_pbc(sub_st)
                except ValueError:
                    pass
    if pbc:
        clusterstruct.contract_by_molecule(sub_st, pbc=pbc)
    return sub_st


[docs]def get_center_of_mass_weight(atom):
    """
    Return the weight of the atom used for the center of mass
    calculation.

    :type atom: schrodinger.structure._StructureAtom
    :param atom: the atom

    :rtype: float
    :return: the weight of the atom in amu
    """

    numbers = smartsutils.get_group_numbers(atom)
    if numbers:
        n_particles_w_atom = len(numbers)
    else:
        n_particles_w_atom = 1
    return 20 * numpy.tanh(atom.atomic_weight / 20) / n_particles_w_atom


[docs]def center_of_mass(struct, idxs=None, pbc=None):
    """
    Return the center of mass of the given indices
    in the given structure accounting for if the structure
    has a PBC.

    :type struct: structure.Structure
    :param struct: the structure containing the atoms
        for which the center of mass is needed

    :type idxs: set or None
    :param idxs: a set of integer atom indices for which
        the center of mass is needed or None in which case all
        atoms will be used

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: A pre-computed PBC for the structure

    :rtype: numpy.array
    :return: the center of mass
    """

    if idxs is None:
        idxs = set(range(1, struct.atom_total + 1))
    sub_st = extract_and_contract(struct, idxs=idxs, pbc=pbc, fast=True)
    xyzs = sub_st.getXYZ()
    weights = [get_center_of_mass_weight(atom) for atom in sub_st.atom]
    return numpy.average(xyzs, weights=weights, axis=0)


[docs]class Particle(object):
    """
    Create a coarse grain particle.
    """

[docs]    def __init__(self):
        """
        Create an instance.
        """

        self.xyz = None
        self.vdw_radius = None
        self.atomic_weight = None
        self.formal_charge = None
        self.partial_charge = None
        self.formula = None
        self.name = None
        self.rgb_color = None
        self.atom_type = None
        self.key = None

        self.parent_structure = None
        self.contracted_parent_structure = None
        self.parent_structure_str = None
        self.parent_indices = None
        self.parent_cut_bonds = None

[docs]    def setXYZ(self, xyz=None, astructure=None, pbc=None):
        """
        Set the position of the given particle.

        :type xyz: numpy.array or None
        :param xyz: the position of the particle or None
            if there isn't one

        :type astructure: structure.Structure or None
        :param astructure: the structure from which the parent
            structure will be extracted and for which the center of
            mass is needed or None if there isn't one

        :type pbc: `schrodinger.infra.structure.PBC`
        :param pbc: A pre-computed PBC for the structure
        """

        if xyz is not None:
            self.xyz = xyz
        elif astructure is not None:
            self.xyz = center_of_mass(astructure,
                                      idxs=self.parent_indices,
                                      pbc=pbc)

[docs]    def setVDWRadius(self, vdw_radius=None):
        """
        Set the VDW radius in Ang.

        :type vdw_radius: float or None
        :param vdw_radius: the VDW radius of the given particle
            in Ang. or None if there isn't one
        """

        if vdw_radius is not None:
            self.vdw_radius = vdw_radius
        else:
            max_distance = 0.0
            for atom in self.contracted_parent_structure.atom:
                distance = transform.get_vector_magnitude(
                    numpy.array(atom.xyz) - self.xyz)
                distance += atom.radius
                if distance > max_distance:
                    max_distance = distance
            self.vdw_radius = max_distance

[docs]    def setAtomicWeight(self, atomic_weight=None):
        """
        Set the atomic weight in g/mol.

        :type atomic_weight: float or None
        :param atomic_weight: the atomic weight of the given particle
            in g/mol or None if there isn't one
        """

        if atomic_weight is not None:
            self.atomic_weight = atomic_weight
        else:
            # self.parent_structure can be a fragment and so its
            # .total_weight property includes implicit hydrogens
            # which we don't want so compute it by hand
            self.atomic_weight = \
                sum([mm.mmelement_get_atomic_weight_by_atomic_number(atom.atomic_number)
                    for atom in self.parent_structure.atom])

[docs]    def setFormalCharge(self, formal_charge=None):
        """
        Set the formal charge.

        :type formal_charge: int
        :param formal_charge: the formal charge of the given particle
            or None if there isn't one
        """

        if formal_charge is not None:
            self.formal_charge = formal_charge
        else:
            self.formal_charge = self.parent_structure.formal_charge

[docs]    def setPartialCharge(self, partial_charge=None):
        """
        Set the partial charge.

        :type partial_charge: float
        :param partial_charge: the partial charge of the given particle
            or None if there isn't one
        """

        if partial_charge is not None:
            self.partial_charge = partial_charge
        else:
            self.partial_charge = \
                sum([atom.partial_charge for atom in self.parent_structure.atom])

[docs]    def setParentStructure(self, astructure):
        """
        Set the parent structure.

        :type astructure: structure.Structure
        :param astructure: the structure from which the parent
            structure will be extracted
        """

        self.parent_structure = \
            astructure.extract(self.parent_indices, copy_props=True)
        self.contracted_parent_structure = extract_and_contract(
            astructure, idxs=self.parent_indices)

[docs]    def setParentStructureStr(self):
        """
        Set the parent structure string.
        """

        self.parent_structure_str = coarsegrain.get_base64_str_from_structure(
            self.parent_structure)

[docs]    def setParentIndices(self, parent_indices):
        """
        Set the parent indices.

        :type parent_indices: list
        :param parent_indices: the parent indices, i.e. indices
            of the parent atomic structure for this particle
        """

        self.parent_indices = parent_indices

[docs]    def setParentCutBonds(self, astructure):
        """
        Set the parent cut bonds.

        :type astructure: structure.Structure
        :param astructure: the structure from which the parent
            structure was extracted and thus for which the bonding
            information is needed
        """

        # this list contains pair tuples for bonds that crossed the
        # particle boundary in the parent structure, the first atom
        # is the particle atom, the second atom is the atom that is
        # outside the particle

        self.parent_cut_bonds = []
        for aidx in self.parent_indices:
            aatom = astructure.atom[aidx]
            for batom in aatom.bonded_atoms:
                bidx = batom.index
                if bidx not in self.parent_indices:
                    order = astructure.getBond(aidx, bidx).order
                    self.parent_cut_bonds.append((aidx, bidx, order))

[docs]    def setFormula(self, formula=None):
        """
        Set the formula of the given particle.

        :type formula: str or None
        :param formula: the formula for the given particle or None
            if there isn't one
        """

        if formula is not None:
            self.formula = formula
        else:
            self.formula = analyze.generate_molecular_formula(
                self.parent_structure)

[docs]    def setName(self, name):
        """
        Set the name of the given particle.

        :type name: str
        :param name: name of the particle
        """

        self.name = name

[docs]    def setRGBColor(self, rgb_color):
        """
        Set the RGB color of the given particle.

        :type rgb_color: tuple
        :param rgb_color: a triple of integers in [0, 255] that give an RGB color
            for the given particle
        """

        self.rgb_color = rgb_color

[docs]    def setAtomType(self, atom_type):
        """
        Set the atom type of the given particle.

        :type atom_type: int
        :param atom_type: atom type of the given particle
        """

        self.atom_type = atom_type

[docs]    def setKey(self, key):
        """
        Set the key of the given particle, this is the key that
        indicates the parent structure string.

        :type key: str
        :param key: key of the given particle
        """

        self.key = key

[docs]    def defineAtom(self, astructure, index):
        """
        Define the given atom.

        :type astructure: structure.Structure
        :param astructure: the structure containing the atom to define

        :type index: int
        :param index: index of the atom to define
        """

        # note that mmct_atom_set_coarse_grain_particle must be called
        # after setting the atom.atom_type or warnings regarding
        # the calling of mmct_atom_set_isotope and mmct_atom_set_atomic_number
        # on atoms marked as coarse grain particles will be raised, also
        # mmct_atom_set_coarse_grain_particle must be called before
        # mmct_atom_set_coarse_grain_mass

        atom = astructure.atom[index]
        atom.x = self.xyz[0]
        atom.y = self.xyz[1]
        atom.z = self.xyz[2]

        coarsegrain.set_atom_coarse_grain_properties(
            astructure,
            atom,
            self.name,
            rgb=self.rgb_color,
            atom_type=self.atom_type,
            formal_charge=self.formal_charge,
            partial_charge=self.partial_charge,
            radius=self.vdw_radius,
            mass=self.atomic_weight)

        if self.key:
            atom.property[coarsegrain.CG_PARTICLE_KEY_KEY] = self.key
        if self.parent_indices:
            atom.property[coarsegrain.CG_PARTICLE_PARENT_INDICES_KEY] = \
                str(tuple(self.parent_indices))
        if self.parent_cut_bonds:
            atom.property[coarsegrain.CG_PARTICLE_PARENT_BONDS_KEY] = \
                str(tuple(self.parent_cut_bonds))

[docs]    @staticmethod
    def getParentIndices(cg_structure, cg_idx):
        """
        Return the parent indices for the given coarse grain
        particle index in the given coarse grain structure.

        :type cg_structure: structure.Structure
        :param cg_structure: the coarse grain structure

        :type cg_idx: int
        :param cg_idx: the coarse grain particle index

        :rtype: tuple
        :return: the parent indices for the given coarse grain
            particle index
        """

        idxs = cg_structure.atom[cg_idx].property[
            coarsegrain.CG_PARTICLE_PARENT_INDICES_KEY]
        return eval(idxs)

[docs]    @staticmethod
    def getParentCutBonds(cg_structure, cg_idx, reverse=False):
        """
        Return the parent cut bonds for the given coarse grain
        particle index in the given coarse grain structure.

        :type cg_structure: structure.Structure
        :param cg_structure: the coarse grain structure

        :type cg_idx: int
        :param cg_idx: the coarse grain particle index

        :type reverse: bool
        :param reverse: by default the first atom index of a pair
            from the returned tuple of pairs is for the particle with
            the given index and the second atom index of the pair is for
            the outside particle, if reverse is True it reverses this
            ordering

        :rtype: tuple
        :return: the parent cut bonds for the given coarse grain
            particle index
        """

        idxs = cg_structure.atom[cg_idx].property[
            coarsegrain.CG_PARTICLE_PARENT_BONDS_KEY]
        idxs = eval(idxs)
        if reverse:
            idxs = tuple([(j, i, k) for i, j, k in idxs])
        return idxs

[docs]    @staticmethod
    def getParentBonds(cg_structure, cg_idx_i, cg_idx_j):
        """
        Return the parent bonds between the given coarse
        grain particles indices in the given coarse grain
        structure.

        :type cg_structure: structure.Structure
        :param cg_structure: the coarse grain structure

        :type cg_idx_i: int
        :param cg_idx_i: the first coarse grain particle index

        :type cg_idx_j: int
        :param cg_idx_j: the second coarse grain particle index

        :rtype: tuple
        :return: the parent bonds between the given coarse
            grain particle indices
        """

        bonds = []
        for bond in Particle.getParentCutBonds(cg_structure, cg_idx_i):
            if bond[1] in Particle.getParentIndices(cg_structure, cg_idx_j):
                bonds.append(bond)
        return tuple(bonds)


[docs]class CoarseGrainBond(object):
    """
    Manage a coarse grain bond.
    """

    FORMAT_START = '('
    FORMAT_END = ')'

    DUMMY_ELEMENT = 'DU'

[docs]    def __init__(self, name_1, idx_1, name_2, idx_2):
        """
        Create an instance.

        :type name_1: str
        :param name_1: the name of particle 1

        :type idx_1: int
        :param idx_1: the index particle 1

        :type name_2: str
        :param name_2: the name of particle 2

        :type idx_2: int
        :param idx_2: the index particle 2
        """

        self.name_1 = name_1
        self.idx_1 = idx_1
        self.name_2 = name_2
        self.idx_2 = idx_2

        self.parent_indices_1 = None
        self.parent_cut_bonds_1 = None
        self.parent_indices_2 = None
        self.parent_cut_bonds_2 = None
        self.parent_bonds = None
        self.smiles = None
        self.label_1 = None
        self.label_2 = None
        self.chiral_hash = None
        self.parent_bonds_hash_info = None
        self.parent_bonds_hash = None

[docs]    def setParentData(self, cg_structure):
        """
        Set the parent data.

        :type cg_structure: structure.Structure
        :param cg_structure: the CG model
        """

        pair = (self.idx_1, self.idx_2)
        inds = [Particle.getParentIndices(cg_structure, x) for x in pair]
        self.parent_indices_1, self.parent_indices_2 = inds
        cuts = [Particle.getParentCutBonds(cg_structure, x) for x in pair]
        self.parent_cut_bonds_1, self.parent_cut_bonds_2 = cuts
        self.parent_bonds = Particle.getParentBonds(cg_structure, *pair)

[docs]    def getAllParentIndices(self):
        """
        Return a tuple containing all parent indices.

        :rtype: tuple
        :return: contains all parent indices
        """

        return tuple(set(self.parent_indices_1 + self.parent_indices_2))

[docs]    def setSmiles(self, st):
        """
        Set a unique SMILES for the parent indices.

        :type st: structure.Structure
        :param st: the parent model
        """

        # get all parent indices, this includes the parent atom indices
        # of each particle as well as the indices of any atoms bonded
        # to the parent atoms
        all_parent_indices = self.getAllParentIndices()
        out_idxs = set()
        for atuple in self.parent_cut_bonds_1 + self.parent_cut_bonds_2:
            in_idx, out_idx, order = atuple
            if out_idx not in all_parent_indices:
                out_idxs.add(out_idx)
        all_parent_indices += tuple(out_idxs)
        all_parent_indices = sorted(all_parent_indices)

        # extract structure
        parent_st = st.extract(all_parent_indices)

        # see MATSCI-3619, handle dangling bonds, we have a fragment
        # with dangling bonds, if we remove the dangling bonds then
        # the smiles generation will necessarily create implicit
        # hydrogens, we will prevent that by keeping the dangling
        # atoms but modifying them to be dummy atoms, i.e. elemental
        # symbol of DU, these dummy atoms show as wildcards (*) in
        # the patterns, note these patterns although obtained with
        # analyze.generate_smiles are actually smarts patterns as
        # they are matching and can not generate structures alone,
        # for example as with structure.SmilesReader, for example the
        # smarts patterns for the three bond types in standard P3HT
        # (polythiophene) polymer look like:
        #
        # (1) [*]c(c1)sc(c1[*])-c(c2[*])sc([*])c2
        # (2) [*]c1c([*])sc(c1)-c(c2)sc([*])c2[*]
        # (3) [*]c1c([*])sc(c1)-c(c2[*])sc([*])c2
        #
        amap = dict(
            zip(all_parent_indices, list(range(1,
                                               len(all_parent_indices) + 1))))
        for old in out_idxs:
            new = amap[old]
            aatom = parent_st.atom[new]
            aatom.element = self.DUMMY_ELEMENT

        # get smiles
        self.smiles = analyze.generate_smiles(
            parent_st,
            unique=True,
            stereo=analyze.STEREO_FROM_ANNOTATION_AND_GEOM)

[docs]    def setDefaultParticleLabels(self, st):
        """
        Set the default particle labels for the two coarse
        grain particles.

        :type st: structure.Structure
        :param st: the parent model
        """

        inside_idx_type_dict, outside_idx_type_dict = map(
            dict, zip(*self.parent_bonds_hash_info))

        element_dicts = []
        for idx_type_dict in (inside_idx_type_dict, outside_idx_type_dict):
            element_dict = {}
            for idx in idx_type_dict:
                element_dict.setdefault(st.atom[idx].element, []).append(idx)
            element_dicts.append(element_dict)
        inside_element_dict, outside_element_dict = element_dicts

        inside_chiral_sym, outside_chiral_sym = list(self.chiral_hash)

        inside_items = (inside_element_dict, inside_chiral_sym,
                        inside_idx_type_dict)
        outside_items = (outside_element_dict, outside_chiral_sym,
                         outside_idx_type_dict)

        labels = []
        for element_dict, chiral_sym, type_dict in (inside_items,
                                                    outside_items):
            label = ''
            for element in sorted(element_dict):
                for idx in element_dict[element]:
                    type_label, type_value = type_dict[idx]
                    if type_label == RESIDUE_UNIQUE_BOND_LABEL:
                        type_value = ''
                    label += element + type_value
            if chiral_sym != CHIRAL_A:
                label += CHIRAL_SEPARATOR + chiral_sym
            labels.append(label)
        self.label_1, self.label_2 = labels

[docs]    def setChiralHash(self, st):
        """
        Set the chiral hash.

        :type st: structure.Structure
        :param st: the parent model
        """

        # only the first parent bond needs to be considered,
        # if the CG particle types for this bond are equivalent
        # then sort the hash to avoid uniqueifying bonds with
        # different atom orderings

        chiral_hash = []
        pair = []
        if self.parent_bonds:
            pair = self.parent_bonds[0][:-1]
        if self.name_1 == self.name_2:
            pair = sorted(pair)
        for parent_idx in pair:
            parent_atom = st.atom[parent_idx]
            for prop, label in CHIRAL_PROP_DICT.items():
                value = parent_atom.property.get(prop, None)
                if value:
                    chiral_hash.append(label)
                    break
            else:
                chiral_hash.append(CHIRAL_A)
        self.chiral_hash = CHIRAL_SYM_DICT.get(tuple(chiral_hash))

[docs]    def setParentBondsHash(self, st):
        """
        Set the parent bonds hash.

        :type st: structure.Structure
        :param st: the parent model
        """

        # categorize using UNIQUE_BOND_DICT, hashes are just some
        # hash of data from the original repeat unit, if the CG particle
        # types for this bond are equivalent then sort the hash
        # to avoid uniqueifying bonds with different atom orderings

        parent_bonds_hash_info = []
        for parent_bond in self.parent_bonds:
            parent_bond_hash = []
            pair = parent_bond[:-1]
            if self.name_1 == self.name_2:
                pair = sorted(pair)
            for parent_idx in pair:
                parent_atom = st.atom[parent_idx]
                for label, key in UNIQUE_BOND_DICT.items():
                    if key == SMARTS_UNIQUE_BOND_KEY:
                        idxs = smartsutils.get_group_atom_indices(parent_atom)
                        value = smartsutils.ATOM_PROP_DELIM.join(map(str, idxs))
                    else:
                        value = str(parent_atom.property.get(key, '')).strip()
                    if value:
                        parent_bond_hash.append((parent_idx, (label, value)))
                        break
                else:
                    return
            parent_bonds_hash_info.append(tuple(parent_bond_hash))
        self.parent_bonds_hash_info = tuple(parent_bonds_hash_info)

        bond_hashes = []
        for pair in self.parent_bonds_hash_info:
            bond_hash = '_'.join(['-'.join(atuple[1]) for atuple in pair])
            bond_hashes.append(bond_hash)
        self.parent_bonds_hash = ':'.join(bond_hashes)

[docs]    @staticmethod
    def formatParticleLabel(label):
        """
        Format the particle label.

        :type label: str
        :param label: the label to format

        :rtype: str
        :return: the formated label
        """

        return ''.join(
            [CoarseGrainBond.FORMAT_START, label, CoarseGrainBond.FORMAT_END])


[docs]class CGMapperMixin(object):
    """
    A common mixin class for mapping all-atoms molecules to coarse-grain
    """

    def _filterExistingDefinitions(self):
        """
        Even atom property access gets expensive over huge atomistic systems
        for CG simulations, so this prefilters the (name, number) tuples to get
        rid of any tuples where the name property does not appear on any atom.
        The remaining (name, number) tuples will need to be searched for on
        every atom.

        :rtype: list
        :return: Each item of the list is a (name, number) tuple from
            DEFINITION_KEYS. Only those tuples with name properties that actually
            exist as atom properties on the structure will be included.
        """

        existing_definitions = []
        if not self.astructure.atom:
            return existing_definitions

        for name_key, num_key in DEFINITION_KEYS:
            keep = False
            if name_key == RESIDUE_NAME_KEY:
                # Built-in atom properties like RESIDUE_NAME_KEY are treated
                # differently and can't be accessed via mmct_atom_property_get
                # functions. But we know this property will be there.
                keep = True
            else:
                try:
                    mm.mmct_atom_property_get_string(self.astructure, name_key,
                                                     1)
                except mm.MmException as mmexc:
                    if mmexc.rc == mm.MMCT_ATOM_PROPERTY_NOT_DEFINED_IN_CT:
                        # Property does not exist on any atom
                        keep = False
                    elif mmexc.rc == mm.MMCT_ATOM_PROPERTY_UNDEFINED_ATOM:
                        # Property exists on at least one atom, but not this one
                        keep = True
                    else:
                        # An unexpected error occurred, don't be silent about it
                        raise
                else:
                    # The property exists on the atom we checked
                    keep = True
            if keep:
                existing_definitions.append((name_key, num_key))
        return existing_definitions

[docs]    def getAtomPolymerNames(self, atom, name):
        """
        Get the polymer names for this atom.

        :type atom: structure._StructureAtom
        :param atom: the atom for which the names are needed

        :type name: str
        :param name: polymer monomer name

        :rtype: list
        :return: polymer names
        """

        # add all polymer backbone atoms to both the backbone and backbone
        # and adjoining names and then delete the latter name later if there
        # were never any adjoining atoms added

        names = []
        backbone_atom = atom.property.get(msprops.BACKBONE_ATOM_PROP, None)
        if backbone_atom:
            names.append(name + POLYMER_BACKBONE_TAG)
            names.append(name + POLYMER_BACKBONE_AND_ADJOINING_TAG)
        adjoining_atom = \
            atom.property.get(msprops.BACKBONE_ADJOINING_ATOM_PROP, None)
        if adjoining_atom:
            if atom.element == 'H':
                names.append(name + POLYMER_BACKBONE_TAG)
            names.append(name + POLYMER_BACKBONE_AND_ADJOINING_TAG)
        side_group_name = \
            atom.property.get(msprops.SIDE_GROUP_ATOM_PROP, None)
        if side_group_name:
            names.append(name + POLYMER_SIDEGROUP_TAG + side_group_name)
        return names

[docs]    def getAtomResidueNames(self, atom, name):
        """
        Get the residue names for this atom.

        :type atom: structure._StructureAtom
        :param atom: the atom for which the names are needed

        :type name: str
        :param name: residue name

        :rtype: list
        :return: residue names
        """

        names = []
        pdb_name = atom.property.get(PDB_ATOM_NAME_KEY, None)
        if pdb_name is None:
            return names
        pdb_name = pdb_name.strip()
        if not pdb_name:
            return names
        if pdb_name in BACKBONE_PDB_ATOM_NAMES:
            names.append(name + PROTEIN_BACKBONE_TAG)
        else:
            names.append(name + PROTEIN_SIDECHAIN_TAG)
        return names

[docs]    def getAtomDefinitions(self, atom, existing_definitions):
        """
        Get the definitions for this atom.

        :type atom: structure._StructureAtom
        :param atom: the atom for which definition data is needed

        :type existing_definitions: list
        :param existing_definitions: Each item of the list is a (name, number)
            tuple from DEFINITION_KEYS that should be searched on the atom.

        :rtype: list
        :return: list of ATOM_DEFINITION for the passed atom 
        """

        idxs_names_numberstrs = []
        chain = str(atom.chain).strip()
        if not chain:
            chain = '_'
        molnum = str(atom.molecule_number)
        for idx, (name_key, number_key) in enumerate(existing_definitions):
            # see MATSCI-4079
            if name_key in [
                    smartsutils.GROUP_NAME_PROP, smartsutils.GROUP_NAMES_PROP
            ]:
                tmpchain = '_'
                names = smartsutils.get_group_names(atom)
                numbers = smartsutils.get_group_numbers(atom)
                if not names or not numbers:
                    continue
            else:
                tmpchain = chain
                try:
                    names = [atom.property[name_key]]
                    numbers = [atom.property[number_key]]
                except KeyError:
                    continue

            for name, number in zip(names, numbers):
                name = name.strip()
                numberstr = '-'.join([molnum, tmpchain, str(number)])
                if not name:
                    name = GENERAL_TAG + numberstr
                idxs_names_numberstrs.append(
                    ATOM_DEFINITION(name_key, idx, name, numberstr))
                if name_key == msprops.MONOMER_NAME_PROP:
                    for aname in self.getAtomPolymerNames(atom, name):
                        idxs_names_numberstrs.append(
                            ATOM_DEFINITION(name_key, idx, aname, numberstr))
                elif name_key == RESIDUE_NAME_KEY and name not in RESIDUES_NO_SIDECHAIN:
                    for aname in self.getAtomResidueNames(atom, name):
                        idxs_names_numberstrs.append(
                            ATOM_DEFINITION(name_key, idx, aname, numberstr))
        return idxs_names_numberstrs

[docs]    def trimDefinitionDict(self, definition_dict):
        """
        Trim the given definition dictionary to get rid of redundant
        definitions.

        :type definition_dict: OrderedDict
        :param definition_dict: keys are particle type strings, values are tuples
            of tuples of integers of particle occurrences
        """

        # if a polymer backbone and adjoining definition contains only backbone
        # atoms or if a polymer backbone definition contains only monomer atoms
        # then delete the former definitions as in these cases they are equivalent
        # to the latter definitions, the following tags is a list of pair tuples
        # where the first element of each tuple should be checked for redundancy with
        # the second element

        keys_to_rm = []
        tags = [(POLYMER_BACKBONE_AND_ADJOINING_TAG, POLYMER_BACKBONE_TAG),
                (POLYMER_BACKBONE_TAG, '')]
        for key in list(definition_dict):
            for tag1, tag2 in tags:
                if key.endswith(tag1):
                    name = key.split(tag1)[0]
                    if definition_dict[key] == definition_dict[name + tag2]:
                        keys_to_rm.append(key)
        for key in keys_to_rm:
            definition_dict.pop(key)

[docs]    @staticmethod
    def getUniqueDefNames(atom_definitions):
        """
        Gets the dictionary of names where the a single atom definition name is
        being used by multiple properties

        :param atom_definitions: List of all atom definitions
        :type atom_definitions: list(AtomDefinition)

        :returns: The dictionaries have keys as the definition names and the
            values of first dict is the property used by the definition and
            second dictionary values are list of properties ignored by the
            definition
        :rtype: dict, dict
        """

        prop_name_keys = defaultdict(set)
        for atm_def in atom_definitions:
            prop_name_keys[atm_def.name].add(atm_def.prop_name)

        used_props, ignored_props = {}, {}
        for name, prop_names in prop_name_keys.items():
            # Check if any property names need to be removed
            if len(prop_names) == 1:
                used_props[name] = list(prop_names)[0]
                ignored_props[name] = []
                continue

            # Always prefer SMARTS properties
            prop_names = sorted(list(prop_names))
            for smart_prop in [
                    smartsutils.GROUP_NAME_PROP, smartsutils.GROUP_NAMES_PROP
            ]:
                prop_names.sort(key=smart_prop.__eq__)

            # Remove the a key to use and save other keys as ignored keys
            used_props[name] = prop_names.pop(-1)
            ignored_props[name] = prop_names

        return used_props, ignored_props

[docs]    def getDefinitionDict(self, warn_logger=None):
        """
        Get definition dict.

        :type warn_logger: function
        :param warn_logger: If passed the warnings generated during generation
            of definitions will be passed to this function as a string

        :rtype: OrderedDict
        :return: keys are particle type strings, values are tuples
            of tuples of integers of particle occurrences
        """

        existing_definitions = self._filterExistingDefinitions()
        # build a dictionary of available definitions atom-wise
        atom_definitions = {}
        for atom in self.astructure.atom:
            atom_definitions[atom.index] = self.getAtomDefinitions(
                atom, existing_definitions)

        # Find the keys that are duplicate
        used_prop_names, ignored_prop_names = self.getUniqueDefNames(
            msutils.flatten(atom_definitions.values()))
        if warn_logger:
            ignore_str = ''
            for name, values in ignored_prop_names.items():
                if not values:
                    continue
                used_prop = used_prop_names[name]
                ignore_str += (
                    f"Found multiple definitions for {name} using {used_prop} "
                    f"and ignoring {','.join(values)}.\n")
            if ignore_str:
                warn_logger(ignore_str)

        # Create unsorted definition dictionary
        definition_dict = defaultdict(
            lambda: defaultdict(lambda: defaultdict(list)))
        for aid, adefinitions in atom_definitions.items():
            for adef in adefinitions:
                if adef.prop_name in ignored_prop_names[adef.name]:
                    continue
                definition_dict[adef.idx][adef.name][adef.numberstr].append(aid)

        # sort by definition index
        pairs = sorted(list(definition_dict.items()), key=lambda x: x[0])
        if pairs:
            dicts = list(zip(*pairs))[1]
        else:
            dicts = ()

        # for each definition index flatten, sort by definition name,
        # and sort by numberstr
        definition_dict = OrderedDict()
        for adict in dicts:
            pairs = []
            for key, value in adict.items():
                tmp_pairs = sorted(list(value.items()), key=lambda x: x[0])
                tmp_values = list(zip(*tmp_pairs))[1]
                pairs.append((key, tmp_values))
            pairs = sorted(pairs, key=lambda x: x[0])
            definition_dict.update(pairs)

        self.trimDefinitionDict(definition_dict)

        return definition_dict

[docs]    def areContiguous(self, indices):
        """
        Return True if for the given indices the atoms are contiguously
        bonded, False otherwise.

        :type indices: tuple
        :param indices: the indices to check
        """

        if len(indices) < 2:
            return True

        # This method using networkx is (nearly) infinitely faster than
        # extracting a structure of these atoms and checking the number of
        # molecules, mainly because structure extract time depends on the size
        # of the PARENT structure, not the extracted structure.
        graph = analyze.create_nx_graph(self.astructure, atoms=indices)
        if not graph:
            # No graph means no bonds between these atoms
            return False
        return networkx.is_connected(graph)

[docs]    def getParticles(self, particles, aname, acolor, aatom_type, akey):
        """
        Return list of particle objects.

        :type particles: list
        :param particles: contains particle indices as lists of atom indices

        :type aname: str
        :param aname: name for the particles

        :type acolor: tuple
        :param acolor: a triple of integers in [0, 255] that give an RGB color
            for the particles

        :type aatom_type: int
        :param aatom_type: atom type for the particles

        :type akey: str
        :param akey: a structure property key indicating the parent
            structure string

        :rtype: list
        :return: contains Particle instances
        """

        # some data is shared but only the first particle gets the
        # parent structure and structure string set as this can be
        # a lot of information to have to store for the many particle
        # instances

        try:
            # Precompute the pbc that will be needed for all particles. This
            # saves a tiny amount of time, but also saves book-keeping of PBC
            # structure properties for the particle structures.
            pbc = clusterstruct.create_pbc(self.astructure)
        except ValueError:
            pbc = None
        parent_indices = particles[0]
        ref_particle = Particle()
        ref_particle.setParentIndices(parent_indices)
        ref_particle.setParentStructure(self.astructure)
        ref_particle.setParentStructureStr()
        ref_particle.setXYZ(astructure=self.astructure, pbc=pbc)
        ref_particle.setVDWRadius()
        ref_particle.setAtomicWeight()
        ref_particle.setFormalCharge()
        ref_particle.setPartialCharge()
        ref_particle.setParentCutBonds(self.astructure)
        ref_particle.setFormula()
        ref_particle.setName(aname)
        ref_particle.setRGBColor(acolor)
        ref_particle.setAtomType(aatom_type)
        ref_particle.setKey(akey)
        particle_objs = [ref_particle]
        for parent_indices in particles[1:]:
            aparticle = Particle()
            aparticle.setParentIndices(parent_indices)
            aparticle.setXYZ(astructure=self.astructure, pbc=pbc)
            aparticle.setVDWRadius(vdw_radius=ref_particle.vdw_radius)
            aparticle.setAtomicWeight(atomic_weight=ref_particle.atomic_weight)
            aparticle.setFormalCharge(formal_charge=ref_particle.formal_charge)
            aparticle.setPartialCharge(
                partial_charge=ref_particle.partial_charge)
            aparticle.setParentCutBonds(self.astructure)
            aparticle.setFormula(formula=ref_particle.formula)
            aparticle.setName(aname)
            aparticle.setRGBColor(acolor)
            aparticle.setAtomType(aatom_type)
            aparticle.setKey(akey)
            particle_objs.append(aparticle)
        return particle_objs

[docs]    def inheritProperties(self, cg_structure):
        """
        Inherit properties for the CG model from the parent structure.

        :type cg_structure: structure.Structure
        :param cg_structure: the CG model
        """

        for prop in INHERITABLE_PROPERTIES:
            value = self.astructure.property.get(prop, None)
            if value is not None:
                cg_structure.property[prop] = value

[docs]    def markParticleAtoms(self, particle_index, atom_indices):
        """
        Mark the atoms in the structure by particle index.

        :type particle_index: int
        :param particle_index: the particle index

        :type atom_indices: list
        :param atom_indices: contains the atom indices for this particle
        """

        for idx in atom_indices:
            smartsutils.append_property(self.astructure.atom[idx],
                                        CG_PARTICLE_INDICES, particle_index)

[docs]    def buildCGFromParticles(self, all_particles, allow_unassigned=False):
        """
        Build coarse grain structure from the particle

        :param list all_particles: list of lists of `Particles`, where each
            list corresponds to each mapping group

        :param bool allow_unassigned: If False particles not mapped will be
            ignored, if True they will be ignored leading to no CG bond creation.

        :rtype: structure.Structure
        :return: the new CG model
        """

        # set up coarse grain model structure
        cg_structure = structure.create_new_structure()
        self.inheritProperties(cg_structure)
        coarsegrain.set_as_coarse_grain(cg_structure)

        # define coarse grain model atoms and mark atoms in the parent
        # structure to assist in defining the bonding in the coarse
        # grain model
        for particles in all_particles:
            sample_particle = particles[0]
            name = sample_particle.name
            prop_name = coarsegrain.CG_PARTICLE_KEY % name
            parent_structure_str = sample_particle.parent_structure_str
            cg_structure.property[prop_name] = parent_structure_str
            for particle in particles:
                atom = cg_structure.addAtom('C', *particle.xyz)
                particle.defineAtom(cg_structure, atom.index)
                self.markParticleAtoms(atom.index, particle.parent_indices)

        # define bonding in coarse grain model
        afunc = lambda x: get_cg_particle_indices(self.astructure.atom[x])
        for particles in all_particles:
            for particle in particles:
                for a_idx_st, b_idx_st, order in particle.parent_cut_bonds:
                    a_idxs_cg, b_idxs_cg = list(map(afunc,
                                                    [a_idx_st, b_idx_st]))
                    if not a_idxs_cg or not b_idxs_cg:
                        # In case the atom was not assigned a CG map. Raise
                        # error only when unassigned are not allowed.
                        if allow_unassigned:
                            continue
                        else:
                            raise ValueError(
                                f'CG bond between atom {a_idx_st} and '
                                f'{b_idx_st} cannot be created as they are not '
                                'mapped.')
                    # for cases where atoms belong to multiple CG particles prevent
                    # bonding a CG particle to itself
                    a_idxs_cg = list(set(a_idxs_cg).difference(b_idxs_cg))
                    for a_idx_cg in a_idxs_cg:
                        for b_idx_cg in b_idxs_cg:
                            if not cg_structure.areBound(a_idx_cg, b_idx_cg):
                                cg_structure.addBond(a_idx_cg, b_idx_cg,
                                                     CG_BOND_ORDER)
        return cg_structure

[docs]    def getAssignedAndUnassigned(self, all_particles):
        """
        Get currently mapped (assigned) and upmapped (unassigned) atoms in the
        all-atom structure.

        :param list all_particles: list of lists of `Particles`, where each
            list corresponds to each mapping group

        :rtype: tuple(set, set)
        :return: tuple where first element is indexes of all-atom atoms that
            have been assigned mapping scheme and second are the atoms that
            have not been assigned any mapping scheme
        """

        unassigned_atoms = set(range(1, self.astructure.atom_total + 1))
        assigned_atoms = set()
        for particles in all_particles:
            assigned_atoms.update(
                msutils.flatten(particles,
                                afunc=lambda x: getattr(x, 'parent_indices')))
        unassigned_atoms.difference_update(assigned_atoms)
        return assigned_atoms, unassigned_atoms

[docs]    def categorizeBonds(self):
        """
        Return a dictionary of categorized bonds.

        :rtype: OrderedDict
        :return: sorted keys where keys are pair tuples of coarse grain
            particle names, values are OrderedDicts where keys are (SMILES,
            chiral_hash) tuples and values are lists of CoarseGrainBond
        """

        all_categorized_cg_bonds = OrderedDict()
        obj = coarsegrain.Internals(self.cg_structure)
        obj.setBonds()
        for (cg_name_1, cg_name_2), data in obj.bonds.items():

            categorized_cg_bonds = OrderedDict()
            for cg_idx_1, cg_idx_2 in data[0]:
                cg_bond = CoarseGrainBond(cg_name_1, cg_idx_1, cg_name_2,
                                          cg_idx_2)
                cg_bond.setParentData(self.cg_structure)
                cg_bond.setChiralHash(self.astructure)
                # The atomistic bond is examined for the bond directional properties
                # If atomistic bond has directional info, we pass it to the cg
                # bond, and skip the assigning bond direction stages.
                # If atomistic bond doesn't have directional info, we ask users
                # to define
                parent_bonds = cg_bond.parent_bonds
                # No bond direction if there is no parent bond and hence won't
                # be counted towards categorized_cg_bonds
                if not parent_bonds:
                    continue
                atomic_bond = self.astructure.getBond(*parent_bonds[0][:-1])
                for prop_key in [
                        coarsegrain.CG_UNIQUE_BOND_PARTICLE_1_LABEL_KEY,
                        coarsegrain.CG_UNIQUE_BOND_PARTICLE_2_LABEL_KEY
                ]:
                    value = atomic_bond.property.get(prop_key)
                    if not value:
                        # This cg_bond cannot find atomistic bond direction,
                        # and is counted toward categorized_cg_bonds
                        break
                    self.cg_structure.getBond(
                        cg_bond.idx_1, cg_bond.idx_2).property[prop_key] = value
                else:
                    # cg_bond sets bond direction based atomistic bond property
                    continue

                cg_bond.setParentBondsHash(self.astructure)

                if cg_bond.parent_bonds_hash and cg_bond.chiral_hash:
                    atuple = (cg_bond.parent_bonds_hash, cg_bond.chiral_hash)
                    categorized_cg_bonds.setdefault(atuple, []).append(cg_bond)

            # now dedup using SMILES which is necessary because when using
            # UNIQUE_BOND_DICT bonds may be categorized differently even though
            # they are in fact symmetrically equivalent, for example consider
            # (1) ala-arg-ala-arg, if entire residues are particles then there
            # are two types of ala-arg bonds if on the other hand the backbones
            # are particles then with UNIQUE_BOND_DICT there will still be two
            # types of ala(backbone)-arg(backbone) bonds even though they are
            # symmetrically equivalent, or (2) terpy ligand, with UNIQUE_BOND_DICT
            # there will be two types of phen-phen bonds even though they are
            # symmetrically equivalent, only the first occurrence, i.e. pairs[0],
            # needs to be considered
            f_categorized_cg_bonds = OrderedDict()
            for pairs in categorized_cg_bonds.values():
                pairs[0].setSmiles(self.astructure)
                atuple = (pairs[0].smiles, pairs[0].chiral_hash)
                f_categorized_cg_bonds.setdefault(atuple, []).extend(pairs)

            # only keep pairs with two or more distinct bonds, for those
            # define the default particle labels
            if len(f_categorized_cg_bonds) >= 2:
                for pairs in f_categorized_cg_bonds.values():
                    for pair in pairs:
                        pair.setDefaultParticleLabels(self.astructure)
                all_categorized_cg_bonds[(cg_name_1, cg_name_2)] = \
                    f_categorized_cg_bonds

        return all_categorized_cg_bonds


[docs]class CGMapper(CGMapperMixin):
    """
    Class create a mapped coarse grain model from an all-atom structure.
    """

    TYPE_MAX = 400

[docs]    def __init__(self, struct):
        """
        Initiate CGMapper class.

        :param structure.Structure struct: All-atom structure to be mapped to CG
        """
        self.astructure = struct

        synced_astructure = xtal.sync_pbc(self.astructure)
        if synced_astructure:
            self.astructure = synced_astructure

        if msutils.is_coarse_grain(self.astructure):
            raise TypeError('The structure is already a coarse-grained model.')

[docs]    def getMappedParticles(self):
        """
        Create `Particles` for the assigned mapping scheme.

        :rtype: list(Particles)
        :return: list of lists of `Particles`, where each list corresponds to
            each mapping group
        """

        particles = []
        for count, (aname, aindexes) in enumerate(self.definition_dict.items()):
            akey = coarsegrain.CG_PARTICLE_KEY % aname
            aatom_type = self.TYPE_MAX - count
            aparticles = []
            for indexes in aindexes:
                if not self.areContiguous(indexes):
                    raise NonContiguousBondingError()
                else:
                    aparticles.append(indexes)
            cur_particles = self.getParticles(aparticles, aname,
                                              MOLECULE_COLORS[count],
                                              aatom_type, akey)
            particles.append(cur_particles)
        return particles

[docs]    def validateAllAssigned(self, all_particles):
        """
        Check if all the atoms in the structure have been assigned mapping
        scheme.

        :param list all_particles: list of lists of `Particles`, where each
            list corresponds to each mapping group

        :raise: ValueError if all particle have not been assigned names
        """

        _, unassigned_atoms = self.getAssignedAndUnassigned(all_particles)
        num_unassigned_atoms = len(unassigned_atoms)
        if num_unassigned_atoms != 0:
            raise ValueError(f'There are {num_unassigned_atoms} atoms '
                             'unassigned.')

[docs]    def markCGBonds(self):
        """
        CG bonds that are mapped to same named atoms but resulted from different
        all-atom bonding environment will be marked as unique. Thus, resulting
        in individual parameters for each.
        """

        key_1 = coarsegrain.CG_UNIQUE_BOND_PARTICLE_1_LABEL_KEY
        key_2 = coarsegrain.CG_UNIQUE_BOND_PARTICLE_2_LABEL_KEY
        aformat = CoarseGrainBond.formatParticleLabel
        categorized_bonds = self.categorizeBonds()

        for name_pair, adict in categorized_bonds.items():
            for cg_bonds in adict.values():
                for cg_bond in cg_bonds:
                    abond = self.cg_structure.getBond(cg_bond.idx_1,
                                                      cg_bond.idx_2)
                    abond.property[key_1] = aformat(cg_bond.label_1)
                    abond.property[key_2] = aformat(cg_bond.label_2)

[docs]    def mapCGStructure(self, cg_names, allow_unassigned, unique_bonds):
        """
        Map the passed cg_names to a coarse grain structure and build it from
        the all-atom structure.

        :param list: list of cg_names to be mapped

        :param bool allow_unassigned: if True, not all atoms are required to be
            mapped to the CG model. In case of False a check would be done and
            ValueError will be raised if all atoms were not assigned mapping

        :param bool unique_bonds: if True CG bonds that are mapped to same named
            atoms but resulted from different all-atom bonding environment will
            be marked as unique. If False the bonds will be considered
            indistinguishable
        """

        definition_dict = self.getDefinitionDict()
        # Only take use cg_names in the definition
        self.definition_dict = {
            name: definition_dict[name] for name in cg_names
        }

        # Define Particles
        particles = self.getMappedParticles()
        if not allow_unassigned:
            self.validateAllAssigned(particles)

        # Build CG structure from particles
        cg_structure = self.buildCGFromParticles(particles, allow_unassigned)
        cg_structure.applyTubeStyle()
        title = self.astructure.title.strip()
        cg_structure.title = title + CG_TAG
        self.cg_structure = cg_structure

        # Categorized bonds
        if unique_bonds:
            self.markCGBonds()