Source code for schrodinger.application.phase.packages.ligand_aligner

"""
This module contains the LigandAligner class, which uses a best practices
approach to flexibly align a set of ligand structures. The basic procedure
is as follows:

1. A Bemis-Murcko scaffold analysis is performed, and structures are clustered
   according to their largest scaffold, where scaffolds are represented using
   bond orders but not elemental types (i.e., fuzzy scaffolds).
2. Clusters are sorted by decreasing scaffold size, and the structures within
   each cluster are sorted by decreasing size to establish a canonical order
   for all subsequent work. The very first structure in the canonical list is
   the primary reference, and subsequent structures in the list tend to be
   aligned to reference structures that appear earlier in the list.
3. A scaffold-based distance matrix is computed, where the i, j entry is the
   scaffold number (0, 1, etc.) of the largest scaffold that's shared by
   structures i and j. Larger scaffolds have lower scaffold numbers, so the
   scaffold number can be used as a distance. When constructing the matrix,
   embedded matching is employed (e.g., imidazole matches benzimidazole) in
   order to increase the size of the largest shared scaffold.
4. A minimum spanning tree is constructed from the distance matrix, with
   structures being added to the tree using the canonical order described in
   step 2. Each edge of the tree holds a structure, the reference to which it
   is to be aligned, and the largest scaffold they share. The ordered edges of
   this tree provide a prescription for sequentially aligning each structure
   to a previously aligned reference with which it shares the largest possible
   scaffold.
5. The ordered edges of the minimum spanning tree are traversed, with the
   applicable alignment method (see AlignMethod below) being used to find the
   best superposition of a given structure to its reference. Once a structure
   is aligned to its reference, the aligned structure replaces the original
   structure, and it is made available as a reference for subsequent structures
   in the tree.
6. The structures for which core snapping was successful are post-processed
   in an attempt to superimpose sidechains that are common to two or more
   structures.
7. The aligned structures are returned in the order they were provided.

If a particular structure is designated as the reference to which all other
structures are to be aligned, steps 1-5 are appropriately modified or skipped.
Furthermore, if a SMARTS-based core is designated or MCS-based cores are
requested, Bemis-Murcko scaffold detection is disabled and the indicated core
is used in lieu of the largest shared Bemis-Murcko scaffold. In the case of
MCS, bond orders and elemental types must match, ring atoms may match only
other ring atoms, and complete rings must be matched.

If refinement is requested, additional conformers are generated for each
structure that was successfully snapped to its reference, with core atoms and
snapped sidechains being held fixed. The resulting conformers are used to
systematically increase the average in-place shape similarity over all pairs
of structures, whether snapped or not. Conformers are not generated for the
the primary reference, a user designated reference, or any structure that
was not snapped to its reference.

Copyright Schrodinger LLC, All Rights Reserved.
"""

import copy
import itertools
from enum import Enum

import numpy
from rdkit import Chem
from rdkit.Chem import rdFMCS

from schrodinger import structure
from schrodinger.infra import canvas
from schrodinger.infra import fast3d
from schrodinger.infra import mm
from schrodinger.infra import phase
from schrodinger.infra.canvas import ChmMmctAdaptor
from schrodinger.rdkit import rdkit_adapter
from schrodinger.structutils import analyze
from schrodinger.utils import log

# Designation for using MCS-based cores:
MCS_CORES = "MCS"

# Conformational sampling method:
SAMPLING_METHODS = {
    phase.CONF_SAMPLE_COARSE_NAME: "basic",
    phase.CONF_SAMPLE_FINE_NAME: "default"
}

# Method used to align a particular structure to its reference:
AlignMethod = Enum("AlignMethod", "snap_core least_squares shape_based")
ALIGN_METHOD_DESCRIPTION = {
    AlignMethod.snap_core: "snapped core",
    AlignMethod.least_squares: "flexible least-squares core",
    AlignMethod.shape_based: "flexible shape-based"
}

# Note that the preferred method is to snap the core onto the reference and
# conformationally sample the region outside the core. If the core cannot be
# snapped because of a stereo failure, open ring failure, etc., the entire
# structure is conformationally sampled and each conformer is aligned using
# least-squares superposition of the core atoms. If the structure shares no
# scaffolds with its reference, fully-flexible shape-based alignment is done.
# The best alignment, regardless of method, is the superposition that yields
# the highest all-atom shape similarity with atomic overlaps differentiated by
# elemental type.

# Aligned structures contain the following properties:
INPUT_STRUCTURE_NUMBER_PROP = "i_phase_Input_Structure_Number"
OUTPUT_STRUCTURE_NUMBER_PROP = "i_phase_Structure_Number"
REFERENCE_STRUCTURE_PROP = "i_phase_Reference_Structure"
SHAPE_SIM_PROP = "r_phase_Similarity_to_Reference"
ALIGN_METHOD_PROP = "s_phase_Alignment_Method"
ALIGN_REASON_PROP = "s_phase_Alignment_Reason"
CORE_SMARTS_PROP = "s_phase_Core_SMARTS"

# Atom coordinate tolerance for detecting common snapped cores:
ATOM_COORD_TOL = 1.0e-4

# Tolerance for detecting distorted bonds:
DISTORTED_BOND_TOL = 0.5

# Default maximum number of conformers to generate:
DEFAULT_MAX_CONFS = 1000


[docs]class LigandAlignerOptions:
    """
    Holds options that control alignment of structures.
    """

[docs]    def __init__(self):

        # Whether to refine superpositons after the basic alignment procedure.
        self.refine = False

        # Whether to immediately fail with a RuntimeError if any structures
        # contain multiple fragments or no rings. If False, those structures
        # are quietly skipped.
        self.fail_on_bad = False

        # Whether to skip the post-processing step wherein chemically identical
        # sidechains are snapped onto one another in pairs of structures that
        # share a common snapped core.
        self.ignore_sidechains = False

        # Whether to align hydrogens on terminal rotatable heavy atoms (-CH3,
        # -NH2, -OH, etc.) if the heavy atoms are part of the core. If False,
        # those hydrogens will be aligned only if the conformational sampling
        # yields a torsion angle that superimposes them.
        self.align_terminal_hydrogens = True

        # Conformational sampling method. Empirical evidence suggests that
        # fast3d "basic" sampling, which corresponds to Phase coarse/rapid
        # sampling, tends to yield the most consistent superpositions.
        self.sampling_method = phase.CONF_SAMPLE_COARSE_NAME

        # Maximum number of conformers to generate for each input structure.
        # Must be > 0.
        self.max_confs = DEFAULT_MAX_CONFS

        # Whether to minimize conformers.
        self.minimize_confs = False

        # Whether to replace the primary reference structure with a sampled
        # fast3d conformer that yields the best shape-based superposition to
        # the input primary reference structure. This can sometimes result in
        # better consensus alignments since the primary reference structure
        # is then an actual fast3d conformer, and as such may produce better
        # superpositions with fast3d conformers generated for other structures.
        self.use_sampled_ref = False

        # Tolerance for detection of close contacts in snapped structures. A
        # tolerance of 0 turns off close contact detection.
        self.close_contact_tol = phase.DEFAULT_CLOSE_CONTACT_TOL


[docs]class LigandAligner:
    """
    Generates a consensus alignment for a set of ligands using the best
    practices approach described at the top of this module.
    """

[docs]    def __init__(self, options=None, logger=None):
        """
        Constructor taking options that control the alignment behavior and
        a logger for printed output.

        :param options: Alignment options. See also setOptions.
        :type options: LigandAlignerOptions

        :param logger: Logger for printed output. Informative messages about
                the alignment process are issued at the INFO level; additional
                messages triggered by core snapping failures are issed at the
                DEBUG level. See also setLogger.
        :type logger: Logger
        """

        self.setOptions(options or LigandAlignerOptions())
        self.setLogger(logger or log.get_output_logger(__file__))

[docs]    def align(self, sts, ref_index=None, core=""):
        """
        Aligns the supplied structures using core snapping, flexible
        least-squares core alignment, or flexible shape-based superposition.
        See setOptions to modify the default alignment behavior.

        :param sts: The structures to be aligned.
        :type sts: list(structure.Structure)

        :param ref_index: 1-based reference structure number. Overrides the
                basic algorithm that chooses reference structures and
                determines the order in which alignments are performed.
        :type ref_index: int

        :param core: If a reference structure is designated, core may be a
                SMARTS string that matches the reference structure, or it may
                be MCS_CORES.
        :type core: str

        :return: The aligned structures in the order they were provided, minus
                any fragmented structures or structures lacking rings.
        :rtype: list(structure.Structure)
        """

        if core and not ref_index:
            mesg = "Must designate a reference structure when specifying core"
            raise RuntimeError(mesg)

        # Initialize all data that will be created during the alignment
        # workflow.
        self._initAlignData(ref_index, core)

        # Filter out bad structures, create ChmMol objects and, if
        # applicable, rdkit.Mol objects for MCS.
        sts_filtered, self._mols = self._setupAlign(sts)
        if len(sts_filtered) < 2:
            raise RuntimeError("Must supply 2 or more valid structures")

        self._checkCore()

        # Align the valid structures.
        return self._align(sts_filtered)

[docs]    def getLogger(self):
        """
        Returns the logger used for output messages.

        :return: The logger.
        :rtype: Logger
        """

        return self._logger

[docs]    def getOptions(self):
        """
        Returns a copy of the options that control the alignment process and
        the level of printed output.

        :return: The options.
        :rtype: LigandAlignerOptions
        """

        return copy.copy(self._options)

[docs]    def setLogger(self, logger):
        """
        Sets the logger to be used for printing messages during the
        alignment process.

        :param logger: The logger.
        :type logger: Logger
        """

        self._logger = logger

[docs]    def setOptions(self, options):
        """
        Sets options that control the alignment process and the level of
        output printed.

        :param options: The options.
        :type options: LigandAlignerOptions
        """

        if options.max_confs < 1:
            raise ValueError("Maximum number of conformers must be > 0")

        mode = SAMPLING_METHODS[options.sampling_method]
        use_default_frag_lib = True
        custom_frag = []
        self._f3d = fast3d.Engine(mode, options.minimize_confs,
                                  use_default_frag_lib, custom_frag)
        self._f3d.setDesiredNumberOfConformers(options.max_confs)

        self._options = copy.copy(options)

    def _align(self, sts):
        """
        Aligns the supplied structures using core snapping, flexible
        least-squares core alignment, or flexible shape-based superposition.
        Structures are modified directly.

        :param sts: The structures to be aligned.
        :type sts: list(structure.Structure)

        :return: The aligned structures in the order they were provided.
        :rtype: list(structure.Structure)
        """

        if not self._core:
            self._logger.info(f"\nFinding scaffolds for {len(sts)} structures")
            finder = canvas.ChmScaffoldFinder(self._mols, False, False)
            self._scaffold_count = finder.getScaffoldCount()
            mesg = f"Total number of scaffolds = {self._scaffold_count}"
            self._logger.info(mesg)

            # Store fuzzy SMARTS for each scaffold.
            for i in range(self._scaffold_count):
                smarts = finder.getScaffold(i).getFuzzyScaffold(2)
                # As per PHASE-2111, allow any aromatic nitrogen to match any
                # other aromatic nitrogen to protect against inconsistent
                # assignment of tautomeric states.
                self._fuzzy_smarts.append(smarts.replace("[aH]", "[a]"))
        elif self._core == MCS_CORES:
            self._logger.info(f"\nFinding MCS cores for {len(sts)} structures")
            self._findMcsCores(sts)
        else:
            self._fuzzy_smarts = [self._core]

        if not self._ref_index:
            # Create minimum spanning tree whose edges determine the order in
            # which structures are aligned. The following prerequisite work is
            # also done:
            # 1. Clustering structures by largest scaffold.
            # 2. Generation of a canonical structure order.
            # 3. Construction of the scaffold-based distance matrix.
            self._createMinimumSpanningTree()
        else:
            # Create a tree that traverses the input structures in the order
            # provided, with the user-designated reference being first.
            self._createLinearSpanningTree()

        # Prepare primary reference structure.
        prim_ref = self._mst_edges[0][0]
        sts[prim_ref] = self._preparePrimaryRef(sts[prim_ref])

        # Perform alignments in the order prescribed by the edges of the
        # minimum/linear spanning tree.
        self._logger.info("")
        n = len(self._mst_edges)
        for i, edge in enumerate(self._mst_edges):
            stA = sts[edge[0]]
            stB = sts[edge[1]]
            smarts = ""
            dAB = edge[2]  # Index of largest shared scaffold or MCS
            if dAB != -1:
                smarts = self._fuzzy_smarts[dAB]
            progress = f"({i + 1} of {n})"
            mesg = f"Aligning {edge[1] + 1}-->{edge[0] + 1} {progress}"
            if smarts:
                mesg += f": Core SMARTS = {smarts}"
            self._logger.info(mesg)
            stB.property[REFERENCE_STRUCTURE_PROP] = edge[0] + 1
            stB.property[CORE_SMARTS_PROP] = smarts
            stB_aligned = self._getBestAlignment(stA, stB, smarts)
            shape_sim = stB_aligned.property[SHAPE_SIM_PROP]
            self._logger.info("  Shape similarity = %.6f" % shape_sim)
            sts[edge[1]] = stB_aligned

        if not self._options.ignore_sidechains:
            sts = self._snapSideChains(sts)
        if self._options.refine:
            sts = self._refineAlignments(sts)

        return sts

    def _alignConformers(self,
                         stA,
                         stB,
                         stB_confs,
                         shape,
                         core_mapping=None,
                         append_confs=True):
        """
        Finds the best superposition of the provided conformers of stB onto
        stA using shape similarity. If core_mapping is provided, the B->A
        mappings therein will be used to perform a least-squares alignment)
        of the applicable core atoms. Otherwise, conformers will be aligned
        using the standard PhpFastShape algorithm. Returns the highest shape
        similarity found and the associated aligned conformer of stB.

        :param stA: The fixed reference structure.
        :type stA: structure.Structure

        :param stB: The structure to be aligned onto stA.
        :type stB: structure.Structure

        :param stB_confs: fast3d conformers of stB. Each conformer is a tuple
                whose second element contains the atomic coordinates as a
                linear array of length 3 * num_atoms.
        :type stB_confs: list(fast3d.Conformation)

        :param shape: PhpFastShape object with stA as the reference.
        :type shape: phase.PhpFastShape

        :param core_mapping: Pairs of 1-based B->A core atom mappings.
        :type core_mapping: list((int, int))

        :param append_confs: If True, stB and stB_confs are screened.
                Otherwise, only stB_confs are screened.
        :type append_confs: bool

        :return: Highest shape similarity and associated aligned structure.
        :rtype: float, structure.Structure
        """

        mappingA = None
        mappingB = None
        inplace = False
        if core_mapping:
            mappingB, mappingA = zip(*core_mapping)
            inplace = True
        result_best = [0.0]
        stB_copy = stB.copy()
        stB_best = None
        first_conf = True
        # Prepending [None] to stB_confs allows incoming stB coordinates to
        # be used.
        for stB_conf in [None] + stB_confs:
            if stB_conf:
                # Note that the shape of stB_conf[1] prevents it from being
                # supplied directly to stB_copy.setXYZ.
                mm.mmct_ct_set_all_xyz(stB_copy, stB_conf[1])
            elif not append_confs:
                continue
            if core_mapping:
                phase.align_least_squares(stA, mappingA, stB_copy, mappingB,
                                          False)
                if self._options.align_terminal_hydrogens:
                    phase.align_terminal_hydrogens(stA, stB_copy, core_mapping)
            result = shape.computeShapeSimPy(stB_copy, first_conf, inplace)
            first_conf = False
            if result[0] > result_best[0]:
                result_best = list(result)
                stB_best = stB_copy.copy()
        if not inplace:
            shape.alignCtPy(stB_best.handle, result_best)

        stB_best.property[SHAPE_SIM_PROP] = result_best[0]
        return result_best[0], stB_best

    def _alignLeastSquares(self, stA, stB, core_mappings):
        """
        Generates conformers for stB, with stB->stA least-squares alignment
        on the core atoms, and finds the conformer that yields the highest
        shape similarity to the reference structure stA.

        :param stA: The fixed reference structure.
        :type stA: structure.Structure

        :param stB: The structure to be aligned onto stA.
        :type stB: structure.Structure

        :param core_mappings: Lists of 1-based B-->A atom mappings that define
                the various ways the core of stB is to be aligned to stA.
        :type core_mappings: list(list((int, int)))

        :return: Best alignment over all generated conformers of stB.
        :rtype: structure.Structure
        """

        shape = self._getShape(stA)
        stB_confs = self._f3d.run(stB)
        sim_best = 0.0
        stB_aligned_best = None
        core_mapping_best = None
        for core_mapping in core_mappings:
            sim, stB_aligned = self._alignConformers(stA, stB, stB_confs, shape,
                                                     core_mapping)

            if sim > sim_best:
                sim_best = sim
                stB_aligned_best = stB_aligned
                core_mapping_best = list(core_mapping)

        st_number = stB.property[OUTPUT_STRUCTURE_NUMBER_PROP] - 1
        self._least_squares_core_mappings[st_number] = core_mapping_best

        return stB_aligned_best

    def _alignShapes(self, stA, stB, append_confs=True):
        """
        Generates conformers for stB and finds the conformer that yields
        the best shape-based alignment to the reference structure stA.

        :param stA: The fixed reference structure.
        :type stA: structure.Structure

        :param stB: The structure to be aligned onto stA.
        :type stB: structure.Structure

        :param append_confs: If True, stB and the generated conformers are
                screened. Otherwise, only the conformers are screened.
        :type append_confs: bool

        :return: Best alignment over all generated conformers of stB.
        :rtype: structure.Structure
        """

        shape = self._getShape(stA)
        return self._alignConformers(stA,
                                     stB,
                                     self._f3d.run(stB),
                                     shape,
                                     append_confs=append_confs)[1]

    def _alignSnappedCore(self, stA, snappedB, core_mappings):
        """
        Generates conformers for each snapped structure in snappedB, keeping
        the core atoms frozen, and finds the conformer that yields the highest
        shape similarity to the reference structure stA. If refinement is
        turned on, this function also fills self._frozen_refinement_atoms with
        a first approximation of the atoms to hold fixed during refinement. The
        sets are typically expanded as rigid regions are grown when snapping
        sidechains.

        :param stA: The fixed reference structure.
        :type stA: structure.Structure

        :param snappedB: The snapped structures to be aligned onto stA.
        :type snappedB: list(structure.Structure)

        :param core_mappings: 1-based B->A core atom mappings for each
                structure in snappedB.
        :type core_mappings: list(list((int, int)))

        :return: Best alignment over all generated conformers of snappedB.
        :rtype: structure.Structure
        """

        shape = self._getShape(stA)
        frozen_prop = fast3d.Engine.FROZEN_ATOMS_PROPERTY_NAME
        # Note: The presence of frozen_prop signals that the core has been
        # snapped and should be held frozen. The core still needs to be
        # realigned, however, because fast3d doesn't keep the conformers
        # in the original frame of reference.
        sim_best = 0.0
        stB_aligned_best = None
        core_mapping_best = None
        for stB, core_mapping in zip(snappedB, core_mappings):
            frozen_atoms = self._getFrozenAtoms(stB, core_mapping)
            frozen_atoms_string = " ".join(str(i) for i in frozen_atoms)
            stB_copy = stB.copy()
            stB_copy.property[frozen_prop] = frozen_atoms_string
            stB_confs = self._f3d.run(stB_copy)
            if frozen_prop in stB_copy.property:
                del stB_copy.property[frozen_prop]
            sim, stB_aligned = self._alignConformers(stA, stB_copy, stB_confs,
                                                     shape, core_mapping)
            if sim > sim_best:
                sim_best = sim
                stB_aligned_best = stB_aligned
                core_mapping_best = list(core_mapping)

        st_number = stB.property[OUTPUT_STRUCTURE_NUMBER_PROP] - 1
        if not self._options.ignore_sidechains:
            # Store best mapping for use in common core construction when
            # snapping identical sidechains.
            self._snapped_core_mappings[st_number] = core_mapping_best

        if self._options.refine:
            self._frozen_refinement_atoms[st_number] = set(
                self._getFrozenAtoms(stB, core_mapping_best))

        return stB_aligned_best

    def _badCore(self, st1, st2, core1, core2):
        """
        Returns True if core1 is too small or either core is disconnected.

        :param st1: First structure.
        :type st1: structure.Structure

        :param st2: Second structure.
        :type st2: structure.Structure

        :param core1: 1-based core atom numbers in first structure.
        :type core1: list(int)

        :param core2: 1-based core atom numbers in second structure.
        :type core2: list(int)

        :return: True if core is bad.
        :rtype: bool
        """

        if len(core1) < 3:
            return True
        if not phase.is_connected_subgraph(st1.handle, core1):
            return True
        if not phase.is_connected_subgraph(st2.handle, core2):
            return True

        return False

    def _checkCore(self):
        """
        Raises a RuntimeError if a user-supplied core SMARTS doesn't match
        the user-designated reference structure.
        """

        smarts = self._core
        if smarts and smarts != MCS_CORES:
            mol = self._mols[self._ref_index - 1]
            query = canvas.ChmQuery(smarts)
            matcher = canvas.ChmQueryMatcher(True)
            if not matcher.hasMatch(query, mol):
                mesg = f'SMARTS "{smarts}" does not match reference structure'
                raise RuntimeError(mesg)

    def _clusterByScaffold(self):
        """
        Clusters structures according to their largest scaffold.
        """

        self._logger.info("\nClustering structures by largest scaffold")

        self._storeFuzzyScaffoldsInMol()

        already_clustered = set()
        nmol = len(self._mols)
        for iscaff in range(self._scaffold_count):
            source_structures = []
            for jmol in range(nmol):
                if jmol not in already_clustered:
                    if iscaff in self._scaffolds_in_mol[jmol]:
                        source_structures.append(jmol)
                        already_clustered.add(jmol)
            if source_structures:
                self._clusters[iscaff] = source_structures

        icluster = 0
        for iscaff, source_structures in self._clusters.items():
            icluster += 1
            smarts = self._fuzzy_smarts[iscaff]
            ss_string = ", ".join(str(i + 1) for i in source_structures)
            self._logger.info(f"Cluster {icluster}:")
            self._logger.info(f"  Scaffold SMARTS = {smarts}")
            self._logger.info(f"  Structures: {ss_string}")

    def _createLinearSpanningTree(self):
        """
        Creates a surrogate for the minimum spanning tree when a reference
        structure has been designated. That structure will appear in every
        edge of the tree, and the order of the edges will follow the input
        order of the other ligands.
        """

        nmol = len(self._mols)
        ref_pos = self._ref_index - 1
        self._canonical_order = [ref_pos]
        for i in range(nmol):
            if i != ref_pos:
                self._canonical_order.append(i)

        if self._core:
            if self._core == MCS_CORES:
                dmatrix = self._getMcsMatrix()
                self._scaffold_count = nmol
            else:
                dmatrix = self._getSmartsMatrix()
                self._scaffold_count = 1
        else:
            self._storeFuzzyScaffoldsInMol()
            dmatrix = self._getScaffoldMatrix()

        for i in range(1, nmol):
            ipos = self._canonical_order[i]
            dij = dmatrix[ipos][ref_pos]
            if dij == self._scaffold_count:
                # No shared scaffold/SMARTS.
                dij = -1
            self._mst_edges.append([ref_pos, ipos, dij])

    def _createMinimumSpanningTree(self):
        """
        Creates a minimum spanning tree using Prim's algorithm.

        The tree consists of edges [i, j, dij], where i and j are 0-based
        structure numbers, and dij is the 0-based scaffold number of the
        largest scaffold shared by the two structures. If they share no
        scaffolds, dij will be set to -1.
        """

        # Cluster compounds by the largest scaffold they contain.
        self._clusterByScaffold()

        # Sort structures by cluster, and then by decreasing structure
        # size within each cluster. Clusters are already ordered by
        # decreasing size of the associated scaffold.
        size_ranks1 = [-mol.getHeavyAtomCount() for mol in self._mols]
        size_ranks2 = [-mol.getAtomCount() for mol in self._mols]
        size_ranks = list(zip(size_ranks1, size_ranks2))
        ranks = []
        for iscaff, source_structures in self._clusters.items():
            for imol in source_structures:
                ranks.append((iscaff, size_ranks[imol], imol))
        self._canonical_order = [row[2] for row in sorted(ranks)]

        dmatrix = self._getScaffoldMatrix()
        nmol = len(self._mols)
        in_mst = nmol * [0]

        # Start with primary reference structure and build edges of mst by
        # iteratively adding the closest structure not already in the mst.
        primary_ref = self._canonical_order[0]
        in_mst[primary_ref] = 1
        mst_size = 1
        big_dist = self._scaffold_count + 1
        while mst_size < nmol:
            dmin = big_dist
            imin = None
            jmin = None
            # Always follow canonical order when adding edges so that larger
            # structures with larger scaffolds tend to appear earlier in tree.
            for i in self._canonical_order:
                if in_mst[i]:
                    for j in self._canonical_order:
                        if not in_mst[j] and dmatrix[i][j] < dmin:
                            dmin = dmatrix[i][j]
                            imin = i
                            jmin = j
            in_mst[jmin] = 1
            mst_size += 1
            self._mst_edges.append([imin, jmin, dmin])

        # Correct edges that correspond to no shared scaffold. These are the
        # edges for which flexible shape-based alignment must be done.
        for edge in self._mst_edges:
            if edge[2] == self._scaffold_count:
                edge[0] = primary_ref
                edge[2] = -1

    def _failsRDKitConversion(self, st):
        """
        Returns a non-blank error message if the supplied structure cannot
        be converted to an RDKit Mol. Returns an empty string otherwise.

        :param st: The structure to be checked
        :type st: structure.Structure

        :return: Error message or empty string
        :rtype: str
        """

        try:
            rdkit_adapter.to_rdkit(st, implicitH=True)
        except Exception as e:
            return str(e)

        return ''

    def _filterMatches(self, matches, filter):
        """
        Returns the rows in matches that contain the exact same atom numbers
        as in filter.

        :param matches: Lists of 1-based atom numbers.
        :type matches: list(list(int))

        :param filter: List of 1-based atom numbers.
        :type filter: list(int)

        :return: Matches containing the same atom numbers as in filter.
        :rtype: list(list(int))
        """

        filter_set = set(filter)
        n = len(filter_set)
        filtered_matches = []
        for match in matches:
            if len(filter_set.intersection(match)) == n:
                filtered_matches.append(match)

        return filtered_matches

    def _findMcsCores(self, sts):
        """
        Finds the MCS between the reference structure and each of the
        supplied structures and stores the smarts in self._fuzzy_smarts,
        in the same order as sts. An empty string is stored for the
        reference structure as well as for any structure that fails
        to yield an MCS of at least 3 atoms.

        :param sts: The structures to be aligned.
        :type sts: list(structure.Structure)
        """

        ref_pos = self._ref_index - 1
        rdkit_mol_ref = rdkit_adapter.to_rdkit(sts[ref_pos], implicitH=True)
        chm_adaptor = canvas.ChmMmctAdaptor()
        stereo = canvas.ChmMmctAdaptor.StereoFromAnnotation_Safe
        chm_mol_ref = chm_adaptor.create(sts[ref_pos], stereo)
        chm_matcher = canvas.ChmQueryMatcher(True)
        for i, st in enumerate(sts):
            if i == ref_pos:
                self._fuzzy_smarts.append("")
            else:
                rdkit_mol = rdkit_adapter.to_rdkit(st, implicitH=True)
                smarts = rdFMCS.FindMCS(
                    [rdkit_mol, rdkit_mol_ref],
                    ringMatchesRingOnly=True,
                    completeRingsOnly=True,
                    atomCompare=Chem.rdFMCS.AtomCompare.CompareElements,
                    bondCompare=Chem.rdFMCS.BondCompare.CompareOrder,
                    ringCompare=Chem.rdFMCS.RingCompare.StrictRingFusion,
                    timeout=10).smartsString
                # Ensure that the RDKit SMARTS is legal and matches at least
                # 3 atoms in the equivalent ChmMol, so that we know it's safe
                # to use the SMARTS for subsequent Canvas-based work.
                chm_mol = chm_adaptor.create(st, stereo)
                match_size = 0
                try:
                    chm_query = canvas.ChmQuery(smarts)
                    for match in chm_matcher.match(chm_query, chm_mol):
                        match_size = match.getMatchedAtoms().size()
                        break
                except:
                    pass
                if match_size < 3:
                    smarts = ""
                self._fuzzy_smarts.append(smarts)

    def _getAlignCoreOptions(self, snapping_sidechains=False):
        """
        Creates options for snapping core atoms. By default, close contacts,
        open rings, and changes in stereochemistry are prohibited, but a
        PhpException will not be thrown in the event of these failures, so that
        mappings can be retrieved for least-squares alignment of the core. If
        snapping_sidechains is True, open rings will be allowed, and the other
        types of failures will trigger an exception. Open rings are allowed
        when snapping sidechains because only the sidechain atoms will move,
        so it doesn't matter if a given ring is not fully mapped.

        :param snapping_sidechains: Whether sidechains are being snapped.
        :type snapping_sidechains: bool

        :return: Core snapping options.
        :rtype: phase.PhpAlignCoreOptions
        """

        options = phase.PhpAlignCoreOptions()
        options.close_contact_tol = self._options.close_contact_tol
        if not snapping_sidechains:
            options.open_ring_policy = phase.PhpOpenRingPolicy_SKIP
            options.stereo_change_action = phase.PhpStereoChangeAction_SAVE_MAPPINGS
            options.close_contact_action = phase.PhpCloseContactAction_SAVE_MAPPINGS
            options.open_ring_action = phase.PhpOpenRingAction_SAVE_MAPPINGS

        return options

    def _getAnchorAtoms(self, st, ring_atoms):
        """
        Returns a set containing 1-based atom numbers for all atoms from
        which a sidechain is allowed to be grown. This consists of ring
        atoms bonded to exactly one exocyclic heavy atom.

        :param st: Structure whose anchor atoms are sought.
        :type st: structure.Structure

        :param ring_atoms: 1-based set of all ring atom numbers in structure.
        :type ring_atoms: set(int)

        :return: Anchor atom numbers.
        :rtype: set(int)
        """

        anchor_atoms = set()
        for ring_atom in ring_atoms:
            num_exocyclic = 0
            for neighbor in st.atom[ring_atom].bonded_atoms:
                if neighbor.atomic_number != 1 and neighbor.index not in ring_atoms:
                    num_exocyclic += 1
            if num_exocyclic == 1:
                anchor_atoms.add(ring_atom)

        return anchor_atoms

    def _getBestAlignment(self, stA, stB, smarts=""):
        """
        Finds the best flexible alignment of stB onto stB using one of the
        following treatments of stB:
        (1) Core snapping with flexible treatment beyond core.
        (2) Full flexible treatment with least-squares alignment of core.
        (3) Full flexible treatment with elemental shape-based alignment.

        Method 1 is used if a SMARTS is provided and the core can be snapped
        with no errors. Method 2 is used if core snapping results in errors.
        Method 3 is used if no SMARTS is provided.

        :param stA: The fixed reference structure
        :type stA: structure.Structure

        :param stB: The structure to be aligned onto stA
        :type stB: structure.Structure

        :param smarts: SMARTS string that represents the core
        :type smarts: str

        :return: Aligned version of stB.
        :rtype: structure.Structure
        """

        align_method = AlignMethod.snap_core
        align_method_descr = ALIGN_METHOD_DESCRIPTION[align_method]
        align_reason = ""
        snappedB = []
        core_mappings = []
        if smarts:
            align_core_options = self._getAlignCoreOptions()
            try:
                aligner = phase.PhpAlignCore(stA.handle, stB.handle, smarts,
                                             align_core_options)
                # We've asked that mappings be saved even if snapping fails.
                core_mappings = aligner.getMappings()
                # Check for failures.
                if aligner.closeContactFailure():
                    align_reason = "creation of a close contact"
                elif aligner.openRingFailure():
                    align_reason = "open ring in core mapping"
                elif aligner.stereoFailure():
                    align_reason = "stereochemistry change"
                if align_reason:
                    align_method = AlignMethod.least_squares
                else:
                    # Get snapped core alignment for each mapping.
                    snappedB = [
                        structure.Structure(ct) for ct in aligner.getAlignedB()
                    ]
                    # Keep only alignments without distorted bonds.
                    align_keep = self._removeDistoredStructures(stB, snappedB)
                    if align_keep:
                        snappedB = [snappedB[i] for i in align_keep]
                        core_mappings = [core_mappings[i] for i in align_keep]
                    else:
                        align_method = AlignMethod.least_squares
                        align_reason = "distorted bond in snapped structure"
            except phase.PhpException as exc:
                # An unknown type of failure. We don't have mappings, so we
                # have to do flexible shape-based alignment.
                self._logger.debug(f"Core snapping failed: {exc}")
                align_method = AlignMethod.shape_based
                align_reason = "unknown core snapping failure"
        else:
            # No core SMARTS available for this pair.
            align_method = AlignMethod.shape_based
            if not self._core:
                align_reason = "no shared scaffolds"
            elif self._core == MCS_CORES:
                align_reason = "no MCS with reference"
            else:
                align_reason = "no SMARTS matches"

        if align_reason:
            align_method_descr = ALIGN_METHOD_DESCRIPTION[align_method]
            mesg = "Performing {} alignment due to {}"
            self._logger.info(mesg.format(align_method_descr, align_reason))

        stB_aligned = None
        if align_method == AlignMethod.snap_core:
            stB_aligned = self._alignSnappedCore(stA, snappedB, core_mappings)
        elif align_method == AlignMethod.least_squares:
            stB_aligned = self._alignLeastSquares(stA, stB, core_mappings)
        else:
            stB_aligned = self._alignShapes(stA, stB)

        stB_aligned.property[ALIGN_METHOD_PROP] = align_method_descr
        stB_aligned.property[ALIGN_REASON_PROP] = align_reason
        return stB_aligned

    def _getFrozenAtoms(self, stB, core_mapping):
        """
        Creates a 1-based list of atoms in stB to be held frozen while
        generating conformers.

        :param stB: Structure containing atoms to freeze.
        :type stB: structure.Structure

        :param core_mapping: Pairs of 1-based B->A core atom mappings.
        :type core_mapping: list(int, int))

        :return: List of frozen atoms.
        :rtype: list(int)
        """

        frozen_atoms = [atom_pair[0] for atom_pair in core_mapping]
        if not self._options.minimize_confs:
            # Expand list to include terminal atoms attached to frozen_atoms
            # because they may end up with weird placement otherwise.
            frozen_set = set(frozen_atoms)
            n = len(frozen_atoms)
            for i in range(n):
                for neighbor in stB.atom[frozen_atoms[i]].bonded_atoms:
                    if neighbor.bond_total == 1 and neighbor not in frozen_set:
                        frozen_atoms.append(neighbor.index)

        return frozen_atoms

    def _getMcsMatrix(self):
        """
        Surrogate for _getScaffoldMatrix when MCS-based cores are in use.
        The [i][ref] and [ref][i] elements are set to i if structure i
        yielded an MCS with the reference structure. Other elements are
        set to -1.

        :return: n by n matrix with values as described above.
        :rtype: numpy.ndarray
        """

        nmol = len(self._mols)
        mcs_matrix = numpy.full((nmol, nmol), -1)
        ref_pos = self._ref_index - 1
        for i in range(nmol):
            if self._fuzzy_smarts[i]:
                mcs_matrix[i][ref_pos] = i
                mcs_matrix[ref_pos][i] = i

        return mcs_matrix

    def _getScaffoldMatrix(self):
        """
        Determines the largest scaffold shared by each pair of structures. This
        can be thought of as a distance matrix because a value of 0 means that
        the two structures share the largest identified scaffold and thus share
        as much of their scaffold backbone as possible. Larger scaffold numbers
        correspond to progressively smaller shared scaffolds and hence larger
        distances. A distance of self._scaffold_count indicates the structures
        share no scaffolds.

        :return: n by n matrix with scaffold numbers in off-diagonal positions,
                where n is the number of structures.
        :rtype: numpy.ndarray
        """

        nmol = len(self._mols)
        nscaff = self._scaffold_count
        scaffold_matrix = numpy.full((nmol, nmol), nscaff)
        for i, scaffolds_i in enumerate(self._scaffolds_in_mol):
            if scaffolds_i:
                scaffold_matrix[i][i] = min(scaffolds_i)
                for j in range(i):
                    scaffolds_j = self._scaffolds_in_mol[j]
                    shared_ij = scaffolds_i.intersection(scaffolds_j)
                    if shared_ij:
                        dij = min(shared_ij)
                        scaffold_matrix[i][j] = dij
                        scaffold_matrix[j][i] = dij

        return scaffold_matrix

    def _getSmartsMatrix(self):
        """
        Surrogate for _getScaffoldMatrix when a core SMARTS has been supplied.
        Off-diagonal elements are set to 0 if one of the structures is the
        reference and the other structure matches the core SMARTS. Other
        elements are set to 1.

        :return: n by n matrix with 0/1 values as described above, where n is
                the number of structures.
        :rtype: numpy.ndarray
        """

        nmol = len(self._mols)
        smarts_matrix = numpy.ones((nmol, nmol), int)
        query = canvas.ChmQuery(self._core)
        matcher = canvas.ChmQueryMatcher(True)
        ref_pos = self._ref_index - 1
        for i, mol in enumerate(self._mols):
            if i != ref_pos:
                if matcher.hasMatch(query, mol):
                    smarts_matrix[i][ref_pos] = 0
                    smarts_matrix[ref_pos][i] = 0

        return smarts_matrix

    def _getShape(self, st_ref):
        """
        Creates a PhpFastShape object to score alignments against the
        provided reference structure.

        :param st_ref: Reference structure.
        :type st_ref: structure.Structure

        :return: PhpFastShape object.
        :rtype: phase.PhpFastShape
        """

        atom_types = "element"
        hydrogens = phase.HYDROGENS_ALL
        weight_prop = ""
        return phase.PhpFastShape(st_ref.handle, hydrogens, atom_types,
                                  weight_prop)

    def _getSideChain(self, anchor_atom, st, ring_atoms):
        """
        Retrieves a sidechain connected to the indicated anchor atom from
        a stored dictionary or grows it from scratch. The returned list of
        sidechain atoms will be [-1] if a chain cannot be grown or if the
        chain has been removed from the pool because it was already snapped.

        :param anchor_atom: 1-based atom number from which the sidechain
                originates. This atom is not included in the sidechain.
        :type anchor_atom: int

        :param st: The structure.
        :type st: structure.Structure

        :param ring_atoms: 1-based set of ring atom numbers in the structure.
        :type ring_atoms: set(int)

        :return: 1-based sidechain atom numbers followed by SMARTS.
        :rtype: list(int), str
        """

        st_number = st.property[OUTPUT_STRUCTURE_NUMBER_PROP] - 1
        atoms = self._sidechain_atoms[st_number][anchor_atom]
        smarts = self._sidechain_smarts[st_number][anchor_atom]
        if not atoms:
            atoms, smarts = self._growSideChain(st, anchor_atom, ring_atoms)
            if not atoms:
                atoms = [-1]
            self._sidechain_atoms[st_number][anchor_atom] = atoms
            self._sidechain_smarts[st_number][anchor_atom] = smarts

        return atoms, smarts

    def _growCore(self, st_i, st_j, core_i, core_j):
        """
        Attempts to expand the provided lists of core atoms by adding pairs
        of heavy atoms which have the same coordinates and which are bonded
        to atoms already in the core. The lists of core atoms are modified
        directly.

        :param st_i: Structure associated with core_i.
        :type st_i: structure.Structure

        :param st_j: Structure associated with core_j.
        :type st_j: structure.Structure

        :param core_i: 1-based list of core atom numbers in st_i.
        :type core_i: list(int)

        :param core_j: 1-based list of core atom numbers in st_j.
        :type core_j: list(int)
        """

        excluded_i = (st_i.atom_total + 1) * [0]
        for i in core_i:
            excluded_i[i] = 1
        excluded_j = (st_j.atom_total + 1) * [0]
        for j in core_j:
            excluded_j[j] = 1
        while True:
            # Look for atoms with the same coordinates bonded to current core.
            len_prev = len(core_i)
            for atom_pos in range(len_prev):
                atom_i = core_i[atom_pos]
                atom_j = core_j[atom_pos]
                for bi in st_i.atom[atom_i].bonded_atoms:
                    if excluded_i[bi.index] or bi.atomic_number == 1:
                        continue
                    xyzi = (bi.x, bi.y, bi.z)
                    for bj in st_j.atom[atom_j].bonded_atoms:
                        if excluded_j[bj.index] or bj.atomic_number == 1:
                            continue
                        xyzj = (bj.x, bj.y, bj.z)
                        match = True
                        for coord_i, coord_j in zip(xyzi, xyzj):
                            if abs(coord_i - coord_j) > ATOM_COORD_TOL:
                                match = False
                                break
                        if match:
                            core_i.append(bi.index)
                            core_j.append(bj.index)
                            excluded_i[bi.index] = 1
                            excluded_j[bj.index] = 1
            if len(core_i) == len_prev:
                break

    def _growSideChain(self, st, anchor_atom, ring_atoms):
        """
        Attempts to grow a sidechain attached to anchor_atom, which is assumed
        to be a ring atom in the core with exactly one attached exocyclic heavy
        atom. Returns the list of heavy atoms in the sidechain followed by the
        the corresponding SMARTS string. An empty list and string are returned
        if the chain leads to another ring.

        :param st: Structure for which the chain is to be grown.
        :type st: structure.Structure

        :param anchor_atom: 1-based atom number of chain attachment point.
        :type anchor_atom: int

        :param ring_atoms: 1-based set of all ring atom numbers in structure.
        :type ring_atoms: set(int)

        :return: 1-based atom numbers and SMARTS string for sidechain.
        :rtype: list(int), str
        """

        excluded = (st.atom_total + 1) * [0]
        excluded[anchor_atom] = 1
        sidechain_atoms = []
        # Find first atom in sidechain.
        for neighbor in st.atom[anchor_atom].bonded_atoms:
            index = neighbor.index
            if index not in ring_atoms and neighbor.atomic_number != 1:
                sidechain_atoms.append(neighbor)
            excluded[index] = 1

        # Grow sidechain from first atom, traversing all branches.
        while True:
            len_prev = len(sidechain_atoms)
            for i in range(len_prev):
                atom = sidechain_atoms[i]
                for neighbor in atom.bonded_atoms:
                    index = neighbor.index
                    if not excluded[index]:
                        if index in ring_atoms:
                            return [], ""
                        elif neighbor.atomic_number != 1:
                            sidechain_atoms.append(neighbor)
                            excluded[index] = 1
            if len(sidechain_atoms) == len_prev:
                break

        smarts = analyze.generate_smarts_canvas(st, sidechain_atoms)
        return [atom.index for atom in sidechain_atoms], smarts

    def _initAlignData(self, ref_index, core):
        """
        Initializes all member level data that gets regenerated with each call
        to the align() function.

        :param ref_index: 1-based reference structure number.
        :type ref_index: int

        :param core: A SMARTS string, MCS_CORES or empty.
        :type core: str
        """

        self._ref_index = ref_index
        self._core = core
        self._anchor_atom_sets = []
        self._clusters = {}
        self._fuzzy_smarts = []
        self._mols = []
        self._mst_edges = []
        self._ring_atom_sets = []
        self._scaffold_count = 0
        self._scaffolds_in_mol = []
        self._sidechain_atoms = []
        self._sidechain_smarts = []
        self._snapped_core_mappings = {}
        self._least_squares_core_mappings = {}
        self._ring_atom_sets = []
        self._frozen_refinement_atoms = {}

    def _initSideChains(self):
        """
        Initializes dictionaries of sidechain atoms and SMARTS that are used
        to avoid repetitive re-growing of sidechains from the same anchor
        atoms.
        """

        # Loop over anchor atoms in each structure.
        for anchor_atoms in self._anchor_atom_sets:
            sidechain_atoms = {}
            sidechain_smarts = {}
            for atom in anchor_atoms:
                sidechain_atoms[atom] = []
                sidechain_smarts[atom] = ""
            self._sidechain_atoms.append(sidechain_atoms)
            self._sidechain_smarts.append(sidechain_smarts)

    def _matchCoreAtoms(self, st_i, st_j):
        """
        Identifies the common core for a pair of structures from coordinates
        of snapped atoms.

        :param st_i: First structure in pair.
        :type st_i: structure.Structure

        :param st_j Second structure in pair.
        :type st_j: structure.Structure

        :return: Lists of 1-based core atom numbers for structures i and j.
        :rtype: list(int), list(int)
        """

        i = st_i.property[OUTPUT_STRUCTURE_NUMBER_PROP] - 1
        j = st_j.property[OUTPUT_STRUCTURE_NUMBER_PROP] - 1
        core_atoms_i = []
        core_atoms_j = []
        if self._core_atom_coords[i] and self._core_atom_coords[j]:
            for row_i in self._core_atom_coords[i]:
                for row_j in self._core_atom_coords[j]:
                    match = True
                    for k in range(1, 4):
                        if abs(row_i[k] - row_j[k]) > ATOM_COORD_TOL:
                            match = False
                            break
                    if match:
                        core_atoms_i.append(row_i[0])
                        core_atoms_j.append(row_j[0])
            if core_atoms_i:
                # Try to add coincident atoms that lie outside the defined
                # cores.
                self._growCore(st_i, st_j, core_atoms_i, core_atoms_j)

        return core_atoms_i, core_atoms_j

    def _matchSideChains(self, stA, stB, anchorA, anchorB):
        """
        Returns all B-->A atom mappings for the sidechains attached to the
        indicated anchor atoms. No mappings are returned if the side chains
        don't exist or they are not equivalent.

        :param stA: The fixed reference structure.
        :type stA: structure.Structure

        :param stB: The candidate structure to be aligned to the reference.
        :type stB: structure.Structure

        :param anchorA: 1-based atom number of anchor atom in stA.
        :type anchorA: int

        :param anchorB: 1-based atom number of anchor atom in stB.
        :type anchorB: int

        :return: lists of B-->A atom mappings.
        :rtype: list(list((int, int)))
        """

        ringsA = self._ring_atom_sets[stA.property[OUTPUT_STRUCTURE_NUMBER_PROP]
                                      - 1]
        atomsA, smartsA = self._getSideChain(anchorA, stA, ringsA)
        if atomsA == [-1]:
            return []

        ringsB = self._ring_atom_sets[stB.property[OUTPUT_STRUCTURE_NUMBER_PROP]
                                      - 1]
        atomsB, smartsB = self._getSideChain(anchorB, stB, ringsB)
        if atomsB == [-1] or len(atomsB) != len(atomsA):
            return []

        # At this point we know that the sidechains contain the same
        # numbers of atoms, but we don't know that they're equivalent.
        # And we can't assume the sidechains are the same/different simply
        # because the SMARTS are the same/different. The only way to know
        # for certain is to pick one SMARTS and match it against both
        # sidechains.

        matchesA = analyze.evaluate_smarts_canvas(stA,
                                                  smartsB,
                                                  uniqueFilter=False)
        # Filter out matches that don't consist of the atoms in atomsA.
        filteredA = self._filterMatches(matchesA, atomsA)
        if not filteredA:
            return []

        # We only need one mapping to stB because we already requested
        # all mappings to stA.
        matchesB = analyze.evaluate_smarts_canvas(stB,
                                                  smartsB,
                                                  uniqueFilter=True)
        filteredB = self._filterMatches(matchesB, atomsB)

        core_mappings = []
        firstA = atomsA[0]
        firstB = atomsB[0]
        # Keep only mappings for which firstA and firstB appear in the same
        # position in their respective lists. These are the sidechain atoms
        # that are bonded directly to the rings, so they must be mapped to
        # each other.
        for matchB in filteredB:
            indexB = matchB.index(firstB)
            for matchA in filteredA:
                if matchA.index(firstA) == indexB:
                    core_mappings.append(list(zip(matchB, matchA)))

        return core_mappings

    def _preparePrimaryRef(self, st_ref):
        """
        Prepares the primary reference structure, which includes finding the
        most similar conformer if the option to use a primary reference
        conformer is True.

        :param st_ref: Input primary reference structure.
        :type st_ref: structure.Structure

        :return: The incoming reference structure or most similar conformer,
                with the full set of CT-level properties.
        :rtype: structure.Structure
        """

        st_number = st_ref.property[OUTPUT_STRUCTURE_NUMBER_PROP]
        self._logger.info(f"\nPrimary reference = structure {st_number}")
        st_ref.property[SHAPE_SIM_PROP] = 1.0
        align_method = ""
        align_reason = ""
        if self._options.use_sampled_ref:
            mesg = "Replacing primary reference structure with most " + \
            "similar conformer"
            self._logger.info(mesg)
            st_ref = self._alignShapes(st_ref, st_ref, append_confs=False)
            align_method = ALIGN_METHOD_DESCRIPTION[AlignMethod.shape_based]
            align_reason = "primary reference conformer requested"
            sim_ref = st_ref.property[SHAPE_SIM_PROP]
            self._logger.info("Conformer shape similarity = %.6f" % sim_ref)
        st_ref.property[REFERENCE_STRUCTURE_PROP] = st_number
        st_ref.property[ALIGN_METHOD_PROP] = align_method
        st_ref.property[ALIGN_REASON_PROP] = align_reason
        st_ref.property[CORE_SMARTS_PROP] = ""
        return st_ref

    def _refineAlignments(self, sts):
        """
        Generates additional conformers for each structure that was snapped
        to its reference, with core atoms and snapped sidechains held fixed,
        and uses those conformers to systematically increase the average
        in-place shape similarity over all pairs of structures.

        :param sts: The aligned structures to be refined.
        :type sts: list(structure.Structure)

        :return: Refined alignments in the same order as the input structures.
        :rtype: list(structure.Structure)
        """

        refiner = phase.PhpAlignmentRefiner()
        conf_options = phase.PhpConfOptions()
        if self._options.sampling_method == phase.CONF_SAMPLE_FINE_NAME:
            conf_options.setSamplingMethod(phase.CONF_SAMPLE_FINE)
        conf_options.setMaxConfs(self._options.max_confs)
        if self._options.minimize_confs:
            conf_options.setForceField(phase.CONF_FF_OPLS_2005)
        refiner.setConfOptions(conf_options)
        refiner.setMaxIterations(len(sts))  # Number completed is usually small
        if self._logger.getEffectiveLevel() == log.DEBUG:
            refiner.setVerboseOutput(True)

        frozen_atoms = []
        refined_set = set()
        for imol in range(len(sts)):
            frozen = []
            if imol in self._frozen_refinement_atoms:
                frozen = list(self._frozen_refinement_atoms[imol])
                refined_set.add(imol)
            frozen_atoms.append(frozen)

        self._logger.info("\nRefining alignments of snapped structures")
        sts_refined = [
            structure.Structure(ct) for ct in refiner.refine(sts, frozen_atoms)
        ]
        # Need to update SHAPE_SIM_PROP.
        self._updateSimilarities(sts_refined, refined_set)
        return sts_refined

    def _removeDistoredStructures(self,
                                  stB,
                                  snappedB,
                                  bond_tol=DISTORTED_BOND_TOL):
        """
        Identifies structures in snappedB with a bond length that differs by
        more than bond_tol from the corresponding bond length in stB. Returns
        a 0-based list of the structures that contain no such bonds.

        :param stB: The original structure.
        :type stB: structure.Structure

        :param snappedB: List of snapped versions of stB which may contain
                distored bonds.
        :type snappedB: list(structure.Structure)

        :return: 0-based list of structures with no distorted bonds.
        :rtype: list(int)
        """

        sts_to_keep = []
        for i, st_snapped in enumerate(snappedB):
            distorted_bond = False
            for bond_ref, bond_snapped in zip(stB.bond, st_snapped.bond):
                if abs(bond_ref.length - bond_snapped.length) > bond_tol:
                    distorted_bond = True
                    break
            if not distorted_bond:
                sts_to_keep.append(i)

        return sts_to_keep

    def _setupAlign(self, sts):
        """
        Does setup for alignment of the provided structures, which includes
        checking for multiple fragments, the absence of rings, and RDKit
        conversion failures if MCS cores are being used. Filters out such
        structures if the option to skip bad structures is True. Raises a
        RuntimeError otherwise.

        :param sts: The structures to be aligned.
        :type sts: list(structure.Structure)

        :return: Copies of the incoming structures, minus any bad structures,
                and ChmMol objects created from the usable structures
        :rtype: list(structure.Structure), list(ChmMol)
        """

        adaptor = ChmMmctAdaptor()
        mols = []
        keep = []
        fragmented = []
        no_rings = []
        rdkit_conversion_error = []
        # Don't require rings to be present if a core has been specified.
        require_rings = not self._core
        # RDKit conversion errors can occur only if using MCS cores.
        attempt_rdkit_conversion = self._core == MCS_CORES
        for i, st in enumerate(sts):
            if len(st.molecule) > 1:
                fragmented.append(i)
            elif attempt_rdkit_conversion and self._failsRDKitConversion(st):
                rdkit_conversion_error.append(i)
            else:
                mol = adaptor.create(st,
                                     ChmMmctAdaptor.StereoFromAnnotation_Safe)
                if require_rings and not mol.getRingCount():
                    no_rings.append(i)
                else:
                    mols.append(mol)
                    keep.append(i)

        if self._options.fail_on_bad:
            if fragmented:
                fs = ", ".join(str(i + 1) for i in fragmented)
                msg = ("Found multiple disconnected fragments in the "
                       f"following structures: {fs}")
                raise RuntimeError(msg)
            if no_rings:
                nr = ", ".join(str(i + 1) for i in no_rings)
                msg = f"Found no rings in the following structures: {nr}"
                raise RuntimeError(msg)
            if rdkit_conversion_error:
                rce = ", ".join(str(i + 1) for i in rdkit_conversion_error)
                msg = ("RDKit conversion failed for the following structures: "
                       f"{rce}")
                raise RuntimeError(msg)

        # Make copies of the valid structures and add properties that hold
        # the input and output structure numbers. For example, if there are
        # 10 input structures and structures 3 and 7 are removed, the 8
        # returned structures will have input structure numbers 1, 2, 4, 5,
        # 6, 8, 9, 10 and output structure numbers 1, 2, 3, 4, 5, 6, 7, 8.
        sts_out = []
        # Reference structure number may shift or it could be lost.
        found_ref = False
        for number_out, number_in in enumerate(keep):
            in_plus_1 = number_in + 1
            out_plus_1 = number_out + 1
            if self._ref_index and in_plus_1 == self._ref_index:
                self._ref_index = out_plus_1
                found_ref = True
            st_out = sts[number_in].copy()
            st_out.property[INPUT_STRUCTURE_NUMBER_PROP] = in_plus_1
            st_out.property[OUTPUT_STRUCTURE_NUMBER_PROP] = out_plus_1
            sts_out.append(st_out)

        if self._ref_index and not found_ref:
            ref_index = self._ref_index - 1
            if ref_index in fragmented:
                msg = "Reference structure contains multiple fragments"
            elif ref_index in no_rings:
                msg = "Reference structure contains no rings"
            else:
                msg = "RDKit conversion failed for reference structure"
            raise RuntimeError(msg)

        return sts_out, mols

    def _snapAndScore(self, stA, stB, core_mappings):
        """
        Snaps stB to stA according to the B-->A atom mappings in core_mappings
        and identifies the snapped version of stB which yields the highest
        shape similarity to stA. If all attempts to snap the core fail, the
        original stB is returned. If refinement is turned on, the stB atoms
        from the best mapping are added to self._frozen_refinement_atoms. The
        stA atoms are added as long as stA isn't the primary or designated
        reference structure.

        :param stA: The fixed reference structure.
        :type stA: structure.Structure

        :param stB: The structure to be snapped onto stA.
        :type stB: structure.Structure

        :param core_mappings: Lists of 1-based B-->A atom mappings that define
                the various ways the core of stB is to be snapped to stA.
        :type core_mappings: list(list((int, int)))

        :return: A boolean indicating whether stB was successfully snapped,
                and the highest scoring snapped version of stB, or the
                incoming stB if snapping failed.
        :rtype: bool, structure.Structure
        """

        shape = self._getShape(stA)
        inplace = True
        align_core_options = self._getAlignCoreOptions(True)
        try:
            aligner = phase.PhpAlignCore(stA.handle, stB.handle, core_mappings,
                                         align_core_options)
            stB_snapped = [
                structure.Structure(ct) for ct in aligner.getAlignedB()
            ]
            mappings = aligner.getMappings()
            sim_best = 0.0
            stB_snapped_best = None
            mapping_best = None
            first_conf = True
            for st, mapping in zip(stB_snapped, mappings):
                if self._options.align_terminal_hydrogens:
                    phase.align_terminal_hydrogens(stA, st, mapping)
                result = shape.computeShapeSimPy(st.handle, first_conf, inplace)
                first_conf = False
                if result[0] > sim_best:
                    sim_best = result[0]
                    stB_snapped_best = st
                    mapping_best = mapping

            if self._options.refine:
                stA_number = stA.property[OUTPUT_STRUCTURE_NUMBER_PROP] - 1
                stB_number = stB.property[OUTPUT_STRUCTURE_NUMBER_PROP] - 1
                addA = stA_number != self._mst_edges[0][0]
                for atomB, atomA in mapping_best:
                    self._frozen_refinement_atoms[stB_number].add(atomB)
                    if addA:
                        self._frozen_refinement_atoms[stA_number].add(atomA)

            return True, stB_snapped_best
        except phase.PhpException as exc:
            iA = stA.property[OUTPUT_STRUCTURE_NUMBER_PROP]
            iB = stB.property[OUTPUT_STRUCTURE_NUMBER_PROP]
            self._logger.debug(
                f"Sidechain snapping failed for {iB}-->{iA}: {exc}")
            return False, stB

    def _snapSideChains(self, sts_aligned):
        """
        Attempts to improve snapped core alignments by expanding cores to
        include sidechains that are common to two or more structures. In
        this treatment we don't use fuzzy matching, i.e., the sidechains
        must be chemically identical.

        :param sts_aligned: All structures from the initial alignment phase.
        :type sts_aligned: list(structure.Structure)

        :return: Re-snapped structures. If a structure doesn't get re-snapped,
                the incoming structure is returned.
        :rtype: list(structure.Structure)
        """

        self._logger.info("\nAttempting to superimpose identical sidechains")

        # For each snapped structure, store the coordinates of each snapped
        # atom. We will use this data to determine the common core for each
        # pair of snapped structures, even if that pair was never directly
        # snapped onto one another.
        self._storeCoreAtomCoords(sts_aligned)

        # Compile sets of ring atoms and anchor atoms, the latter being
        # potential attachment points for sidechains.
        self._storeRingsAndAnchors(sts_aligned)

        # Avoid repetitive re-growing of sidechains by storing them in
        # dictionaries keyed by anchor atom.
        self._initSideChains()

        # Do all applicable pairwise comparisons in canonical order and re-snap
        # any common sidechains.
        sts_resnapped = [st.copy() for st in sts_aligned]
        resnapped_set = set()
        n = len(sts_aligned)
        for i in range(1, n):
            imol = self._canonical_order[i]
            anchor_atoms_i = self._anchor_atom_sets[imol]
            for j in range(i):
                st_i = sts_resnapped[imol]
                jmol = self._canonical_order[j]
                st_j = sts_resnapped[jmol]
                core_atoms_i, core_atoms_j = self._matchCoreAtoms(st_i, st_j)

                # Skip this pair if the core is too small or disconnected.
                if self._badCore(st_i, st_j, core_atoms_i, core_atoms_j):
                    continue
                core_mapping = list(zip(core_atoms_i, core_atoms_j))
                anchor_atoms_j = self._anchor_atom_sets[jmol]

                # Attempt to grow and snap sidechains attached to each anchor
                # atom in the respective common cores.
                for atom_i, atom_j in zip(core_atoms_i, core_atoms_j):
                    if atom_i in anchor_atoms_i and atom_j in anchor_atoms_j:
                        sidechain_mappings = self._matchSideChains(
                            st_j, st_i, atom_j, atom_i)
                        if not sidechain_mappings:
                            continue
                        new_core_mappings = []
                        for mapping in sidechain_mappings:
                            # It's possible that _matchCoreAtoms already added
                            # one or more atoms from this sidechain simply
                            # because they were already coincident, so don't
                            # add any atoms that are already in the core.
                            new_core = list(core_mapping)
                            for atom_pair in mapping:
                                if atom_pair not in core_mapping:
                                    new_core.append(atom_pair)
                            new_core_mappings.append(new_core)
                        smarts = self._sidechain_smarts[imol][atom_i]
                        mesg = "Attempting to re-snap {}-->{} with " + \
                               "sidechain {} attached to atoms {} and " + \
                               "{}\nCore mappings = {}"
                        self._logger.debug(
                            mesg.format(imol + 1, jmol + 1, smarts, atom_i,
                                        atom_j, new_core_mappings))
                        # If refinement is turned on, a successful call to
                        # self._snapAndScore adds the st_i sidechain atoms to
                        # self._frozen_refinement_atoms. The st_j sidechain
                        # atoms are added if it's not the primary reference.
                        success, snapped_i = self._snapAndScore(
                            st_j, st_i, new_core_mappings)
                        if success:
                            mesg = "Successfully re-snapped {}-->{}: " + \
                                   "Sidechain = {}, anchor atoms = {}, {}"
                            self._logger.info(
                                mesg.format(imol + 1, jmol + 1, smarts, atom_i,
                                            atom_j))
                            st_i = snapped_i
                            sts_resnapped[imol] = snapped_i
                            resnapped_set.add(imol)
                            # Don't snap this sidechain again.
                            self._sidechain_atoms[imol][atom_i] = [-1]

        if resnapped_set:
            self._updateSimilarities(sts_resnapped, resnapped_set)

        return sts_resnapped

    def _storeCoreAtomCoords(self, sts_aligned):
        """
        For each snapped structure, this function stores the coordinates of
        each snapped atom.

        :param sts_aligned: Aligned structures.
        :type sts_aligned: list(structure.Structure)
        """

        self._core_atom_coords = len(sts_aligned) * [[]]
        for i, st in enumerate(sts_aligned):
            if i in self._snapped_core_mappings:
                atom_coords = []
                core_mapping = self._snapped_core_mappings[i]
                core_atoms = [atom_pair[0] for atom_pair in core_mapping]
                for core_atom in core_atoms:
                    atom = st.atom[core_atom]
                    atom_coords.append((core_atom, atom.x, atom.y, atom.z))
                self._core_atom_coords[i] = atom_coords

        # The primary reference was never snapped to anything else, so just
        # store coordinates of all heavy atoms.
        iref = self._canonical_order[0]
        st_ref = sts_aligned[iref]
        atom_coords = []
        for atom in st_ref.atom:
            if atom.atomic_number != 1:
                atom_coords.append((atom.index, atom.x, atom.y, atom.z))
        self._core_atom_coords[iref] = atom_coords

    def _storeFuzzyScaffoldsInMol(self):
        """
        Matches the fuzzy scaffold SMARTS against each structure and stores
        the scaffold index for each SMARTS that yields a match.
        """

        queries = [canvas.ChmQuery(smarts) for smarts in self._fuzzy_smarts]
        matcher = canvas.ChmQueryMatcher(True)
        for mol in self._mols:
            scaffolds = set()
            for i, query in enumerate(queries):
                if matcher.hasMatch(query, mol):
                    scaffolds.add(i)
            self._scaffolds_in_mol.append(scaffolds)

    def _storeRingsAndAnchors(self, sts):
        """
        Compiles sets of ring atoms and anchor atoms for the provided
        structures.

        :param sts: The structures.
        :type sts: list(structure.Structure)
        """

        for st in sts:
            ring_atoms = set(itertools.chain(*st.find_rings()))
            self._ring_atom_sets.append(ring_atoms)
            self._anchor_atom_sets.append(self._getAnchorAtoms(st, ring_atoms))

    def _updateSimilarities(self, sts, subset):
        """
        For each structure number in subset, the in-place shape similarity
        to the corresponding reference structure is recalculated to reflect
        snapping of sidechains or refinement, and the property SHAPE_SIM_PROP
        is updated. It's worth noting that shape similarity to the reference
        doesn't always increase because snapping of sidechains and refinement
        aren't solely focused on increasing the similarity to the reference.
        They are instead concerned with increasing the similarity between any
        number of pairs of structures.

        :param sts: All structures in the analysis. The SHAPE_SIM_PROP is
                overwritten for the affected subset.
        :type sts: list(structure.Structure)

        :param subset: 0-based set of structure numbers to operate on.
        :type subset: set(int)
        """

        mesg = "\nRecalculating shape similarities to reference structure"
        self._logger.info(mesg)

        inplace = True
        first_conf = True
        ref_prev = None
        shape = None
        for i in sorted(subset):
            stB = sts[i]
            ref = stB.property[REFERENCE_STRUCTURE_PROP] - 1
            if ref != ref_prev:
                shape = self._getShape(sts[ref])
                ref_prev = ref
            sim_old = sts[i].property[SHAPE_SIM_PROP]
            sim_new = shape.computeShapeSimPy(stB, first_conf, inplace)[0]
            sts[i].property[SHAPE_SIM_PROP] = sim_new
            mesg = "Structure %d: Old similarity = %.6f, new similarity = " + \
                   "%.6f"
            self._logger.info(mesg % (i + 1, sim_old, sim_new))