Source code for schrodinger.application.matsci.buildcomplex

"""
This module assists in building organometallic complexes.  Given one or more
ligands, these ligands will be arranged around a central atom.

Copyright Schrodinger, LLC. All rights reserved.
"""
import os

import numpy
import pandas
from past.utils import old_div
from types import SimpleNamespace
from collections import namedtuple

from schrodinger import structure
from schrodinger.application.matsci import atomicsymbols
from schrodinger.application.matsci import clusterstruct
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import rdpattern
from schrodinger.application.matsci import rxn_channel
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import minimize
from schrodinger.structutils import rmsd
from schrodinger.structutils import smiles
from schrodinger.structutils import transform
from schrodinger.utils import fileutils

MONODENTATE = 'Monodentate'
""" Name for ligands that have a single coordination site """
BIDENTATE = 'Bidentate'
""" Name for ligands that have two coordination sites """
OCTAHEDRAL = 'Octahedral'
""" VESPR geometry with 6 coordination sites around a central atom """
TRIGONAL_BIPYRAMIDAL = 'Trigonal bipyramidal'
""" VESPR geometry with 5 coordination sites around a central atom """
TETRAHEDRAL = 'Tetrahedral'
""" VESPR geometry with 4 coordination sites around a central atom """
SQUARE_PLANAR = 'Square planar'
""" VESPR geometry with 4 coordination sites around a central atom """
TRIGONAL_PLANAR = 'Trigonal planar'
""" VESPR geometry with 3 coordination sites around a central atom """
LINEAR = 'Linear'
""" VESPR geometry with 2 coordination sites around a central atom """
PENTAGONAL_BIPYRAMIDAL = 'Pentagonal bipyramidal'
""" VESPR geometry with 7 coordination sites around a central atom """
SQUARE_ANTIPRISMATIC = 'Square antiprismatic'
""" VESPR geometry with 8 coordination sites around a central atom """
TRICAPPED_TRIGONAL_PRISMATIC = 'Tricapped trigonal prismatic'
""" VESPR geometry with 9 coordination sites around a central atom """
SUPPORTED_GEOMETRIES = [
    OCTAHEDRAL, TRIGONAL_BIPYRAMIDAL, SQUARE_PLANAR, TETRAHEDRAL,
    TRIGONAL_PLANAR, LINEAR, PENTAGONAL_BIPYRAMIDAL, SQUARE_ANTIPRISMATIC,
    TRICAPPED_TRIGONAL_PRISMATIC
]
""" VESPR geometries that can be built by this module """
FACIAL = 'facial'
""" Octahedral complex with identical atoms on the face of the octahedron """
MERIDIONAL = 'meridional'
""" Octahedral complex with identical atoms on the meridion of the
octahedron """
NO_ISOMER = "none"
""" No specific isomer """
CIS = 'cis'
""" Square planar complex with identical atoms in adjacent sites """
TRANS = 'trans'
""" Square planar complex with identical atoms in opposite sites """

# Note - the ordering of OCTAHEDRAL_LOCATIONS is important here.  For facial, it
# is important that 1,6; 2,5; and 3,4 combinations not be +/- combinations on
# the same axis.  For meridonal, it is important that 1,2; 3,4; and 5,6
# combinations not be +/- on the same axis.  The current order has the positive
# face positions first and the negative face positions last.
OCTAHEDRAL_LOCATIONS = [(2.0, 0.0, 0.0), (0.0, 2.0, 0.0), (0.0, 0.0, 2.0),
                        (0.0, -2.0, 0.0), (-2.0, 0.0, 0.0), (0.0, 0.0, -2.0)]
""" XYZ coordinates of the octahedral coordination sites """
TRIGONAL_BIPYRAMIDAL_LOCATIONS = [(0.0, 2.0, 0.0), (0.0, -2.0, 0.0),
                                  (2.0, 0.0, 0.0), (-1., 0.0, 1.73205),
                                  (-1., 0.0, -1.73205)]
""" XYZ coordinates of the trigonal pyramid coordination sites """
# Note - ordering of SQUARE_PLANAR is important.  Slots are filled from 0-3, so
# for bidentate ligands 0, 1 and 2, 3 must not be +/- on the same axis
SQUARE_PLANAR_LOCATIONS = [(2.0, 0.0, 0.0), (0.0, 2.0, 0.0), (0.0, -2.0, 0.0),
                           (-2.0, 0.0, 0.0)]
""" XYZ coordinates of the square planar coordination sites """
TETRAHEDRAL_LOCATIONS = [(0.0, 2.0, 0.0), (1.88562, -0.66667, 0.0),
                         (-0.94281, -0.66667, -1.63299),
                         (-0.94281, -0.66667, 1.63299)]
""" XYZ coordinates of the tetrahedral coordination sites """
TRIGONAL_PLANAR_LOCATIONS = [(2.0, 0.0, 0.0), (-1., 1.73205, 0.0),
                             (-1., -1.73205, 0.0)]
""" XYZ coordinates of the trigonal planar coordination sites """
LINEAR_LOCATIONS = [(2.0, 0.0, 0.0), (-2.0, 0.0, 0.0)]
""" XYZ coordinates of the linear coordination sites """

# 7-coordinate pentagonal bipyramidal
# Has two axial atoms (+z and -z) and then 5 atoms forming a pentagon in the XY
# plane
#
# Multiply these by 2 because of the 2.0 A bond length
sin72_2 = numpy.sin(numpy.deg2rad(72)) * 2.0
cos72_2 = numpy.cos(numpy.deg2rad(72)) * 2.0
sin144_2 = numpy.sin(numpy.deg2rad(144)) * 2.0
cos144_2 = numpy.cos(numpy.deg2rad(144)) * 2.0
PENTAGONAL_BIPYRAMIDAL_LOCATIONS = [(0.0, 0.0, 2.0), (2.0, 0.0, 0.0),
                                    (cos72_2, sin72_2, 0.0),
                                    (cos144_2, sin144_2, 0.0),
                                    (cos144_2, -sin144_2, 0.0),
                                    (cos72_2, -sin72_2, 0.0), (0.0, 0.0, -2.0)]
""" XYZ coordinates of the pentagonal bipyramidal sites """

# 8-coordinate square antiprismatic
# Looks like 2 squares sandwiching a metal atom, where one of the squares is
# rotated 45 degrees in-plane from the other.
#
# Note - it's not clear what the correct bond angles should be, but using 32
# degrees gives bond angles and distances that are nearly identical between two
# neighbors that are on the same side of the plane and two neighbors that are
# on opposite sides of the plane. That also roughly agrees well with the angles
# in the crystal structure of TaF8 in Na3TaF8. In this case, the 32 degrees is
# the angle the M-X bond makes with the plane that passes through the metal and
# is parallel with the planes of the top 4 and bottom 4 atoms.
sin32 = numpy.sin(numpy.deg2rad(32))
# Multiply these by 2 because of the 2.0 A bond length
sin32_2 = sin32 * 2.0
cos32_2 = numpy.cos(numpy.deg2rad(32)) * 2.0
# Get the scale factor for the "below" atoms so that they, too, have bond
# lengths of 2.0.
sin45 = numpy.cos(numpy.deg2rad(45))
scale = 2.0 / transform.get_vector_magnitude((sin45, sin45, sin32))
downxy = sin45 * scale
downz = -sin32 * scale
# We alternate upper and lower square positions in case of bidentate ligands
SQUARE_ANTIPRISMATIC_LOCATIONS = [
    (cos32_2, 0.0, sin32_2),  # Upper square
    (downxy, downxy, downz),  # Lower square
    (0.0, cos32_2, sin32_2),  # Upper square
    (-downxy, downxy, downz),  # Lower square
    (-cos32_2, 0.0, sin32_2),  # Upper square
    (-downxy, -downxy, downz),  # Lower square
    (0.0, -cos32_2, sin32_2),  # Upper square
    (downxy, -downxy, downz)  # Lower square
]
""" XYZ coordinates of the square antiprismatic sites """

# 9-coordinate tricapped trigonal prismatic
# Three trigonal planar units arranged as follows:
#   1st one in the XZ plane, with one atom +Z
#   Now imagine rotating that group 90 degrees about the Z axis so that the two
#   non-+Z atoms form imaginary atoms now in the YZ plane.
#   Those two imaginary atoms each now denote the location of one of the atoms
#   of the remaining two trigonal planar groups. (Look it up on wikipedia - it's
#   easier than visualizing it).
sin30 = numpy.sin(numpy.deg2rad(45))
cos30 = numpy.cos(numpy.deg2rad(45))
sin30_2 = sin30 * 2.0
cos30_2 = cos30 * 2.0
scale = 2.0 / transform.get_vector_magnitude((cos30, sin30, sin30))
TRICAPPED_TRIGONAL_PRISMATIC_LOCATIONS = [
    (cos30_2, 0.0, -sin30_2),  # 1st trigonal planar unit
    (0.0, cos30_2, -sin30_2),  # 2nd trigonal planar unit
    (-cos30_2, 0.0, -sin30_2),  # 1st trigonal planar unit
    (0.0, -cos30_2, -sin30_2),  # 3rd trigonal planar unit
    (cos30_2, -sin30_2 * cos30, sin30_2 * sin30),  # 2nd trigonal planar unit
    (cos30_2, sin30_2 * cos30, sin30_2 * sin30),  # 3rd trigonal planar unit
    (-cos30_2, sin30_2 * cos30, sin30_2 * sin30),  # 3rd trigonal planar unit
    (-cos30_2, -sin30_2 * cos30, sin30_2 * sin30),  # 2nd trigonal planar unit
    (0.0, 0.0, 2.0)  # 1st trigonal planar unit
]
""" XYZ coordinates of the tricapped trigonal prismatic sites """

AXES = {0: transform.X_AXIS, 1: transform.Y_AXIS, 2: transform.Z_AXIS}

# Takes on a value of 1 for eta-coordination and 0 for normal bond coordination
ATTACHMENT_PROPERTY = 'i_matsci_cbuilder_attacher'

ALLOWED_ISOMERS = {}
ALLOWED_ISOMERS[OCTAHEDRAL] = [FACIAL, MERIDIONAL, NO_ISOMER]
ALLOWED_ISOMERS[TRIGONAL_BIPYRAMIDAL] = [NO_ISOMER]
ALLOWED_ISOMERS[SQUARE_PLANAR] = [CIS, TRANS, NO_ISOMER]
ALLOWED_ISOMERS[TETRAHEDRAL] = [NO_ISOMER]
ALLOWED_ISOMERS[TRIGONAL_PLANAR] = [NO_ISOMER]
ALLOWED_ISOMERS[LINEAR] = [NO_ISOMER]
ALLOWED_ISOMERS[PENTAGONAL_BIPYRAMIDAL] = [NO_ISOMER]
ALLOWED_ISOMERS[SQUARE_ANTIPRISMATIC] = [NO_ISOMER]
ALLOWED_ISOMERS[TRICAPPED_TRIGONAL_PRISMATIC] = [NO_ISOMER]

IDEAL_SLOTS = {}
IDEAL_SLOTS[OCTAHEDRAL] = OCTAHEDRAL_LOCATIONS
IDEAL_SLOTS[TRIGONAL_BIPYRAMIDAL] = TRIGONAL_BIPYRAMIDAL_LOCATIONS
IDEAL_SLOTS[SQUARE_PLANAR] = SQUARE_PLANAR_LOCATIONS
IDEAL_SLOTS[TETRAHEDRAL] = TETRAHEDRAL_LOCATIONS
IDEAL_SLOTS[TRIGONAL_PLANAR] = TRIGONAL_PLANAR_LOCATIONS
IDEAL_SLOTS[LINEAR] = LINEAR_LOCATIONS
IDEAL_SLOTS[PENTAGONAL_BIPYRAMIDAL] = PENTAGONAL_BIPYRAMIDAL_LOCATIONS
IDEAL_SLOTS[SQUARE_ANTIPRISMATIC] = SQUARE_ANTIPRISMATIC_LOCATIONS
IDEAL_SLOTS[TRICAPPED_TRIGONAL_PRISMATIC] = \
        TRICAPPED_TRIGONAL_PRISMATIC_LOCATIONS

SLOTS_TO_GEOMS = {}
for geom, slots in IDEAL_SLOTS.items():
    SLOTS_TO_GEOMS.setdefault(len(slots), []).append(geom)

DENTATES = [MONODENTATE, BIDENTATE]

# By default, we just use slots in the order they are defined
SLOT_ORDER = {}
for geom in SUPPORTED_GEOMETRIES:
    SLOT_ORDER[geom] = {}
    for isomer in ALLOWED_ISOMERS[geom]:
        SLOT_ORDER[geom][isomer] = {}
        for dentate in DENTATES:
            SLOT_ORDER[geom][isomer][dentate] = list(
                range(len(IDEAL_SLOTS[geom])))
# Non-default slot order
SLOT_ORDER[OCTAHEDRAL][FACIAL][BIDENTATE] = [0, 5, 1, 4, 2, 3]
SLOT_ORDER[OCTAHEDRAL][FACIAL][MONODENTATE] = [0, 5, 1, 4, 2, 3]
SLOT_ORDER[OCTAHEDRAL][MERIDIONAL][MONODENTATE] = [0, 1, 3, 2, 4, 5]
SLOT_ORDER[OCTAHEDRAL][NO_ISOMER] = SLOT_ORDER[OCTAHEDRAL][MERIDIONAL]
SLOT_ORDER[TRIGONAL_BIPYRAMIDAL][NO_ISOMER][BIDENTATE] = [0, 2, 1, 3, 4]
SLOT_ORDER[SQUARE_PLANAR][TRANS][BIDENTATE] = [0, 1, 3, 2]
SLOT_ORDER[SQUARE_PLANAR][TRANS][MONODENTATE] = [0, 3, 1, 2]

# Properties for marking attachment points in ligands
ATOM_MARKER_PROP_BASE = 's_matsci_rx_marker_atom_'
HIGHEST_RX_MARKER_XVAL = 'i_matsci_highest_rx_xval'
# A '_'-delimited string that gives the index of each marker atom that marks an
# eta-position
ETA_ATOMS_PROP = ATOM_MARKER_PROP_BASE + 'eta'
# These old properties were used by versions before 2014-3. Users may be using
# custom templates with these properties
OLD_FIRST_MARKER_PROP = 'i_matsci_complex_builder_site_atom1'
OLD_SECOND_MARKER_PROP = 'i_matsci_complex_builder_site_atom2'
ALL_OLD_MARKER_PROPS = [OLD_FIRST_MARKER_PROP, OLD_SECOND_MARKER_PROP]

LIGANDS_INFO = {}
LIGAND_INFO_FILE = 'ligands_info.csv'
SMILES = 'smiles'
LIGAND_INFO_COL = {
    'abbreviation', 'charge', 'coordination', 'family', 'formula', 'name',
    SMILES
}
LigandInfo = namedtuple('LigandInfo',
                        [col for col in LIGAND_INFO_COL if col != SMILES])


[docs]def get_ligands_info(): """ Get ligands info from csv file :return: Dict with key as smiles string of ligand and values as `LIGANDINFO` :rtype: Dict """ global LIGANDS_INFO def read_csv(filename): """ Read csv file with pandas as Dataframe. If file is empty return empty Dataframe. Verify the integrity of dataframe by comparing the headers. If headers do not match, return empty Dataframe :param str filename: name of the file to read ligands info from. :return: Dataframe containing info about ligands :rtype: `Pandas.Dataframe` """ try: df = pandas.read_csv(filename) except pandas.errors.EmptyDataError: return pandas.DataFrame() input_columns = {header.strip() for header in df.columns} # Compare unordered list. CSV file does not have to be in same order. if LIGAND_INFO_COL != input_columns: return pandas.DataFrame() return df.replace({numpy.nan: None}) if not LIGANDS_INFO: ligands_df = read_csv( os.path.join(fileutils.get_mmshare_data_dir(), LIGAND_INFO_FILE)) user_input_file = os.path.join(msutils.get_matsci_user_data_dir(), 'ligands', LIGAND_INFO_FILE) if os.path.isfile(user_input_file): user_df = read_csv(user_input_file) if not user_df.empty: ligands_df = ligands_df.append(user_df) # Prioritize user input over template when dropping duplicates ligands_df.drop_duplicates(subset=SMILES, keep='last', inplace=True) ligands_df = ligands_df.set_index(SMILES) ligand_dict = ligands_df.transpose().to_dict() LIGANDS_INFO = { ligand_smiles: LigandInfo(**ligand_info) for ligand_smiles, ligand_info in ligand_dict.items() } return LIGANDS_INFO
[docs]def get_ligand_name(struct): """ Get abbreviated name of structures from the ligand list :param `schrodinger.structure.Structure` struct: structure of which abbreviated name to get :return: Get ligand name based on abbreviated name or full name or chemical formula :rtype: str """ st_copy = struct.copy() ligand_info = get_ligands_info() smile = rdpattern.to_smiles(st_copy, fall_back=True) try: info = ligand_info[smile] except KeyError: st_copy.deleteAtoms( [atom.index for atom in st_copy.atom if atom.atomic_number == -2]) return analyze.generate_molecular_formula(st_copy) else: return info.abbreviation or info.name or info.formula
[docs]def find_lone_hydrogen(struct, heavy_index): """ Find the lone hydrogen attached to an atom :type struct: schrodinger.structure.Structure :param struct: The structure in which the atoms are present :type heavy_index: int :param heavy_index: The index of the heavy atom to search for a single hydrogen :rtype: int or None :return: The index of the lone attached hydrogen atom or None if 0 or more than 1 hydrogen is attached to the heavy atom """ ans = [ atom for atom in struct.atom[heavy_index].bonded_atoms if atom.atomic_number == 1 ] if len(ans) == 1: return ans[0].index else: return None
[docs]def get_sentinel_sites(sentinel): """ Find the complex sites on the sentinel ligand using the Template Rx markers :type sentinel: `schrodinger.structure.Structure` :param sentinel: The sentinel ligand - this should come from a Template created with the Complex builder or this panel, and have markers indicating the complex sites. :rtype: list :return: A list of tuples. Each tuple is (X, Y), where X is the ligand atom index of the coordinating atom and Y is the ligand atom index of the atom to remove upon coordination. Y=0 if no atom is to be removed. One tuple for each coordination site is given. """ sites = [] # Find all the marker atoms in the ligand amarkers, highx = get_marker_atom_indexes_from_structure(sentinel) marker_indexes = [] for xval in range(1, highx + 1): indexes = amarkers[xval] marker_indexes.extend(indexes) # Create the site tuple for each marker eta_indexes = get_eta_marker_indexes(sentinel) for marker in marker_indexes: if marker in eta_indexes: # This marker is for an eta ligand. Site tuples for an eta ligands # are denoted by a negative atom index and a 0 for the second index sites.append([-marker, 0]) else: # Site tuples for standard ligands are # (index of atom that binds to metal, index of marker atom) mark_atom = sentinel.atom[marker] for neighbor in mark_atom.bonded_atoms: break sites.append([neighbor.index, marker]) return sites
[docs]def get_pretty_geom_str(geom): """ Return geometry string in pretty format. :type geom: str :param geom: a geometry from SUPPORTED_GEOMETRIES in cli format :rtype: str :return: a geometry from SUPPORTED_GEOMETRIES in pretty format """ return geom.replace('_', ' ').capitalize()
[docs]def get_cli_geom_str(geom): """ Return geometry string in cli format. :type geom: str :param geom: a geometry from SUPPORTED_GEOMETRIES in pretty format :rtype: str :return: a geometry from SUPPORTED_GEOMETRIES in cli format """ return geom.replace(' ', '_').lower()
CLI_SUPPORTED_GEOMETRIES = [ get_cli_geom_str(geom) for geom in SUPPORTED_GEOMETRIES ]
[docs]def minimize_complexes(structs, forcefield=minimize.OPLS_2005, **kwargs): """ Minimize the given structures using the new MMFFLD method of determining parameters for metal complexes. Additional keyword arguments are passed to the Minimizer class constructor :type structs: list(`schrodinger.structure.Structure`) :param structs: The structures to minimize :type ffld_version: integer :param ffld_version: The force field to use. Should be a module constant from the minimize module. :raise ValueError: Typically means atom typing error or valence violations :raise mm.MmException: Due to overlapping atoms """ with minimize.minimizer_context(ffld_version=forcefield, struct=None, no_zob_restrain=True, cleanup=False, **kwargs) as mizer: for st in structs: mizer.setStructure(st) min_res = mizer.minimize() st.property[mm.OPLS_POTENTIAL_ENERGY] = min_res.potential_energy
[docs]def minimize_complex(struct, forcefield=minimize.OPLS_2005, **kwargs): """ Minimize the given structure using the new MMFFLD method of determining parameters for metal complexes. Additional keyword arguments are passed to the Minimizer class constructor :type struct: `schrodinger.structure.Structure` :param struct: The structure to minimize :type ffld_version: integer :param ffld_version: The force field to use. Should be a module constant from the minimize module. :raise ValueError: Typically means atom typing error or valence violations :raise mm.MmException: Due to overlapping atoms """ minimize_complexes([struct], forcefield=forcefield, **kwargs)
[docs]def fix_metal_bond_orders(struct, index): """ Fix all the bonds to atom index to be either single or dative depending on whether the other atom has a full valence without this bond or not. Full valence without this bond = dative bond, otherwise bond order = number of open valences. Formal charges are also set to 0 for the metal atom and bonded neighbors. Note - no bonds are added or removed by this function, only bond orders are changed. :type struct: `schrodinger.structure.Structure` :param struct: The structure to operate on - bonds are modified on this structure directly, not a copy :type index: int :param index: The atom index of the metal atom with bonds to adjust """ metal_atom = struct.atom[index] bt = structure.BondType # Set all bonded atoms to formal charge of 0 - do this in its own loop so we # only have to retype once. This enforces MATSCI-622. metal_atom.formal_charge = 0 for neighbor in metal_atom.bonded_atoms: neighbor.formal_charge = 0 # Set all metal bonds to dative so that atom types are determined # correctly - MATSCI-4517 struct.getBond(metal_atom, neighbor).type = bt.Dative struct.retype() valence_to_bond_order = {1: bt.Single, 2: bt.Double, 3: bt.Triple} # Now determine the proper bond order for all metal bonds for neighbor in metal_atom.bonded_atoms: # The metal bond is dative so it doesn't count towards the neighbor's # valence bonds open_sites = rxn_channel.open_bonding_sites(struct, neighbor.index) # carbon monoxide is a special case for FF typing reasons - it must stay # zero-order bonded with charges on the C & O MATSCI-6195 monoxide = False if neighbor.element == 'C': batoms = list(neighbor.bonded_atoms) if len(batoms) == 2: for bonded_atom in batoms: if bonded_atom.element == 'O': bond = struct.getBond(neighbor, bonded_atom) monoxide = bond.order == 3 neighbor.formal_charge = -1 bonded_atom.formal_charge = 1 if open_sites and not monoxide: # The neighbor has an open valence, so change the bond order bond = struct.getBond(metal_atom, neighbor) bond.type = valence_to_bond_order.get(open_sites, bt.Single) struct.retype()
[docs]def fix_all_metal_bond_orders(struct): """ Set the order of bonds containing metals to zero. :param `schrodinger.structure.Structure` struct: input structure containing metal bonds """ for idx in analyze.evaluate_asl(struct, 'metals'): fix_metal_bond_orders(struct, idx)
[docs]def transmute_atom(atom, element, color=None): """ Transmute atom from its current element to a new element. The new name will be element + index (ex. H17), and the new color if not supplied will be the Maestro default (or purple if no Maestro default). :type atom: `schrodinger.structure._StructureAtom` :param atom: The atom object to transmute to a new element :type element: str :param element: The atomic symbol of the new element :type color: str :param color: The new color of the atom in a format accepted by the _StructAtom.color property. The default is to use Maestro's default color for the new element, or purple if the default color is not defined. :raise ValueError: if element is not a recognized atomic symbol """ atom.element = element atom.name = element + str(atom.index) # Set the color of the atom if color is None: atomic_number = mm.mmelement_get_atomic_number_by_symbol(element) wild_type = mm.mmat_get_wildcard(atomic_number) if wild_type > 0: color = mm.mmat_get_color(wild_type) else: # An element not parameterized color = 'purple' atom.color = color
[docs]def find_atoms_to_remove(struct, keep_atom, root_atom): """ Return a list of atoms bound to root atom (and recursively all atoms bound to those atoms, ad infinitum). keep_atom and all atoms recursively bound to it will not be added to the list. If keep_atom and root_atom are part of the same ring system, root_atom will be the only atom returned in the list. For structure A-B-C-D-E, if keep_atom=B and root_atom=C, the returned list will be [C, D, E]. :type struct: schrodinger.structure.Structure :param struct: The structure to use :type keep_atom: int :param keep_atom: The index of the atom to keep :type root_atom: int :param root_atom: The index of the first atom to remove. All neighbors of this atom that are not keep_atom will be added to the list. :rtype: list :return: A list of all atoms recursively bound to root atom. keep_atom and all atoms bound to it are excluded from the list. """ atoms_not_yet_checked = set([root_atom]) indexes_to_remove = set() while atoms_not_yet_checked: check_atom = atoms_not_yet_checked.pop() indexes_to_remove.add(check_atom) for atom in struct.atom[check_atom].bonded_atoms: index = atom.index if index == keep_atom: # Don't add the keep_atom to the list if check_atom != root_atom: # We've circled around to the keep atom again, must have # traversed a ring return [root_atom] elif index not in indexes_to_remove and \ index not in atoms_not_yet_checked: # We haven't encountered this atom before atoms_not_yet_checked.add(index) return list(indexes_to_remove)
[docs]def convert_old_marker_props_to_new(struct): """ Some template strutures may still use old-style properties to mark Rx atoms. This function converts those properties to new-style properties and removes the old ones. :type struct: `schrodinger.structure.Structure` :param struct: The structure with properties to read and modify """ max_x_val = struct.property.get(HIGHEST_RX_MARKER_XVAL, 0) for prop in ALL_OLD_MARKER_PROPS: # Old style stored a single atom index in structure property propnameX, # where X was the Rx value (x = 1 or 2) xval = int(prop[-1]) index = struct.property.get(prop) if index: strindex = str(index) # The new style is to store a list of atoms in newpropnameX, where # the atom indexes are separated by '_' and X can be any value newprop = ATOM_MARKER_PROP_BASE + str(xval) # There may already be atoms stored for this x value, if so, just # add to the list current_val = struct.property.get(newprop, "") current_indexes = set(current_val.split('_')) if strindex not in current_indexes: new_val = '_'.join([x for x in [current_val, strindex] if x]) struct.property[newprop] = new_val max_x_val = max(max_x_val, xval) del struct.property[prop] struct.property[HIGHEST_RX_MARKER_XVAL] = max_x_val
[docs]def get_marker_atom_indexes_from_structure(struct): """ Get the indexes of atoms marked as Rx atoms :type struct: `schrodinger.structure.Structure` :param struct: The structure with the Rx atoms :rtype: (dict, int) :return: dict keys are the int value of x in Rx, values are lists of atom indexes set to that Rx value (atom indexes are 1-based). The int return value is the highest value of x in the keys of the dictionary. """ # This checks for old-style properties for backwards compatibility with # files created by versions prior to 2014-3. convert_old_marker_props_to_new(struct) rx_atoms = {} max_x_val = struct.property.get(HIGHEST_RX_MARKER_XVAL, 0) for xval in range(1, max_x_val + 1): prop = ATOM_MARKER_PROP_BASE + str(xval) atom_string = struct.property.get(prop) if atom_string: indexes = [int(x) for x in atom_string.split('_')] rx_atoms[xval] = indexes return rx_atoms, max_x_val
[docs]def mark_eta_positions(struct, rx_atoms): """ Add a structure property that gives the index of each eta-coordination marker :type struct: `schrodinger.structure.Structure` :param struct: The structure with the Rx atoms :type rx_atoms: dict :param rx_atoms: Keys are x value and values are lists of atoms denoted with that Rx marker """ eta_indexes = set() for indexes in rx_atoms.values(): for index in indexes: if struct.atom[index].bond_total > 1: eta_indexes.add(index) struct.property[ETA_ATOMS_PROP] = '_'.join([str(x) for x in eta_indexes])
[docs]def get_eta_marker_indexes(struct): """ Get a set of all atom indexes for eta-coordination markers :type struct: `schrodinger.structure.Structure` :param struct: The structure with the Rx atoms :rtype: set :return: Each item of the set is the atom index of a marker for an eta-coordination site """ eta_indexes = set() eta_string = struct.property.get(ETA_ATOMS_PROP, "") if eta_string: eta_indexes.update([int(x) for x in eta_string.split('_')]) return eta_indexes
[docs]def clear_marker_properties(struct): """ Clear any marker properties that exist on the structure :type struct: `schrodinger.structure.Structure` :param struct: The structure with the marker properties to clear """ for prop in list(struct.property): if prop.startswith(ATOM_MARKER_PROP_BASE): del struct.property[prop]
[docs]def set_marker_properties(struct, rx_atoms, clear=True): """ Set the structure properties that store the atoms :type struct: `schrodinger.structure.Structure` :param struct: The structure with the Rx atoms :type rx_atoms: dict :param rx_atoms: keys are the int value of x in Rx, values are lists of atom indexes set to that Rx value (atom indexes are 1-based) :type clear: bool :param clear: Clear any existing marker properties before setting new ones """ if clear: clear_marker_properties(struct) max_x_val = 0 for xval, indexes in rx_atoms.items(): prop = ATOM_MARKER_PROP_BASE + str(xval) struct.property[prop] = '_'.join([str(x) for x in indexes]) if xval > max_x_val: max_x_val = xval struct.property[HIGHEST_RX_MARKER_XVAL] = max_x_val mark_eta_positions(struct, rx_atoms)
[docs]class EtaFFAssignmentError(Exception): """ Raised when a FF assignment error occurs """
[docs]class Ligand(object): """ Stores information about a ligand structure """
[docs] def __init__(self, struct, sites=None, slots=None): """ Create a Ligand object :type struct: `schrodinger.structure.Structure` :param struct: The ligand structure :type sites: list of tuple :param sites: Each item of the list is a (X, Y) tuple. X is the index of the atom that will attach to the central metal atom in the complex, and Y is the index of the atom that should be removed to make the attachment. The X-Metal bond will be made along the X-Y bond vector. If Y is 0, the bond will be assumed to be a dative bond, and the X-Metal bond will be formed along an angle that is chosen to minimize sterics. If X is negative, the site is an eta-coordination site. :type slots: list of int :param slots: The coordination slots this ligand will occupy. The coordination slot is the index into the GEOMETRY_LOCATIONS array that specifies the xyz coordinates for this ligand coordination. If not supplied, the slots will be supplied based on the isomer of the complex. """ self.struct = struct.copy() if sites: self.sites = sites[:] else: self.sites = [] if len(self.sites) == 2: self.type = BIDENTATE else: self.type = MONODENTATE self.has_eta_site = False for index, site in enumerate(self.sites[:]): if site[0] < 0: # This is an eta-coordination site self.has_eta_site = True if self.type == BIDENTATE: other = self.sites[abs(index - 1)] else: other = None self.sites[index] = self._findEtaSite(site, other_site=other) elif site[1] == 0: # No atom specified for removal, must find the correct bond # direction. self.sites[index] = self._findDativeSite(site) self.slots = slots if self.has_eta_site and self.type == BIDENTATE: self.minimizeEtaPosition()
def _doEtaMinimization(self, metal, xatoms): """ Run the forcefield minimization to create ideal binding geometries for an eta ligand :param `_StructureAtoms` metal: The metal atom :param list xatoms: A list of _StructureAtom objects whose neighbor atoms should be restrained to the metal atom during minimization :raise EtaFFAssignmentError: If the force field can't be assigned """ with minimize.minimizer_context() as mizer: # Set the structure and check for FF assignment errors try: mizer.setStructure(self.struct) except (ValueError, mm.MmException): raise EtaFFAssignmentError # Set up a bond between every eta atom and the metal. This will force # the eta-plane to minimize to a position face-on to the metal position for mark in xatoms: for neighbor in mark.bonded_atoms: # Tether the eta atoms to the metal mizer.addDistanceRestraint(neighbor.index, metal.index, 100., 2.1) # Also lock the marker atom in place relative to the eta atoms - # this forces the eta atoms to keep their geometry relative to # each other. mizer.addDistanceRestraint( neighbor.index, mark.index, 10000., self.struct.measure(neighbor, mark)) # We also need to lock in any non-eta binding site so that we bring # the two binding sites together. for xdex, ydex in self.sites: if xdex > 0: mizer.addDistanceRestraint(xdex, metal.index, 100., 2.1) mizer.minimize()
[docs] def minimizeEtaPosition(self): """ For bidentate eta ligands, orient the eta plane(s) to be face-on to roughly where the metal atom will be Eta sites are marked by two dummy atoms X and Y. X is located at the centroid of all eta atoms in the plane of the eta atoms. Y is located at roughly the site of the metal atom. """ # The y atom of the x,y site sits at the end of a vector that is normal # to the eta plane, roughly where the metal should go. yatoms = [self.struct.atom[y] for x, y in self.sites] # Change the temp element of non-eta, non-dative Y atoms to H. Eta and # dative Y atoms are already dummy atoms. We use H here because these Y # atoms are "real" atoms, so needed to fulfill atom valences. for atom in yatoms: if atom.element != 'DU': transmute_atom(atom, 'H') # Locate a dummy metal atom at the average of the y atom positions dcoords = [numpy.array(a.xyz) for a in yatoms] mcoords = 0.5 * (dcoords[0] + dcoords[1]) metal = self.struct.addAtom('C', *mcoords) transmute_atom(metal, 'DU') # Also change the x atoms of the x,y eta site to dummy atoms - they just # mark the centroid of the eta plane so should not be allowed to affect # minimization xatoms = [self.struct.atom[abs(x)] for x, y in self.sites if x < 0] for mark in xatoms: transmute_atom(mark, 'DU') # Ensure that no two X or Y atoms occupy the exact same location (this # will crash the minimizer). This could happen if the same set of atoms # forms two different eta bonds. xyatoms = xatoms + yatoms for iatom in xyatoms[:-1]: for jatom in xyatoms[1:]: if iatom.xyz == jatom.xyz: # Slightly shift one of the atoms a bit jatom.x += 0.02 jatom.y += 0.02 try: self._doEtaMinimization(metal, xatoms) except EtaFFAssignmentError: # Force field assignment failed, we are using the unminimized # structure with no good way to notify the user, but almost # certainly will result in errors later that will raise a warning. pass # Remove the metal atom and move the yatoms to the metal atom position # Note - for now there appears to be no reason to restore the marker # atoms to their previous element mcoords = metal.xyz self.struct.deleteAtoms([metal.index]) for atom in yatoms: atom.xyz = mcoords
def _findDativeSite(self, site): """ Normally, we'd put the X-Metal bond on the same vector as the X-Y bond vector, where Y is the atom that is being removed. However, for a dative X-Metal bond, there is no atom to remove and therefore, there is no predefined bond vector for the X-Metal bond. Find the best vector and place an atom there for removal later. :type site: tuple :param site: (X, Y) tuple indicating the index of atom X, which will be bonded to the metal atom. Y is ignored. :rtype: tuple :return: (X, Y) tuple, where Y is a new atom added on the vector that the X-Metal bond should be made. """ xatom = self.struct.atom[site[0]] vector = rxn_channel.find_good_bond_vector(xatom) xatom_xyz = xatom.xyz[:] return (site[0], self._addPhantomMarkerAtom(xatom_xyz, vector)) def _addPhantomMarkerAtom(self, partner_xyz, vector, other_site=None): """ Add an atom at the predetermined marker site. This atom represents where the metal atom should be placed relative to the ligand. :type partner_xyz: list :param partner_xyz: The x, y, z coordinates of the atom this phantom atom will be attached to :type vector: list :param vector: The x, y, z displacement from partner_xyz to place the phantom atom at. See also other_site. :type other_site: tuple :param other_site: An (X, Y) tuple of atom indexes for the other binding site in a bidentate ligand. The X atom index is used to choose the vector direction (either + or - the vector displacement) that puts this phantom atom closer to the other site. Used for eta-coordination. The Y index is ignored and may be None """ coordinates = numpy.array( [partner_xyz[a] + vector[a] for a in range(3)]) # For eta coordination, vector is just the normal to the plane - we # don't know which face of the plane it should point away from. Pick the # direction that points the vector closer to the other binding site if other_site: # The marker position if we choose the other direction alt_coords = numpy.array( [partner_xyz[a] - vector[a] for a in range(3)]) # We want the marker position closer to the other binding site anchor = numpy.array(self.struct.atom[abs(other_site[0])].xyz) c_dist = numpy.linalg.norm(anchor - coordinates) alt_dist = numpy.linalg.norm(anchor - alt_coords) if alt_dist < c_dist: coordinates = alt_coords # Add a dummy atom as this atom doesn't really exist and an actual atom # will mess up any future minization of this ligand. newobj = self.struct.addAtom('DU', *coordinates) return newobj.index def _findEtaSite(self, site, other_site=None): """ Determine the vector from the centroid of the eta-coordinating atoms to the metal atom. Mark this vector in the structure with a new atom positioned where we determine the metal atom should be. :type site: tuple :param site: (X, Y) tuple indicating the index of atom X, which is the centroid of the eta-coordinating atoms. Y is ignored. :type other_site: tuple :param other_site: An (X, Y) tuple of atom indexes for the other binding site in a bidentate ligand. The X atom index is used to choose the vector direction (either + or - the vector displacement) that puts this sitecloser to the other site. The Y index is ignored and may be None. :rtype: tuple :return: (X, Y) tuple, where Y is a new atom added on the vector that the X-Metal bond should be made. """ # Eta sites are noted with a negative X in the (X, Y) site tuple xatom = self.struct.atom[-site[0]] eta_atoms = list(xatom.bonded_atoms) centroid = transform.get_centroid( self.struct, atom_list=[x.index for x in eta_atoms])[:3] # All these algorithms work only if xatom is located at the centroid of # the eta atoms, so move it to make it so. xatom.xyz = centroid plane_atoms = list(eta_atoms) # Find the best fit plane of the eta-coordinated atoms normal = None while normal is None: # OK - for normal eta-ligands such as benzene or Cp, this is easy to # find a plane - just fit a plane to the atoms directly involved in # the coordination. The most common failure is going to be when # ethene or ethyne are eta-coordinated as we can't fit a plane to # only two atoms. For ethene, if we expand the atoms we are fitting # to those bonded to the eta-coordinated atoms, we can fit a plane # nicely. So the algorithm is going to be: try to fit a plane to the # eta atoms. Each time we fail, expand out the set of atoms by the # bonded neighbors and try again until the entire ligand has been # tried. If we still fail, it's probably a linear ligand. coords = numpy.array([x.xyz for x in plane_atoms]) try: normal = measure.fit_plane_to_points(coords) except (ValueError, measure.LinearError, numpy.linalg.LinAlgError): # Unable to fit a point due to the atoms being linear or some # other unknown fitting error. Expand out the set of fit atoms # by one bond. current = len(plane_atoms) for atom in list(plane_atoms): for neighbor in atom.bonded_atoms: if neighbor not in plane_atoms and neighbor != xatom: plane_atoms.append(neighbor) if len(plane_atoms) == current: # We added no new atoms, this algorithm has failed to find a # plane break if normal is None: # We were never successful in fitting a plane normal = self._stupidlyGuessEtaNormal(xatom, eta_atoms) return (site[0], self._addPhantomMarkerAtom(centroid, normal, other_site=other_site)) def _stupidlyGuessEtaNormal(self, xatom, eta_atoms): """ Fallback attempt at finding the eta binding direction if we can't fit a plane to the eta-coordinating atoms. We simply pick a binding direction that forms a 90 degree A-X-Y angle, where Y is the atom we are adding to define this direction, X is the eta atom centroid marker and A in an arbitrarily chosen eta atom (the first one in the list of atoms bonded to X). Since by far and away the most likely failure to fit a plane is due to all eta atoms being arranged in a line, this stupid guess is fine. :type xatom: `schrodinger.structure._StructureAtom` :param xatom: The atom that marks the centroid of the eta atoms :type eta_atoms: list :param eta_atoms: The eta-coordinating atoms. Each item is a _StructureAtom instance. :rtype: `numpy.array` :return: The vector that gives the direction of the eta binding direction """ # Craft a vector that is not parallel to the vector running from xatom # to one of the eta atoms. Take the cross product of that vector with # the xatom-eta vector. The resulting vector is perpendicular to # xatom-eta. xa_coords = numpy.array(xatom.xyz) for etom in eta_atoms: # Find an atom that isn't at the exact same location as xatom eta_coords = numpy.array(eta_atoms[0].xyz) eta_vec = xa_coords - eta_coords if any([bool(x) for x in eta_vec]): break random_vec = numpy.array([eta_vec[2], eta_vec[0], eta_vec[1]]) perpendicular = numpy.cross(eta_vec, random_vec) return transform.get_normalized_vector(perpendicular)
[docs]class ComplexBuilder(object): """ A class used to build an organometallic complex """
[docs] def __init__(self, metal='Ir', geometry=OCTAHEDRAL, isomer=FACIAL, homoleptic=True, dentation=BIDENTATE): """ Create a ComplexBuilder instance :type metal: str :param metal: The atomic symbol of the central atom :type geometry: str :param geometry: VESPR geometry of the complex. Should be a module constant: OCTAHEDRAL, TETRAHEDRAL, SQUARE_PLANAR :type isomer: str or None :param isomer: For octahedral complexes, can be module constants FACIAL, MERIDIONAL, or NO_ISOMER. For square planar complexes, can be module constants CIS, TRANS or NO_ISOMER. It is ignored for tetrahedral. None may be used instead of NO_ISOMER. :type homoleptic: bool :param homoleptic: If True, the complex is homoleptic and only one ligand should be supplied. If False, the complex is heteroleptic and every ligand must be supplied. Homoleptic = all ligands are identical, heteroleptic = ligands may or may not be identical. :type dentation: int :param dentation: Module-level constant describing the dentation type of the ligand - either MONODENTATE or BIDENTATE. Only used to determine the coordination slot order (the order coordination sites are filled) for isomers. """ self.metal = metal # Create a new structure with the proper central atom if self.metal not in atomicsymbols.ATOMIC_SYMBOLS: raise ValueError('%s is not a known atomic symbol. A valid symbol ' 'is required for the metal.' % self.metal) if isomer is None: isomer = NO_ISOMER if isomer not in ALLOWED_ISOMERS[geometry]: raise ValueError('Isomer %s is not allowed for %s complexes' % (isomer, geometry)) self.geometry = geometry self.isomer = isomer self.resetSlots(dentation=dentation) self.homoleptic = homoleptic self.ligands = [] self.used_site_count = 0
[docs] def resetSlots(self, dentation=BIDENTATE): """ Reset the slot order back to ideal slot order :type dentation: int :param dentation: Module-level constant describing the dentation type of the ligand - either MONODENTATE or BIDENTATE """ self.ideal_slots = IDEAL_SLOTS[self.geometry] self.slot_order = SLOT_ORDER[self.geometry][self.isomer][dentation]
[docs] def setSlotOrder(self, slot_order): """ Set the order that coordination sites should be used. This should be a list of indexes into the slot_order property. Ligands will be attached at these coordination sites in the order they are added. :type slot_order: list of int :param slot_order: List of indexes that specifies the order of coordination sites to use. :raise IndexError: If the list is not the correct length (6 for octahedral, 4 for tetrahedral/square_planar). An example for square_planar might be [0, 2, 1, 3]. :raise ValueError: If the list contains duplicated indexes or indexes outside the allow range of 0 to len(list)-1 """ slots = len(self.slot_order) if len(slot_order) > slots: raise IndexError('Too many slots requested - should be no more than' ' %d.' % slots) elif len(set(slot_order)) != len(slot_order): raise ValueError('Slot order list may not contain duplicate values') elif (any([x < 0 for x in slot_order]) or any([x > slots - 1 for x in slot_order])): raise ValueError('The allowed range of values is 0 to %d' % (slots - 1)) self.slot_order = slot_order[:]
[docs] def getNumUsedCoordSites(self): """ Get the current number of coordination sites required for all copies of all ligands set so far. :rtype: int :return: The total number of sites required for all currently set ligands. Accounts for the number of copies requested and mono/bi-dentation of each ligand. """ if not self.ligands: return 0 if self.homoleptic: return len(self.ideal_slots) return sum([len(x.sites) for x in self.ligands])
[docs] def addMonodentateLigand(self, struct, site, slot=None, copies=1): """ Add a monodentate ligand for the complex. :type struct: `schrodinger.structure.Structure` :param struct: The structure of the ligand :type site: tuple :param site: An (X, Y) tuple. X is the index of the atom that will attach to the central metal atom in the complex, and Y is the index of the atom that should be removed to make the attachment. The X-Metal bond will be made along the X-Y bond vector. If Y is 0, the bond will be assumed to be a dative bond, and the X-Metal bond will be formed along an angle that is chosen to minimize sterics. If X is negative, the site is an eta-coordination site. :type slot: int :param slot: The coordination slot this ligand will occupy. The coordination slot is the index into the GEOMETRY_LOCATIONS array that specifies the xyz coordinates for this ligand coordination. :type copies: int :param copies: The number of copies of this ligand. It is a ValueError to specify slot & copies > 1. """ if slot is not None: slot = [slot] self._addLigand(struct, [site], slots=slot, copies=copies)
[docs] def addBidentateLigand(self, struct, sites, slots=None, copies=1): """ Add a bidentate ligand for the complex. :type struct: `schrodinger.structure.Structure` :param struct: The structure of the ligand :type sites: list of tuple :param sites: Each item of the list is a (X, Y) tuple. X is the index of the atom that will attach to the central metal atom in the complex, and Y is the index of the atom that should be removed to make the attachment. The X-Metal bond will be made along the X-Y bond vector. If Y is 0, the bond will be assumed to be a dative bond, and the X-Metal bond will be formed along an angle that is chosen to minimize sterics. If X is negative, the site is an eta-coordination site. :type slots: list of int :param slots: The coordination slots this ligand will occupy. The coordination slot is the index into the GEOMETRY_LOCATIONS array that specifies the xyz coordinates for this ligand coordination. :type copies: int :param copies: The number of copies of this ligand. It is a ValueError to specify slot & copies > 1. """ self._addLigand(struct, sites, slots=slots, copies=copies)
def _addLigand(self, struct, sites, slots=None, copies=1): """ Add a ligand to the complex. :type struct: `schrodinger.structure.Structure` :param struct: The structure of the ligand :type sites: list of tuple :param sites: Each item of the list is a (X, Y) tuple. X is the index of the atom that will attach to the central metal atom in the complex, and Y is the index of the atom that should be removed to make the attachment. The X-Metal bond will be made along the X-Y bond vector. If Y is 0, the bond will be assumed to be a dative bond, and the X-Metal bond will be formed along an angle that is chosen to minimize sterics. If X is negative, the site is an eta-coordination site. :type slots: list of int :param slots: The coordination slots this ligand will occupy :type copies: int :param copies: The number of copies of this ligand. It is a ValueError to specify slot & copies > 1. """ if slots is not None and copies > 1: raise ValueError('The slot cannot be specified if copies > 1') if self.ligands and self.homoleptic: raise IndexError('Only one ligand allowed for homoleptic ' 'complexes.') if self.homoleptic and copies > 1: copies = 1 if slots is not None: for slot in slots: for ligand in self.ligands: if slot in ligand.slots: raise ValueError('Added ligand is trying to occupy a ' 'slot (%d) that is already occupied' % slot) for copy in range(copies): self.ligands.append(Ligand(struct, sites=sites, slots=slots))
[docs] def clearLigands(self): """ Remove all added ligands """ self.ligands = []
[docs] def createComplex(self, force=False): """ Create the complex based on the defined ligands :type force: bool :param force: If true, create a complex even if all slots are not filled. If False (default), raise IndexError if all slots are not filled. :raise IndexError: If not all sites are filled and force is not True :raise IndexError: Too many ligands specified for available sites """ self.used_site_count = 0 sites = len(self.ideal_slots) if self.getNumUsedCoordSites() < sites and not force: raise IndexError('Not all coordination sites are filled.') elif self.getNumUsedCoordSites() > sites: raise IndexError('There are too many ligands specified for the ' 'number of coordination sites') complex = structure.create_new_structure(0) # The roundabout method used below (create a carbon, mutate the carbon) # is used because the direct method (addAtom(self.metal, 0, 0, 0)) # throws "WARNING mmat_get_atomic_num: 0 is not a valid atom" SHARED-617 atomobj = complex.addAtom('C', 0.0, 0.0, 0.0) transmute_atom(atomobj, self.metal) for ligand in self.ligands: if self.homoleptic: for index in range( old_div(len(self.ideal_slots), len(ligand.sites))): self._attachLigand(complex, ligand) else: self._attachLigand(complex, ligand) return complex
def _getNextSlot(self): """ Return the next slot for a coordinating atom :rtype: tuple :return: The XYZ coordinates for the next coordinating atom """ slot = self.ideal_slots[self.slot_order[self.used_site_count]] self.used_site_count = self.used_site_count + 1 return slot def _getAttacherMetalVector(self, struct, ligand, attachers, markers, is_eta): """ Get the vector that points from the first attachment atom to the metal. By default, this will be the vector pointing from the attaching atom to the atom being removed. However, for bidentate ligands there's a chance this removing atom isn't in a good binding location. For those ligands, we find a better vector by rotating the A1-M vector (see below) about the P-A1 vector until it is in the A2 P A1 plane and pointed towards A2. We want to turn cases like this:: M P-A1 A2 into this:: P-A1 A2 M Find the dihedral between M-A1-P-A2 where M is the marker atom (which should lie on the A1-metal vector), A1 is the first attachment atom, P is an atom on the path from A1 to A2 and A2 is the second attachment atom :type struct: `schrodinger.structure.Structure` :param struct: The structure containing the atoms involved :type ligand: `Ligand` :param ligand: The Ligand for this structure :type attachers: list :param attachers: Each item is one of the atoms attaching to the metal. The first atom in the list is A1. Items are _StructureAtom objects :type markers: list :param markers: Each item is one of the atoms marking the attacher-metal bond direction. Items are _StructureAtom objects. :type is_eta: list :param is_eta: True/False for each attacher as to whether that attacher is for an eta-coordination. :rtype: numpy.array :return: The XYZ components of the vector pointing from the first attachement atom to where the metal should go """ a1 = attachers[0] matom = markers[0] a1xyz = numpy.array(a1.xyz) mxyz = numpy.array(matom.xyz) if ligand.type != BIDENTATE or is_eta[0]: return mxyz - a1xyz a2 = attachers[1] a2xyz = numpy.array(a2.xyz) # Determine what atom to use for P path = analyze.find_shortest_bond_path(struct, a1.index, a2.index) patom = struct.atom[path[1]] if patom == a2: # There are no atoms between A1 and A2, M-A1-A2 are planar by # definition return mxyz - a1xyz pxyz = numpy.array(patom.xyz) # Find the rotation matrix that will move the A1-M vector into the # A2-P-A1 plane. There are two of them, they differ by 180 degree # rotation torsion = numpy.radians( measure.measure_dihedral_angle(matom, a1, patom, a2)) at1_p_vec = transform.get_normalized_vector(pxyz - a1xyz) matrix1 = transform.get_rotation_matrix(at1_p_vec, torsion) matrix2 = transform.get_rotation_matrix(at1_p_vec, -torsion) # Rotate the A1-M vector in both directions and pick the one that puts # M closest to A2. at1_m_vec = mxyz - a1xyz coords1 = transform.transform_atom_coordinates(at1_m_vec, matrix1) coords2 = transform.transform_atom_coordinates(at1_m_vec, matrix2) dist1 = numpy.linalg.norm(coords1 - a2xyz) dist2 = numpy.linalg.norm(coords2 - a2xyz) if dist1 < dist2: return coords1 else: return coords2 def _attachLigand(self, complex_st, ligand): """ Attach a ligand to the central atom. The ligand will be translated/rotated to occupy the chosen coordination sites. :type complex_st: `schrodinger.structure.Structure` :param complex_st: The Structure object to add the ligand to. :type ligand: `Ligand` :param ligand: The Ligand object to attach :raise ValueError: Too many metal-to-ligand bonds """ struct = ligand.struct.copy() # Must run this on the copied structure so that the markers are atom # objects for the copied structure and not original attachers = [] mymarkers = [] to_delete = [] is_eta = [] for attacher, marker in ligand.sites: if attacher < 0: is_eta.append(True) attacher = -attacher else: is_eta.append(False) attachers.append(struct.atom[attacher]) markobj = struct.atom[marker] mymarkers.append(markobj) # Set the marker and any bonded non-attachment atoms for deletion to_delete.extend(find_atoms_to_remove(struct, attacher, marker)) # First move the attachment atom to the origin mat = attachers[0] transform.translate_structure(struct, x=-mat.x, y=-mat.y, z=-mat.z) # Now rotate the ligand correctly in a series of steps # First rotate the marker atom onto the Lig-M bond vector attach_slot1 = self._getNextSlot() ligm_vector = numpy.array([-x for x in attach_slot1]) ligm_norm = transform.get_normalized_vector(ligm_vector) marker_vector = self._getAttacherMetalVector(struct, ligand, attachers, mymarkers, is_eta) rot_matrix = transform.get_alignment_matrix(marker_vector, ligm_norm) transform.transform_structure(struct, rot_matrix) if ligand.type == BIDENTATE: # Rotate the 2nd attachment point so that it lies on the # M-slot1-slot2 plane. We rotate the attach1-attach2 vector about # the Lig-M bond vector. Note that it is important that attach1 be # at the origin and attach1-marker1 be on the Lig-M vector. # attach_slot2 = self._getNextSlot() # Project out the component of the attach1-attach2 vector that lies # along the axis we are going to rotate around, then find the angle # to slot 2. Projection out the component of vector u out of vector # v is v(projected) = v - u*(v.u)/(u.u) pvec = numpy.array(attachers[1].xyz[:]) dot1 = numpy.dot(pvec, ligm_vector) dot2 = numpy.dot(ligm_vector, ligm_vector) proj_vect = pvec - (old_div(dot1, dot2)) * ligm_vector if any(proj_vect): slot_vect = numpy.array(attach_slot2) # Find the angle between the projected vector and the desired # slot angle = transform.get_angle_between_vectors( proj_vect, slot_vect) else: # The attach1-attach2 vector is perfectly parallel with the # rotation axis. This is messed up. MATSCI-9212. This was caused # by trying to use a linear, two-atom ligand as a bidentate # ligand spanning two coordination sites that were themselves # linear (trans-square-planar). Just don't do any rotation since # the ligand is already aligned along the proper vector. angle = 0.0 # We know the angle, but not the direction of rotation. Pick the # direction that rotates attach2 into the desired plane. # Try the first way a1a2_vector = transform.get_normalized_vector( numpy.array(attachers[1].xyz)) rot_matrix1 = transform.get_rotation_matrix(ligm_norm, angle) check1 = transform.transform_atom_coordinates( a1a2_vector.copy(), rot_matrix1) # Now try the other way rot_matrix2 = transform.get_rotation_matrix(ligm_norm, -angle) check2 = transform.transform_atom_coordinates( a1a2_vector.copy(), rot_matrix2) # Pick the direction that puts attach2 closest to slot2 dist1 = sum([(a - b)**2 for a, b in zip(check1, attach_slot2)]) dist2 = sum([(a - b)**2 for a, b in zip(check2, attach_slot2)]) if dist1 < dist2: rot_matrix = rot_matrix1 else: rot_matrix = rot_matrix2 # Rotate the entire structure transform.transform_structure(struct, rot_matrix) # Now rotate the ligand about the origin so that the attach1-attach2 # vectory is aligned with the slot1-slot2 vector. This will # probably move the attach1-marker vector off the ligand-M vector # (the very first transformation we did), but that's OK. The more # important thing is to try to get the attach1-attach2 vector # correct. attach1 is at the origin (0, 0, 0) a1a2_vector = transform.get_normalized_vector( numpy.array(attachers[1].xyz)) slot_vect = transform.get_normalized_vector( numpy.array(attach_slot2) - numpy.array(attach_slot1)) rot_matrix = transform.get_alignment_matrix(a1a2_vector, slot_vect) transform.transform_structure(struct, rot_matrix) # Shift the ligand so the first attachment atom is at the first slot coords = struct.getXYZ(copy=False) for index in range(struct.atom_total): for axis in range(3): coords[index, axis] = coords[index, axis] + attach_slot1[axis] for index, atom in enumerate(attachers): atom.property[ATTACHMENT_PROPERTY] = int(is_eta[index]) # Delete the marker atom and attached hygrogens struct.deleteAtoms(to_delete) # Add the ligand and attachment bonds complex_st.extend(struct) # Ensure neutral metal and coordination sphere - MATSCI-2756 metal = complex_st.atom[1] metal.formal_charge = 0 # Create bonds and zero out formal charges for atoms attached to metal eta_centroids = [] for atom in complex_st.atom: if ATTACHMENT_PROPERTY in atom.property: is_eta = bool(atom.property[ATTACHMENT_PROPERTY]) if is_eta: # For eta coordination, the "attacher" is just a dummy at # the centroid of the actual attaching atoms. We bond the # metal to all the atoms bonded to the attacher and then # delete the attacher for neighbor in atom.bonded_atoms: neighbor.formal_charge = 0 if complex_st.atom[1].bond_total == mm.MMCT_MAXBOND: msg = ( f'The maximum number of allowed bonds, {mm.MMCT_MAXBOND}, ' 'to the metal has been exceeded.') raise ValueError(msg) complex_st.addBond(1, neighbor.index, 0) eta_centroids.append(atom.index) else: atom.formal_charge = 0 complex_st.addBond(1, atom.index, 0) # Set this property to False so it is not accidently used in # some future ligand addition del atom.property[ATTACHMENT_PROPERTY] # Delete any dummy eta-coordination centroids if eta_centroids: complex_st.deleteAtoms(eta_centroids) # Fix the bond orders fix_metal_bond_orders(complex_st, 1)
[docs]class NoMetalError(Exception): """ Emitted when a metal is not found when one is expected """ pass
[docs]class EtaFindingMixin(object): """ A mixin with a method for finding eta ligands in a metal complex """
[docs] def findEtaGroups(self, dummy_style=True): """ Find each Eta group We define an Eta group as 2 or more atoms that are bound together and also bound to a metal atom :type dummy_style: bool :param dummy_style: Whether to also find eta ligands that are bound by bonding all the ligand atoms to a dummy and then the dummy to the metal. If False, only those ligands that have all eta atoms bound directly to the metal will be found. :note: The function assumes that the self.metals property is set to a list of metal atoms and it creates the self.eta_groups and self_all_eta_atoms properties """ self.eta_groups = [] self.all_eta_atoms = set() metal_set = set(self.metals) for metal in self.metals: # For all non-metal atoms bound to this metal, iteratively group # them into lists of atoms that are connected by bonds to other # atoms bound to the same metal (i.e. eta-ethene or eta-Cp) all_binders = set(metal.bonded_atoms) - metal_set remaining_binders = all_binders.copy() # While there are atoms not yet grouped while remaining_binders: # Grab a random atom to start the group batom = remaining_binders.pop() if batom.atomic_number == -2 and dummy_style: # Dummy atoms bound to a metal may indicate an eta group # bound to the dummy that is then bound to the metal. group = [x for x in batom.bonded_atoms if x != metal] remaining_binders = remaining_binders.difference(group) dm_style_dummy = batom else: group = [] group.append(batom) gindex = 0 # While there are still atoms left in the group whose # neighbors we haven't checked yet while gindex < len(group): for neighbor in group[gindex].bonded_atoms: if neighbor in remaining_binders: # Found a neighbor that is part of this group # (binds to the same metal atom) remaining_binders.remove(neighbor) group.append(neighbor) gindex += 1 dm_style_dummy = None # Groups of len = 1 are just normal ligand binding sites if len(group) > 1: # dm_style_dummy is the dummy atom bound to the metal for # dummy_style eta ligands, and None for all-atom style eta # ligands. egrp = SimpleNamespace(metal=metal, atoms=group, dm_style_dummy=dm_style_dummy) self.eta_groups.append(egrp) self.all_eta_atoms.update(group)
[docs]class ComplexSplitter(EtaFindingMixin): """ Splits a metal complex into a set of ligand structures that bind to the metal """ METAL_BINDER_PROP = 'i_matsci_binding_metal'
[docs] def __init__(self, struct, asl='metals', metals=None): """ Create a ComplexSplitter instance :type struct: `schrodinger.structure.Structure` :param struct: The organometallic complex :type asl: str :param asl: The ASL for finding metal atoms. Ignored if metals is given :type metals: list of `schrodinger.structure._StructureAtom` :param metals: Each item is a metal atom to search for binding ligands. Overrides the asl argument. """ self.struct = struct.copy() msutils.remove_atom_property(self.struct, self.METAL_BINDER_PROP) if metals: self.metals = metals else: self.metals = [ struct.atom[x] for x in analyze.evaluate_asl(struct, asl) ] if not self.metals: raise NoMetalError() self.binding_atoms = set() self.eta_groups = []
[docs] def findBindingAtoms(self): """ Make a list of all atoms that bind to metal atoms """ self.binding_atoms = set() metal_set = set(self.metals) for metal in self.metals: for neighbor in metal.bonded_atoms: if neighbor in metal_set: continue # Note that this doesn't account for atoms that may bridge two # metal atoms. This is currently not an issue, but may need to # be modified in the future. neighbor.property[self.METAL_BINDER_PROP] = metal.index self.binding_atoms.add(neighbor)
[docs] def addDummyAtoms(self): """ Add a dummy atom to each binding atom. For eta ligands, a single dummy atom is added at the centroid of the eta atoms. For non-eta ligands, a dummy atom is added along the atom-metal bond vector. """ binders = self.binding_atoms.copy() for eta_group in self.eta_groups: self.addEtaDummy(eta_group) for atom in eta_group.atoms: # Discard rather than remove because for dummy_style eta # bonding, the eta atoms are bound to a dummy and the dummy # atom is bound to the metal, so the eta_group atoms might not # be in the list of binders binders.discard(atom) if eta_group.dm_style_dummy: binders.discard(eta_group.dm_style_dummy) for atom in binders: self.addBinderDummy(atom)
[docs] def addEtaDummy(self, group): """ Put a dummy atom at the centroid of the haptic ligand :type group: list :param group: A list of atom objects that form the haptic ligand """ if len(group.atoms) == 1: # This shouldn't happen, and would cause a dummy atom at the exact # location of a real atom and that can cause FFLD errors when trying # to minimize. raise RuntimeError('Eta ligands must have more than one binding ' 'atom') indexes = [x.index for x in group.atoms] centroid = transform.get_centroid(self.struct, atom_list=indexes)[:3] dummy = self.struct.addAtom('Du', *centroid) for atom in group.atoms: self.struct.addBond(dummy, atom, structure.BondType.Single)
[docs] def addBinderDummy(self, atom): """ Add a dummy atom on the atom-metal bond vector that will indicate the proper bond direction after the metal atom is deleted. :type atom: `schrodinger.structure._StructureAtom` :param atom: An atom that is bound to the metal """ metal = self.struct.atom[atom.property[self.METAL_BINDER_PROP]] atom_xyz = numpy.array(atom.xyz) # place the dummy atom in the same line that marks the ligand->metal # bond, but not quite on the metal atom. This avoids both the dummy # atoms for a bidentate ligand overlapping completely. vec = 0.9 * (atom_xyz - numpy.array(metal.xyz)) coords = atom_xyz - vec dummy = self.struct.addAtom('Du', *coords) self.struct.addBond(dummy, atom, structure.BondType.Single)
[docs] def createLigandStructures(self): """ Create individual structures for each ligand. A ligand is defined as a molecule that remains intact after deleting the central metal atom. """ ligands_only_st = self.struct.copy() # Also delete any pre-existing dummy atom that marked an eta ligand dummy_style_dummies = [ x.dm_style_dummy for x in self.eta_groups if x.dm_style_dummy is not None ] ligands_only_st.deleteAtoms(self.metals + dummy_style_dummies) self.ligands = [] for mol in ligands_only_st.molecule: lig = mol.extractStructure() msutils.remove_atom_property(lig, self.METAL_BINDER_PROP) self.markRAtomValues(lig) self.ligands.append(lig)
[docs] @staticmethod def markRAtomValues(struct): # Lazy import of builderwidgets to avoid circular importing ratoms = [x for x in struct.atom if x.atomic_number == -2] if len(ratoms) == 2: # The default sorting is just the sum of the atomic weights # of all the neighboring binding atoms - this orders C before N, # single atoms before eta groups and smaller eta groups before # larger ones. numsum = {} for atom in ratoms: numsum[atom] = sum(x.atomic_weight for x in atom.bonded_atoms) ratoms.sort(key=lambda x: numsum[x]) # Covalently bound atoms should come first scopy = struct.copy() dative = structure.BondType.Dative for index, atom in enumerate(ratoms): batoms = list(atom.bonded_atoms) # Find the binding atom. Eta groups have no specific binding # atom. if len(batoms) > 1: # Eta group continue binder = batoms[0] # Change bonds to dative to avoid having the Rx bond take up a # valence slot scopy.getBond(atom.index, binder.index).type = dative binder.formal_charge = 0 open_sites = rxn_channel.open_bonding_sites(scopy, binder.index) if open_sites: # This will covalently bond, move it first (note that # abs(index-1) always equals the other list index) ratoms = [ratoms[index], ratoms[abs(index - 1)]] break rx_values = {x: [y.index] for x, y in enumerate(ratoms, 1)} set_marker_properties(struct, rx_values)
[docs] def splitIntoLigands(self): """ Split the metal complex into ligands :rtype: list :return: A list of `schrodinger.structure.Structure` objects, each one represents a unique ligand from the original complex. The ligands will have binding sites to the metal marked with dummy atoms """ self.findBindingAtoms() self.findEtaGroups() self.addDummyAtoms() self.createLigandStructures() return self.getUniqueLigands(self.ligands, title=self.struct.title)
[docs] @staticmethod def getUniqueLigands(ligands, title=None): """ Remove duplicate ligands :type ligands: list :param ligands: A list of `schrodinger.structure.Structure` objects, each one represents a ligand. :rtype: list :return: A list of `schrodinger.structure.Structure` objects, taken from the input ligands and with duplicates removed. """ species = clusterstruct.find_species(ligands) structs = [] for index, spec in enumerate(species.values(), 1): stindex, molnumber = list(spec.members)[0] struct = ligands[stindex] if title: struct.title = title + '-L' + str(index) structs.append(struct) return structs
[docs]class DuplicateCheckError(Exception): """ Raised internally when checking complexes for duplicates """
[docs]def are_duplicates(ref, comp, tolerance=1.0, check_optical=True): """ Check if both compounds are duplicate structures. Uses canonical SMILES to detect structures with the same chemical bonding, then Works via actual XYZ coordinates to determine if the structures are duplicates. This is slow, but works for metal complexes. Note: Requires compounds with reasonable (or consistent) 3D coordinates Note: Rotomers of haptic ligands about the haptic axis are generally found to be different compounds with the default tolerance of 1.0 Note: Has been tested and seems to work with coordination numbers of 3-6, and monodentate, bidentate and haptic ligands :param `schrodinger.structure.Structure` ref: The reference complex :param `schrodinger.structure.Structure` comp: The structure to compare to the reference :param float tolerance: The maximum displacement of any one atom before the compounds are considered different complexes :param bool check_optical: If True, check for optical isomers (and consider them duplicates) by inverting comp about the first atom (assumed to be the metal atom) and performing the same RMSD check against ref. If False, no check is made. :rtype: bool :return: True if the compounds are found to be identical, False if not :raise ValueError: If there are fewer than 3 usable atoms for the superposiiton """ def are_similar(ref, comp, ref_smiles, asl): """ Check to see if the two given structures are similar :param structure.Structure ref: The reference structure :param structure.Structure comp: The structure to compare to ref :param str ref_smiles: The canonical SMILES for ref :param str asl: The asl to pass into the ComplexConformerRMSD class :rtype: bool :return: Whether the structures are similar in 3D space or not """ comp_smiles = analyze.generate_smiles(comp) # If the SMILES aren't the same, they can't be similar structures if ref_smiles != comp_smiles: return False # Raises ValueError if there are fewer than 3 atoms that don't match asl # the asl crmsd = ComplexConformerRmsd(ref, test_structure=comp, in_place=False, asl_expr=asl) try: rms = crmsd.calculate() except DuplicateCheckError: return False maxd = crmsd.max_distance return maxd <= tolerance # Form an asl that excludes hydrogens of common rotors rotors = ('CH3', 'NH2', 'OH', 'SiH3', 'PH2', 'SH') rotor_asl = ' or '.join(f'smarts.[H][{x}]' for x in rotors) asl = (f'not (atom.ele H and ( {rotor_asl} ))') ref_smiles = analyze.generate_smiles(ref) if are_similar(ref, comp, ref_smiles, asl): return True if check_optical: # Invert the entire molecule about the metal atom (assumed to be atom # #1) and then re-check the RMSD. This will catch chiral (optical) # isomers and consider them identical optiso = comp.copy() move_to_center = [-a for a in optiso.atom[1].xyz] transform.translate_structure(optiso, *move_to_center) optiso.setXYZ(optiso.getXYZ() * numpy.array([-1, -1, -1])) return are_similar(ref, optiso, ref_smiles, asl) return False
[docs]class ComplexConformerRmsd(rmsd.ConformerRmsd):
[docs] def renumberWorkingStructures(self, has_hydrogens): """ Renumber the working structure so that it has atoms in the same order as the reference structure :param has_hydrogens: Ignored """ # We use wantAllH here because some H might be removed by the asl_expr # argument to the class init method (as is done in the are_duplicates # function). If we don't use wantAllH, then getSmilesAndMap only returns # a map for heavy atoms, and we need to renumber any remaining hydrogens # as well. smiles_gen = smiles.SmilesGenerator(unique=True, wantAllH=True) ref_smi, ref_order = smiles_gen.getSmilesAndMap(self._working_ref_st) test_smi, test_order = smiles_gen.getSmilesAndMap(self._working_test_st) if ref_smi != test_smi: raise DuplicateCheckError('SMILES are not identical, these do not ' 'appear to be conformers') # ref_order is a list of ints, each item gives the atom index of the # atom in ref for that position in the SMILES pattern. For instance, # ref_order[0] gives the atom index of the first atom in the SMILES # string. Same for test_order, except that list is for the test # structure. So ref_order.index(atom.index) gives the position in the # SMILES string of atom. So now we create a mapping from a ref atom # index to the test atom index that appears at the same position in the # SMILES string. ref_to_test = {r: t for r, t in zip(ref_order, test_order)} new_order = [ ref_to_test[x] for x in range(1, self._working_ref_st.atom_total + 1) ] self._working_test_st = build.reorder_atoms(self._working_test_st, new_order)
# For running outside of Maestro: if __name__ == '__main__': pass