Source code for schrodinger.structutils.rmsd

"""
Functionality for calculating conformer RMSDs.

Copyright Schrodinger, LLC. All rights reserved.

"""

import warnings

import numpy

import schrodinger.utils.log as log
from schrodinger.analysis import substructure
from schrodinger.infra import mm
from schrodinger.infra import mmlist
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import transform

smiles = None
logger = log.get_logger('rmsd')
logger.setLevel(log.WARNING)

# For superimpose() function:
MOLECULES = mm.MMSUPER_MOLECULES
ATOMS = mm.MMSUPER_ATOMS
CT = mm.MMSUPER_CT


[docs]class DisabledSymmetryWarning(RuntimeWarning): """ class for ConformerRmsd warning about Disabling use_symmetry """
[docs]def calculate_in_place_rmsd(st1, at_list1, st2, at_list2, use_symmetry=False, weights=None): """ :return: Atomic coordinate rmsd between structures. :rtype: float :param st1: Reference structure. :type st1: structure.Structure :param at_list1: List of atom index integers to consider. This must be the same length as at_list2. The coordinates from st1 and st2 are mapped in index order. :type at_list1: list :param st2: Test structure. Must be a conformer of st1 if use_symmetry is True. :type st2: structure.Structure :param at_list2: List of atom index integers to consider. This must be the same length as at_list1. The coordinates from st1 and st2 are mapped in index order. :type at_list2: list :param use_symmetry: Adjust at_list2 index order such that it is optimized with regard to molecular symmetry. The default is False for backwards compatibility, accounting for molecular symmetry is usually desireable. :type use_symmetry: boolean :param weights: A list of weights, which must be the same length as at_list1. The weights in this list pertain to the relative importance to assign to the position deviations of atoms in at_list2 from atoms in at_list1. The weights list is assumed to contain positive values. :raises: ValueError if the shape of the coordinate matrices is not the same, or if the length of the weights array is not the same at the length of the coordinate array. TypeError if the data type of the weights is not float. :note: The input structures are expected to be conformers. See also `ConformerRmsd` which supports calculating the RMSD of a common conformer atom subset specified by ASL with non-conformer input structures. """ # Adjust the second atom list if needed. if use_symmetry: at_list2 = get_optimal_atom_mapping(st1, at_list1, st2, at_list2, in_place=True) # Create indexing vectors for the coordinate arrays. atom_selection1 = numpy.array(at_list1) atom_selection1 -= 1 # Convert from 1-based to 0-based. atom_selection2 = numpy.array(at_list2) atom_selection2 -= 1 # Convert from 1-based to 0-based. # Create an array for the weights. if weights is not None: if len(weights) != len(at_list1): raise ValueError("Length of weights != length of at_list1") try: n_weights = numpy.array(weights, dtype=numpy.float) except TypeError: raise TypeError("calculate_in_place_rmsd: invalid data type for " + "weights list.") # Create a subarray with the indexing vector. st1_coords = st1.getXYZ()[atom_selection1] st2_coords = st2.getXYZ()[atom_selection2] if st1_coords.shape != st2_coords.shape: msg = "calculate_in_place_rmsd: atom list lengths differ: {} != {}" raise ValueError(msg.format(len(st1_coords), len(st2_coords))) diff = st1_coords - st2_coords diff2 = diff * diff if weights is not None: diff2[:, 0] *= n_weights diff2[:, 1] *= n_weights diff2[:, 2] *= n_weights total_wt = numpy.sum(n_weights) else: total_wt = st1_coords.shape[0] rmsd = numpy.sqrt(numpy.sum(diff2.flat) / total_wt) return rmsd
def _interatomic_vector(st1_atom, st2_atom): """ Returns a (numpy) vector pointing from first atom to second :param st1_atom: Atom at end of vector :type st1_atom: Atom object :param st2_atom: Atom at start of vector :type st2_atom: Atom object :return param: vector from atom2 to atom1 :return type: numpy array (3x1) """ x1 = numpy.array(st1_atom.xyz) x2 = numpy.array(st2_atom.xyz) return x1 - x2 def _superimpose_single_atoms(st_fixed, atlist_fixed, st_move, atlist_move, atlist_transform=None): """ Superposes one atom on another one atom :param st_fixed: structure being superposed onto; does not move :type st_fixed: Structure object :param atlist_fixed: list of atom indices in the fixed structure being used for superposition :type atlist_fixed: list of ints (length 1) :param st_move: structure which is superposed by moving :type st_move: Structure object :param atlist_move: list of atom indices in the moving structure being used to calculate superposition :type atlist_move: list of ints (length 1) :param atlist_transform: list of atom indices in the moving structure on which the transformation will be applied. Default value is None, to apply transformation to all atoms in st_move. :type atlist_transform: list of ints or None :return param: root-mean-square deviation (here just 0.0) :return type: float """ if len(atlist_fixed) != 1 or len(atlist_move) != 1: raise ValueError( "_superimpose_single_atoms only works for single atoms") dx = _interatomic_vector(st_fixed.atom[atlist_fixed[0]], st_move.atom[atlist_move[0]]) transform.translate_structure(st_move, dx[0], dx[1], dx[2], atlist_transform) rms = 0.0 return rms def _superimpose_atom_pairs(st_fixed, atlist_fixed, st_move, atlist_move, atlist_transform=None): """ Superposes two atoms on another two atoms :param st_fixed: structure being superposed onto; does not move :type st_fixed: Structure object :param atlist_fixed: list of atom indices in the fixed structure being used for superposition :type atlist_fixed: list of ints (length 2) :param st_move: structure which is superposed by moving :type st_move: Structure object :param atlist_move: list of atom indices in the moving structure being used to calculate superposition :type atlist_move: list of ints (length 2) :param atlist_transform: list of atom indices in the moving structure on which the transformation will be applied. Default value is None, to apply transformation to all atoms in st_move. :type atlist_transform: list of ints or None :return param: root-mean-square deviation :return type: float """ if len(atlist_fixed) != 2 or len(atlist_move) != 2: raise ValueError( "_superimpose_atom_pairs only works for pairs of atoms") # first align bond distances b1 = _interatomic_vector(st_fixed.atom[atlist_fixed[0]], st_fixed.atom[atlist_fixed[1]]) b2 = _interatomic_vector(st_move.atom[atlist_move[0]], st_move.atom[atlist_move[1]]) A = transform.get_alignment_matrix(b2, b1) transform.transform_structure(st_move, A, atlist_transform) # translate to align centroids of the atom pairs pt1 = transform.get_centroid(st_fixed, atlist_fixed) pt2 = transform.get_centroid(st_move, atlist_move) x, y, z = numpy.delete(pt1 - pt2, 3) transform.translate_structure(st_move, x, y, z, atlist_transform) coords_fixed = st_fixed.getXYZ()[[i - 1 for i in atlist_fixed]] coords_move = st_move.getXYZ()[[i - 1 for i in atlist_move]] displacements = coords_fixed - coords_move rms = numpy.sqrt(numpy.mean(displacements * displacements)) return rms def _superimpose_many_atoms(st_fixed, atlist_fixed, st_move, atlist_move, use_symmetry=False, move_which=MOLECULES): """ Superimposes atoms 'atlist_move' in Structure 'st_move' on atoms 'atlist_fixed' in Structure 'st_fixed'. The two structures can be the same. The atom lists must be of equal length and contain at least three atoms. For superimposing structures given only pairs of atoms, see `superimpose_bond`. Return the RMS after superimposition. The function can be called as:: rms = rmsd.superimpose( st, alist1, st, alist2 ) See also the ConformerRmsd class in this module. :param use_symmetry: If True, symmetry will automatically be detected and the lowest RMS obtained by doing all symmetry-related comparisons will be returned. This option can only be used if the input structures are conformers. :type use_symmetry: bool :param move_which: MOLECULES, ATOMS, CT - MOLECULES - All molecules in st2 which also have atoms specified in at_list2 will be transformed to be superimposed on all molecules in st1 which have atoms in at_list1 (default). - ATOMS - Only the atoms given in the lists at_list1 and at_list2 will be transformed. - CT - All of st2 will be transformed. """ rms = 0.0 sym_handle = -1 mm.mmlist_initialize(mm.error_handler) mmlist1 = mmlist._mmlist_from_pylist(atlist_fixed) mmlist2 = mmlist._mmlist_from_pylist(atlist_move) if use_symmetry: mm.mmsym_initialize(mm.error_handler) sym_handle = mm.mmsym_new() try: mm.mmsuper_initialize(mm.error_handler) if len(atlist_fixed) < 3 or len(atlist_move) < 3 or \ len(atlist_fixed) != len(atlist_move): msg = "Lists for superimposition must have at least 3 atoms and be of the same length" raise ValueError(msg) if use_symmetry: map_list = -1 map_list = mm.mmsym_gen_map(sym_handle, st_fixed, mmlist1, st_move, mmlist2) rms = mm.mmsuper_rms(st_fixed, mmlist1, st_move, map_list) if map_list >= 0: mm.mmlist_delete(map_list) else: mm.mmsuper_superimpose(st_fixed, mmlist1, st_move, mmlist2, move_which) rms = mm.mmsuper_rms(st_fixed, mmlist1, st_move, mmlist2) finally: if use_symmetry: mm.mmsym_delete(sym_handle) # Note that there appears to be a bug in mmsym_terminate # which causes problems with subsequent initializations from # the same process. Therefore the terminate call which follows # is commented out: # mm.mmsym_terminate() mm.mmlist_delete(mmlist1) mm.mmlist_delete(mmlist2) mm.mmsuper_terminate() mm.mmlist_terminate() return rms
[docs]def superimpose(st_fixed, atlist_fixed, st_move, atlist_move, use_symmetry=False, move_which=MOLECULES): """ wrapper for _superimpose_single_atoms, _superimpose_atom_pairs, and _superimpose_many_atoms. This handles cases with 1 or 2 atoms which fail in mm_superimpose (in addition to more atoms). This function superposes the two molecules and returns the rms. move_which can be set to structutils.rmsd.X, where X=CT, ATOMS, MOLECULES :param st_fixed: Structure being superposed on, will not be moved :type st_fixed: Structure :param atlist_fixed: list of atom indices of st_fixed to be considered for superposition :type atlist_fixed: list of ints :param st2: Structure being superposed, some fraction will be moved :type st2: Structure :param atlist2: list of atom indices of st2 to be considered for superposition :type atlist2: list of ints :param use_symmetry: If True, symmetry will automatically be detected and the lowest RMS obtained by doing all symmetry-related comparisons will be returned. This option can only be used if the input structures are conformers. Only used for the many-atom case. :type use_symmetry: bool :param move_which: moveable unit for superposing st2 onto st_fixed :type move_which: MOLECULES, ATOMS, CT :return param: rms of st_fixed and st2 (over the atoms in the atlist) after superposition :return type: float """ if len(atlist_fixed) != len(atlist_move): raise ValueError("atom lists for superposition must be the same size") num_ats = len(atlist_fixed) # if lists are empty, nothing to be done if num_ats == 0: rms = 0.0 elif num_ats > 2: # Underlaying mm.mmsuper_superimpose will handle move_which appropriately rms = _superimpose_many_atoms(st_fixed, atlist_fixed, st_move, atlist_move, use_symmetry=use_symmetry, move_which=move_which) else: if num_ats == 1: superimpose_func = _superimpose_single_atoms else: superimpose_func = _superimpose_atom_pairs if move_which == ATOMS: transform_atoms = atlist_move elif move_which == MOLECULES: # most probably all atoms are already in the same molecule, so try to # minimize calls to update & getMoleculeAtoms move_atoms = set(st_move.getMoleculeAtoms(atlist_move[0])) for iat in atlist_move: if st_move.atom[iat] not in move_atoms: move_atoms.update(st_move.getMoleculeAtoms(iat)) transform_atoms = [atom.index for atom in move_atoms] elif move_which == CT: transform_atoms = None else: raise ValueError( f"{move_which} is not a valid option for 'move_which'") rms = superimpose_func(st_fixed, atlist_fixed, st_move, atlist_move, transform_atoms) return rms
[docs]def superimpose_substructure_molecules(st1, atlist1, st2, atlist2): """ Superpose st1 and st2 by moving each molecule independently. :param st1: first structure :type st1: Structure :param atlist1: list of atom indexes for structure 1 :type atlist1: list of ints :param st2: second structure :type st2: Structure :param atlist2: list of atom indexes for structure 2 :type atlist2: list of ints :return: rmsd, st1 and st2 rotated and translated to align """ rms = 0.0 graph = substructure.ConnectedMoleculeChain(st1, atlist1, st2, atlist2) chain = graph.getConnectivityChain() st_list = [st1, st2] for st, fixed_list, move_list in chain: rms += superimpose(st_list[st - 1], fixed_list, st_list[st], move_list) return rms
[docs]def superimpose_bond(st1, at_pair1, st2, at_pair2): """ Translate and rotate st2 in place, putting the first atom of at_pair2 on top of the first atom of at_pair1, and the second atom of at_pair2 as close as possible to the corresponding atom of at_pair1. This is commonly used for aligning bonds, but it is not a requirement that the two atoms in each pair be bonded. st1 and st2 must be different structure objects. :param st1: reference structure :type st1: `structure.Structure` :param at_pair1: pair of atom indexes from st1 :type at_pair1: sequence of int :param st2: structure to be rotated (modified in place) :type st2: `structure.Structure` :param at_pair2: pair of atom indexes from st2 :type at_pair2: sequence of int """ if len(at_pair1) != 2 or len(at_pair2) != 2: raise ValueError("Atom pairs must have two atoms") if st1 == st2: raise ValueError("st1 must be different from st2") coord_st1_1 = numpy.array(st1.atom[at_pair1[0]].xyz) coord_st1_2 = numpy.array(st1.atom[at_pair1[1]].xyz) coord_st2_1 = numpy.array(st2.atom[at_pair2[0]].xyz) coord_st2_2 = numpy.array(st2.atom[at_pair2[1]].xyz) bond_vec_1 = coord_st1_2 - coord_st1_1 bond_vec_2 = coord_st2_2 - coord_st2_1 rot_matrix = transform.get_alignment_matrix(bond_vec_2, bond_vec_1) transform.translate_structure(st2, *-coord_st2_1) transform.transform_structure(st2, rot_matrix) transform.translate_structure(st2, *coord_st1_1)
[docs]def get_super_transformation_matrix(st1, at_list1, st2, at_list2): """ :return: numpy matrix for the tranformation that will best superimpose atoms of st2 onto atoms of st1. :rtype: numpy array :param st1: Reference (non-moving) structure. :type st1: structure.Structure :param at_list1: Atom indexes from st1 to consider. Must be the same size as at_list2 and contain at least three atom indexes. :type at_list1: list :param st2: Test (moving) structure to be transformed onto st1. :type st2: structure.Structure :param at_list2: Atom indexes from st2 to consider. Must be the same size as at_list1 and contain at least three atom indexes. :type at_list2: list """ mm.mmlist_initialize(mm.error_handler) mmlist1 = mmlist._mmlist_from_pylist(at_list1) mmlist2 = mmlist._mmlist_from_pylist(at_list2) try: mm.mmsuper_initialize(mm.error_handler) if len(at_list1) < 3 or len(at_list2) < 3 or \ len(at_list1) != len(at_list2): raise ValueError( "Lists for superimposition must have at least 3 atoms and be of the same length" ) else: xform = mm.mmsuper_get_super_xform(st1, mmlist1, st2, mmlist2) finally: mm.mmlist_delete(mmlist1) mm.mmlist_delete(mmlist2) mm.mmlist_terminate() return xform
[docs]def get_optimal_atom_mapping(reference_structure, reference_atom_list, test_structure, test_atom_list, in_place=True): """ FOR USE BY ConformerRmsd class only! Returns a list of test_structure atom indexes for the optimal, symmetry-aware, pair-wise alignment with the reference_structure. Parameters reference_structure (structure.Structure) reference_atom_list (list of int) A list of atom indices. test_structure (structure.Structure) test_atom_list (list of int) A list of atom indices. :param in_place: If False, will superimpose the test_structure on top of the reference_structure in addition to calculating the mapping. :type in_place: bool This method is semi-private because the ConformerRmsd class depends on it. """ # Initialize the libraries try: mm.mmsym_initialize(mm.error_handler) mm.mmlist_initialize(mm.error_handler) sym_handle = mm.mmsym_new() except Exception: msg = "A problem occured initializing the mmsym and mmlist libraries." raise Exception(msg) # Generate mmlists from python data structures mmlist_reference = mmlist._mmlist_from_pylist(reference_atom_list) mmlist_test = mmlist._mmlist_from_pylist(test_atom_list) map_list = -1 # Set the specific no-transform options if in_place: mm.mmsym_option(sym_handle, mm.MMSYM_OPTION_FROZEN_ATOMS, int(True)) mm.mmsym_option(sym_handle, mm.MMSYM_OPTION_MAP_METHOD, mm.MMSYM_EQUIVALENCE_CLASS) # Get the map try: map_list = mm.mmsym_gen_map(sym_handle, reference_structure, mmlist_reference, test_structure, mmlist_test) except mm.MmException: msg = "Lists for symmetry mapping must have at least 3 atoms and be of the same length." raise ValueError(msg) # Clean up and return ret_list = [] ret_list = mmlist._mmlist_to_pylist(map_list) mm.mmlist_delete(mmlist_reference) mm.mmlist_delete(mmlist_test) mm.mmlist_delete(map_list) mm.mmsym_delete(sym_handle) mm.mmlist_terminate() return ret_list
# For backwards-compatability: _get_mmsym_map = get_optimal_atom_mapping
[docs]def renumber_conformer(ref_st, test_st, use_symmetry=False, in_place=True, has_hydrogens=True): """ Renumbers atoms in <test_st> so that they match <ref_st>. Structures must have hydrogens, see CANVAS-5422. :param ref_st: Structure to use as a reference for renumbering. :type ref_st: `structure.Structure` :param test_st: Structure to renumber (renumbered copy will be returned) :type test_st: `structure.Structure` :param use_symmetry: Whether to consider symmetry. Always set to True unless mmsym is going to be used on the output structure. :type use_symmetry: bool :param in_place: If True (default), the conformer is only renumbered; if False, it will also be superimposed on top of the ref_st based on the new atom numbering. :type in_place: bool :return Renumbered version of test structure. :rtype `structure.Structure` """ global smiles if smiles is None: import schrodinger.structutils.smiles as smiles ref_atoms = list(range(1, ref_st.atom_total + 1)) test_atoms = list(range(1, test_st.atom_total + 1)) smiles_gen = smiles.SmilesGenerator(unique=True) if has_hydrogens: ref_order = smiles_gen.getUniqueOrder(ref_st) test_order = smiles_gen.getUniqueOrder(test_st) else: _, ref_order = smiles_gen.getSmilesAndMap(ref_st) _, test_order = smiles_gen.getSmilesAndMap(test_st) # Convert unique ordering to a renumber map for test structure: renumber_map = [test_order[ref_order.index(anum)] for anum in ref_atoms] test_st = build.reorder_atoms(test_st, renumber_map) if use_symmetry: # FIXME: This will only work if the atom sets include all atoms # in the structures, as reorder_atoms() expects the input list to # contain all atoms in the CT. The fix is to pass in a list of all # atoms instead of ref_atoms and test_atoms. symmetry_map = get_optimal_atom_mapping(ref_st, ref_atoms, test_st, test_atoms, in_place=in_place) test_st = build.reorder_atoms(test_st, symmetry_map) return test_st
[docs]class ConformerRmsd: """ A class to calculate the root mean square deviatation between the atomic coordinates of two conformer structure.Structure objects. The inputs are expected to be conformers in the traditional sense. Working copies of the input structures are modified instead the original. The superimpose transformation is applied to the entire test_structure. The transformation matrix is saved as superposition_matrix property if in_place is set to False. Renumbering is achieved by creating a list of SMARTS patterns, one for each molecule in the reference structure, evaluating the SMARTS pattern with both the reference and test structures to get a standard order of atom indexes, then passing that atom order to mm.mmct_ct_reorder. Renumbering can be slow with protein-sized molecules so you may want to disable that feature when working with large molecules. API Example:: # Calculate in place, heavy atom RMSD. st1 = structure.Structure.read('file1.mae') st2 = structure.Structure.read('file2.mae') conf_rmsd = ConformerRmsd(st1, st2) # in place, heavy atom RMSD calc. if conf_rmsd.calculate() < 2.00: print "Good pose" # Loop over structures in test.mae, comparing to ref.mae st1 = structure.Structure.read('ref.mae') conf_rmsd = ConformerRmsd(st1, st1) # in place, heavy atom RMSD calc. for st in structure.StructureReader('test.mae'): conf_rmsd.test_structure = st print conf_rmsd.calculate() Instance Attributes :ivar use_symmetry: Boolean to control whether the test structure atom list should be determined by with the mmsym library. Mmsym accounts for molecular symmetry and is recommended. This boolean just allows for more detailed testing. NOTE: Make sure use_symmetry is True if using renumber_structures. :ivar renumber_structures: Boolean to control whether the reference and test structures should be renumbered by a SMARTS pattern before calculating the rmsd. For better performance, set to False when the inputs are sure to have the same atomic numbering schemes. NOTE: Make sure use_symmetry is True if using renumber_structures. :ivar use_heavy_atom_graph: Boolean to control whether the reference and test structures should be treated as heavy-atom only, graph topologies. Default is False. Tautomers, and different ionization states are not true conformers, but often require RMSD analysis. If True, the test_structure and reference_structure are treated by deleting all hydrogens, setting all bond orders to 1, setting all formal charges to 0, then adjusting the atom types. :ivar orig_index_prop: m2io dataname for atomic property that stores the original atom index of the input structures. Default is 'i_confrmsd_original_index'. This is needed to we can extract/reduce/renumber and present information about the original index which the end-user perceives. :note: The following attributes are available after calculate() :ivar rmsd: Root mean square deviation of atomic coordintates :vartype rmsd: float :ivar max_distance: Greatest displacement between the atom pairs :vartype max_distance: float :ivar max_distance_atom_1: Reference atom index (in original atom scheme) :vartype max_distance_atom_1: integer :ivar max_distance_atom_2: Test atom index (in the original atom scheme) :vartype max_distance_atom_2: integer :ivar rmsd_str: String of basic rmsd info == str(self) :ivar precision: Precision of rmsd stored to find minimum if search_permutations=True defaults to 6 (meaning a precision of 10^-6) :ivar max_permutations: Maximum number of permutations to search defaults to 10000000 :ivar superposition_matrix: Matrix that was used for superimposing the test structure on top of the reference structure, if in_place=True. :raise: exceptions if a preparation step can't be completed, or if the input structures can't be handled as conformers. """ orig_index_prop = 'i_confrmsd_original_index'
[docs] def __init__( self, reference_structure, test_structure=None, asl_expr="NOT atom.element H", in_place=True, ): """ :param reference_structure: Template structure :type reference_structure: structure.Structure :param test_structure: The mobile structure. :type test_structure: structure.Structure :param asl_expr: Atom Language Expression to identify the the atoms used to calculate the RMSD and base the superimpose alignment. :type asl_expr: string :param in_place: If True, calculate the RMSD without moving the test_structure. Otherwise, perform the optimal alignment then calculate the RMSD. :type in_place: Boolean """ global smiles if smiles is None: import schrodinger.structutils.smiles as smiles self._remove_stereo_annotation = smiles.remove_stereo_annotation self.use_symmetry = True self.renumber_structures = True self.use_heavy_atom_graph = False self.extract = True self.no_transform = False self.max_distance_atom_1 = None self.max_distance_atom_2 = None self.max_distance = None self.superposition_matrix = numpy.identity((4), 'd') self.reference_structure = reference_structure self.test_structure = reference_structure if test_structure: self.test_structure = test_structure self.atom_index_map = [] self.asl_expr = asl_expr self.in_place = in_place self._working_ref_st = None self._working_test_st = None logger.debug("ConformerRmsd: __init__ reference_structure %s" % self.reference_structure) logger.debug("ConformerRmsd: __init__ test_structure %s" % self.test_structure) logger.debug("ConformerRmsd: __init__ asl_expr %s" % self.asl_expr) logger.debug("ConformerRmsd: __init__ in_place %s" % self.in_place)
def _encodeOriginalAtomNumbers(self, st): """ :param st: Structure to store original atom indexes as atom-level properties. :type st: structure.Structure """ logger.debug("ConformerRmsd: _encodeOriginalAtomNumbers %s" % self.orig_index_prop) for atom in st.atom: atom.property[self.orig_index_prop] = atom.index return
[docs] def getRmsdDataname(self): """ :return: m2io property dataname string. The property name indicates the reference structure, the title, the ASL used to identify comparison atoms and if the structure is in-place or mobile. :rtype: string """ # Define the type of RMSD mode = "superimposed" if self.in_place: mode = "in-place" label = 'RMSD' if not self.renumber_structures or not self.use_symmetry: label = 'RMSD*' dataname = "r_user_%s_%s_%s_%s" % ( label, mode, self.asl_expr.replace('"', '').replace("'", ''), # PYAPP-5721 self.reference_structure.property['s_m_title']) dataname = dataname.replace(' ', '_') return dataname
[docs] def writeStructures(self, file_name="rmsd.mae", mode='w'): """ Writes the reference and test structures to file. :param file_name: Path of the structure file to write. :type file_name: string :param mode: 'w' => write, clobber as needed 'a' => append :type mode: string """ if mode == 'w': self.reference_structure.write(file_name) self.test_structure.append(file_name) if mode == 'a': self.reference_structure.append(file_name) self.test_structure.append(file_name) return file_name
[docs] def writeCommand(self, file_name="rmsd.cmd"): """ Writes a Maestro command file and structures with the pair wise atom mapping in command file mode. The Maestro file has the same basename as the command file. Clobbers existing files. :param file_name: Path to the maestro command file with the atom pairings. :type file_name: string :raise: ValueError if file_name does not have '.cmd' extension. """ # Demonstrate that the file_name ends with .cmd if not file_name.endswith('.cmd'): msg = "ConformerRmsd.writeCommand: file_name must end with '.cmd'." raise ValueError(msg) # Create a Maestro file name with the same base mae_file_name = '.'.join([file_name[:-4], 'mae']) # Dump out the strucutres mae_file_name = self.writeStructures(mae_file_name, 'w') # Write the maestro command file file_handle = open(file_name, mode='w') file_handle.write('delete atom all\n') file_handle.write('entryimport %s\n' % mae_file_name) file_handle.write('entrywsincludeonly\n') for item in self.atom_index_map: file_handle.write( 'superimposeatom %d %d \n' % (item[0], item[1] + len(self.reference_structure.atom))) if self.in_place: file_handle.write('superimpose inplace=true\n') file_handle.write('superimpose\n') file_handle.flush() file_handle.close() return file_name
def __str__(self): """ :return: a user friendly string representation of the instance. e.g.: In place RMSD = 1.01; atoms = "a.e C" """ # Define the type of RMSD mode = "Superimposed" if self.in_place: mode = "In place" # Qualify the '=' operator if symmetry or renumbering are not used if self.renumber_structures and self.use_symmetry: operator = '=' else: operator = '<=' user_str = "%s RMSD %s %.2f; atoms = \"%s\"" % ( mode, operator, self.rmsd, self.asl_expr) return user_str def _extractSuperimposeAslStructure(self, st): """ :param st: Structure from which to extract the ASL matching atoms. :type st: structure.Structure :raise: ValueError if ASL does not match any atoms. """ logger.debug("ConformerRmsd: _extractSuperimposeAslStructure(%s)" % str(st)) # NOTE: Will raise RuntimeError if ASL matching failed matching_atoms = self.evaluateAsl(st) return st.extract(matching_atoms)
[docs] def evaluateAsl(self, st): """ Return the atoms of the input structure that match <asl_expr>. :param st: Structure to evaluate :type st: `structure.Structure` :return: Atoms matching the ASL :rtype: list of ints :raise RuntimeError if ASL matching failed. """ try: matched_atoms = analyze.evaluate_asl(st, self.asl_expr) except Exception as err: if "is not a valid ASL expression" in str(err): raise RuntimeError("Invalid ASL: %s" % self.asl_expr) else: msg = "Unexpected error evaluating ASL: %s" % self.asl_expr raise RuntimeError(msg + str(err)) if not matched_atoms: raise RuntimeError("ASL did not match any atoms: %s" % self.asl_expr) # TODO Maybe also check that at least 3 atoms matched? return matched_atoms
[docs] def calculate(self): """ :return: Root-mean-squared difference of atom coordinates. :type: float :raise: ValueError if working versions of the reference and test structures don't have the same shape (non-confs). The order of operations: * prepare working copies of the reference and test structures. ** copy the structures. ** encode the original atom indexes as atom properties. ** extract substructure of the atoms matching the ASL. ** reduce to heavy atom graph (instance option, non-default). ** normalize numbering scheme (instance option, default). * determine molecular symmetry mapping (optional, default). * create a numpy coordinate array for the working structures. * numpy linear algebra SVD to superimpose (instance option). ** transform test_structure * numpy array used to calculate RMSD, and max_dist. * decode original indexes to identify atoms involve in max dist. """ if not self.use_symmetry and self.renumber_structures: msg = ('WARNING: Disabling use_symmetry with renumber_structures ' 'set to True will result in wrong RMSDs!') warnings.warn(msg, DisabledSymmetryWarning, stacklevel=2) # Create working copies so the original reference and test # structures are not altered. consider_atoms = self._prepareStructures() # Update atom selections to input CT atom numbering schemes: orig_ref_atoms = [ self._working_ref_st.atom[iat].property[self.orig_index_prop] for iat in consider_atoms ] orig_test_atoms = [ self._working_test_st.atom[index].property[self.orig_index_prop] for index in consider_atoms ] self.atom_index_map = list(zip(orig_ref_atoms, orig_test_atoms)) # Calculate and apply rotation matrix if not doing an 'in_place' # calculation. This will update coordinate arrays and transforms # self.test_structure. if not self.in_place: # Superimpose the working test structure on top of the working # reference structure, then apply the same transformation matrix # on the input test structure (self.test_structure). self._superimpose(consider_atoms, consider_atoms) # NOTE: The rest of the method calculates the RMSD between the # reference structure and the (optionally superposed) test structure. # Convert atom selection from 1-based list to 0-based numpy array, # and generate coordinate matrixes for the structures. ref_coords = self._working_ref_st.getXYZ()[numpy.array(consider_atoms) - 1] test_coords = self._working_test_st.getXYZ()[numpy.array(consider_atoms) - 1] if test_coords.shape != ref_coords.shape: raise ValueError("Coordinate matrix shape does not match") # Calculate the rmsd, max distance from the coord arrray. coord_diff = test_coords - ref_coords coord_diff_sqrd = coord_diff * coord_diff self.distances = numpy.sqrt(numpy.sum(coord_diff_sqrd, 1)) self.rmsd = numpy.sqrt( numpy.sum(self.distances * self.distances) / self.distances.shape[0]) self.max_distance = numpy.amax(self.distances) # Determine the max dist atom's using the original input atom # index number scheme. max_dist_index = list(self.distances).index(self.max_distance) ref_max_dist_atom = self._working_ref_st.atom[ consider_atoms[max_dist_index]] test_max_dist_atom = self._working_test_st.atom[ consider_atoms[max_dist_index]] self.max_distance_atom_1 = ref_max_dist_atom.property.get( self.orig_index_prop) self.max_distance_atom_2 = test_max_dist_atom.property.get( self.orig_index_prop) # Create a nice string representation of the data. self.rmsd_str = str(self) logger.debug("ConformerRmsd: RMSD %f" % self.rmsd) logger.debug("ConformerRmsd: RMSD str %s" % self.rmsd_str) logger.debug("ConformerRmsd: dists %s" % str(self.distances)) logger.debug("ConformerRmsd: max dist %s" % str(self.max_distance)) logger.debug("ConformerRmsd: superposition matrix\n %s" % str(self.superposition_matrix)) return self.rmsd
def _prepareStructures(self): """ :return: a working copy of the reference and test structures, renumbered and/or reduced as needed. The original atom index is saved as atom level properties in the working structure. A substructure of just the atoms specified for superposition via ASL. :param reference: If true, seed the renumbering pattern dictionary. :type reference: boolean :raise: RuntimeError if the ASL extraction fails. :return: A list of atoms that should be analyzed for RMSD calculation; includes only atoms that matched the <asl_expr> option. :rtype: list of ints """ logger.debug( "ConformerRmsd: _prepareStructures creating working copies of the structures." ) # Create working copies so the original is not tampered with. self._working_ref_st = self.reference_structure.copy() self._working_test_st = self.test_structure.copy() # Track the original atom indexes so they can be reported back # to the end-user. self._encodeOriginalAtomNumbers(self._working_ref_st) self._encodeOriginalAtomNumbers(self._working_test_st) # If the given structures are not conformers, apply the ASL # and reduce the structure with the given ASL to work with conformers. compare_status = mm.mmct_ct_compare_connect( self._working_ref_st.handle, self._working_test_st.handle) has_hydrogens = True # Assume hydrogens == True, unless we explicitly remove # them below if compare_status != mm.MMCT_SAME: self._working_ref_st = self._extractSuperimposeAslStructure( self._working_ref_st) self._working_test_st = self._extractSuperimposeAslStructure( self._working_test_st) if "NOT atom.element H" in self.asl_expr: has_hydrogens = False if self._working_ref_st.atom_total != self._working_test_st.atom_total: raise RuntimeError("Number of considered atoms differs between " "reference (%i) and test (%i) structures" % (self._working_ref_st.atom_total, self._working_test_st.atom_total)) else: # The 2 structures are conformers; don't remove non-matching ASL # atoms, they just won't be included in the calculation. Save # original atom numbers, as more post-processing is to follow. # NOTE: Will raise RuntimeError if ASL matching failed matches = self.evaluateAsl(self._working_ref_st) orig_consider_atoms = [ self._working_ref_st.atom[anum].property[self.orig_index_prop] for anum in matches ] # ev70650 mmstereo warning when comparing structures with # ConformerRmsd - remove any labels that may be invalidated by # the preparation (extraction, renumbering, heavy atom reduction). self._remove_stereo_annotation(self._working_ref_st) self._remove_stereo_annotation(self._working_test_st) # Strip structures down to heavy atom skeleton if doing # tautomers/ionization state agnostic calculation. if self.use_heavy_atom_graph: self._working_ref_st = self._getReducedStructure( self._working_ref_st) self._working_test_st = self._getReducedStructure( self._working_test_st) # Assign patterns from the reference structure. # And give the structures the same atom numbering scheme, except for # symmetrical atom groups - which will be renumbered by mmsym. if self.renumber_structures: self.renumberWorkingStructures(has_hydrogens) # Calculate molecular symmetry aware atom mapping of the test # structure on the reference. if self.use_symmetry: # Even if conformers are already numbered the same, the symmetrical # parts of the conformers may be in different conformation, so we # need to renumber them: self.renumberBySymmetry() if compare_status != mm.MMCT_SAME: # If the structures are not conformers, then we have already # removed atoms that don't match the ASL; so analyze all # remaining atoms: consider_atoms = list(range(1, self._working_ref_st.atom_total + 1)) else: # If the structures are conformers, then they will have the same # number of atoms, and were already renumbered to have consistent # atom numbering. consider_atoms = [ atom.index for atom in self._working_ref_st.atom if atom.property[self.orig_index_prop] in orig_consider_atoms ] return consider_atoms
[docs] def renumberBySymmetry(self): """ Renumber the atoms in _working_test_st based off the reference structure ussing mmsym. """ # Copy of test CT is passed in so that superposition when in_place # is False is not actually done to _working_test_st, and is used # for calculating the renumber map only. all_atoms = list(range(1, self._working_test_st.atom_total + 1)) symmetry_map = get_optimal_atom_mapping(self._working_ref_st, all_atoms, self._working_test_st.copy(), all_atoms, in_place=self.in_place) self._working_test_st = build.reorder_atoms(self._working_test_st, symmetry_map)
[docs] def renumberWorkingStructures(self, has_hydrogens): """ Renumber the working structures to give them identical numbering. By default, the test structure is renumbered to match the reference using renumber_conformer() functon; but this behavior can be changed by over-riding this method in a subclass. """ # NOTE: This method is overridden in # schrodinger.application.jaguar.rmsd.ConformerRmsd self._working_test_st = renumber_conformer(self._working_ref_st, self._working_test_st, use_symmetry=False, has_hydrogens=has_hydrogens)
def _getReducedStructure(self, st): """ :return: a Structure that is a simplified copy of the input: a neutral, single bond connected, heavy atom only frame. The returned structure is handy for making RMSD comparisons between pseudo conformers that differ by ionization state or tautomerism. :param st: Structure to reduce. :type st: structure.Structure """ logger.debug("ConformerRmsd: _getReducedStructure(%s)" % str(st)) # Don't tamper with the original structure. st_reduced = st.copy() # Remove the hydrogens to get heavy atom framework. build.delete_hydrogens(st_reduced) # Set bond orders and formal charges. for iatom in st_reduced.atom: iatom.formal_charge = 0 for ibond in iatom.bond: ibond.order = 1 # Set united-atom atom types. st_reduced.retype() return st_reduced def _superimpose(self, ref_st_selection, test_st_selection): """ Calculate optimal rotation-translation transform of test_st_selection of atoms in working_test_st onto ref_st_selection of atoms in working_ref_st, and apply to test_structure. :type ref_st_selection: list :param ref_st_selection: A list of 1-based atom indices to consider from reference structure. :type test_st_selection: list :param test_st_selection: A list of 1-based atom indices to consider from test structure. :returntype: tuple :return: (coordinate arrays in reference structure, coordinate arrays in test structure) """ logger.debug("ConformerRmsd: _superimpose") self.superposition_matrix = get_super_transformation_matrix( self._working_ref_st, ref_st_selection, self._working_test_st, test_st_selection) transform.transform_structure(self._working_test_st, self.superposition_matrix, test_st_selection) transform.transform_structure(self.test_structure, self.superposition_matrix) logger.debug("ConformerRmsd: _superimpose done.")
# Both names are valid.
[docs]class ConformerRmsdX(ConformerRmsd):
[docs] def __init__(self, *args, **kwargs): msg = "ConformerRmsdX class is depreacted. Use ConformerRmsd instead." warnings.warn(msg, DeprecationWarning, stacklevel=2) super(ConformerRmsdX, self).__init__(*args, **kwargs)
# EOF