Source code for schrodinger.application.desmond.struc

"""
A collection of miscellaneous molecular-structure-related functions.

Copyright Schrodinger, LLC. All rights reserved.
"""

import collections
from collections import defaultdict
from itertools import chain
from itertools import zip_longest
from typing import TYPE_CHECKING
from typing import Dict
from typing import Iterable
from typing import Iterator
from typing import List
from typing import Optional
from typing import Tuple

import numpy as np
from rdkit import Chem
from rdkit.Chem.MolStandardize import rdMolStandardize

from schrodinger import structure
from schrodinger.application.desmond import constants
from schrodinger.application.desmond import gcmc_utils
from schrodinger.application.desmond import util
from schrodinger.application.desmond.constants import CT_TYPE
from schrodinger.application.desmond.ffiostructure import FFIOStructure
from schrodinger.infra import mm
from schrodinger.protein import captermini
from schrodinger.structure import Structure
from schrodinger.structure import StructureReader
from schrodinger.structure import _Residue
from schrodinger.structure import _StructureAtom
from schrodinger.structure import create_new_structure
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.thirdparty import rdkit_adapter

if TYPE_CHECKING:
    from schrodinger.application.desmond.cms import Cms

# - These are the names normally used for protein backbone atoms in PDB files.
# - Note that the name of the hydrogen atom bonded to the backbone N atom has
#   two options: 'H' or 'HN'.
BACKBONE_ATOM_NAME = [
    "H",
    "HN",
    "HA",
    "N",
    "CA",
    "C",
    "O",
]


[docs]def get_sidechain_att_atom(residue: _Residue) \ -> Optional[Tuple[_StructureAtom, _StructureAtom]]: """ Given an amino acid residue `residue`, this function returns two atoms: The first atom is the alpha carbon, and the 2nd is the beta atom belonging to the sidechain and is bonded to the first atom. If no attachment atoms cannot be identified, `None` is returned. """ # Finds out the attachment atoms: first the "CA" atom, then the "CB" atom # (or the hydrogen atom for GLY). for a in residue.atom: if a.pdbname.strip().upper() == "CA": c_atom = a break else: return None # DESMOND-2774 - 2HA is the name for Gly's beta hydrogen in PDB v2, and HA3 # in PDB v3. beta_atom_names = ["2HA", "HA3"] \ if residue.pdbres.strip().upper() == "GLY" else ["CB"] for a in residue.atom: if a.pdbname.strip().upper() in beta_atom_names: s_atom = a break else: return None return (c_atom, s_atom)
[docs]def get_residue_sitename(residue: _Residue): """ Given a `_Residue` object ('residue'), this function returns a string that represents the name of the residue site. The string is in the format C:RESXX[I] where C is the chain name, RES is the residue type name, XX is the residue number in the chain, and I is the insertion code. For example, a string could be "A:Tyr45[B]". If there is no insertion code, the string will be reduced to C:RESXX, e.g. "A:Tyr45". """ resname = residue.pdbres.strip().capitalize() resnum = residue.resnum chain = residue.chain inscode = residue.inscode if (chain != ' ' and inscode != ' '): s = "%s:%s%d[%s]" % ( chain, resname, resnum, inscode, ) elif (chain != ' '): s = "%s:%s%d" % ( chain, resname, resnum, ) elif (inscode != ' '): s = "%s%d[%s]" % ( resname, resnum, inscode, ) else: s = "%s%d" % ( resname, resnum, ) return s
[docs]def all_atoms(structures): """ Returns an iterator over all atoms in all given structures. The order of iteration is the following: The atoms of the first structure, the atoms of the second, and so on. If a `Cms' object is given, the order of iteration is the atoms of the full-system CT, those of an internal copy of the full-systemm CT within the `Cms` object [when writing the `Cms` object into a file (or string), it's this copy that is outputted], the atoms of the first component CT, of the 2nd component CT, and so on. :type stuctures: Iterable[Structure] or Cms """ # Imports `Cms` here to avoid the circular dependency between the `struc` # and `cms` modules. from schrodinger.application.desmond.cms import Cms if isinstance(structures, Cms): c = structures structures = [c, c._raw_fsys_ct] + c.comp_ct return chain(*[st.atom for st in structures])
[docs]def selected_atoms(st: Structure, atom_indices: Iterable[int] = None, asl: str = None) -> Iterator[_StructureAtom]: """ Returns an iterator over the atoms selected either by `atom_indices` or `asl`. The order of the iteration is the same as that of `atom_indices` or the result of `analyze.evaluate_asl`. It's an error to provide both `atom_indices` and `asl` arguments. """ assert bool(atom_indices) != bool(asl) if asl: atom_indices = analyze.evaluate_asl(st, asl) for i in atom_indices: yield st.atom[i]
[docs]def set_atom_properties(atoms, propnames, values=None): """ Set atom properties for the given atoms. :type atoms: Iterable[_StructureAtom] :param atoms: Atoms whose properties will be set. :type propnames: List[str] :param propnames: A list of atom property names :param values: A list of atom property values. Each value is for the corresponding property in `propnames`. If not given, the default value will be `0`, `False`, and `""` for the numeric types, the boolean type, and the string type, respectively. """ if values is None: values = [] for name in propnames: if name.startswith("i_") or name.startswith("r_"): values.append(0) elif name.startswith("b_"): values.append(False) elif name.startswith("s_"): values.append("") kvpairs = list(zip(propnames, values)) for a in atoms: for k, v in kvpairs: a.property[k] = v
[docs]def delete_atom_properties(st, propnames): """ Deletes atom properties for all atoms of the given structure(s). :type st: Union[Structure, List[Structure]] :type propnames: Union[str, Iterable[str]] """ if isinstance(st, Structure): st = [st] if isinstance(propnames, str): propnames = [propnames] for e in st: for pn in propnames: e.deletePropertyFromAllAtoms(pn)
[docs]def set_structure_properties(structures, keyvalues): """ Set the structure properties with the key-value pairs. :param structures: a list of structures (or a single structure) for which to set the specified properties :type structures: Union[Structure, Iterable[Structure]] :param keyvalues: The value must be either an iterable of `(key, value)` tuples, or a `dict` object whose key values will be used to update the structure properties. Note that the keys must be `str` and follow the MAE property name format. :type keyvalues: Optional[Union[dict, Iterable[Tuple[str, value]]]] """ if isinstance(structures, Structure): structures = [structures] if not isinstance(keyvalues, dict): keyvalues = list(keyvalues) for st in structures: st.property.update(keyvalues)
[docs]def delete_ffio_ff(ffio_ct: FFIOStructure) -> Structure: """ Remove ffio_ff block by first converting FFIOStructure to Structure and then deleting the block. """ ct = structure.Structure(ffio_ct.handle) handle = mm.mmct_ct_m2io_get_unrequested_handle(ct.handle) mm.m2io_delete_named_block(handle, constants.FFIO_FF) return ct
[docs]def delete_structure_properties(structures, propnames): """ For a supplied list of structures and structure property keys, delete the property values associated with those keys from each structure if possible. :param structures: a list of structure (or a single structure) from which to delete the specified properties :type structures: Union[Structrue, Iterable[Structure]] :param propnames: a list of structure property keys (or a single structure property keys) that should be deleted from the supplied structures :type propnames: Union[str, Iterable[str]] """ if isinstance(structures, Structure): structures = [structures] if isinstance(propnames, str): propnames = [propnames] for st in structures: for prop_key in propnames: if prop_key in st.property: del st.property[prop_key]
# DEPRECATED!!! Use `read_all_structures` below instead.
[docs]def read_all_ct(fname): return read_all_structures(fname)
[docs]def read_all_structures(fname: str) -> List[Structure]: """ Read a `*.mae` file and return all CTs in a list. """ return list(StructureReader(fname))
[docs]def set_atom_reference_coordinates( atom: _StructureAtom, coords: List[float], prop_names=constants.REFERENCE_COORD_PROPERTIES): """ :param atom: An atom whose properties will be updated to save the given reference coordinates :param coords: [x, y, z] coordinates for atom :param prop_names: Property names for the reference coordinates. Default REFERENCE_COORD_PROPERTIES. """ for i, p in enumerate(prop_names): atom.property[p] = coords[i]
[docs]def get_atom_reference_coordinates( atom: _StructureAtom, prop_names=constants.REFERENCE_COORD_PROPERTIES) -> List[float]: """ Return the given atom's reference coordinates that have been previously saved into the atom properties. If no reference coordinates was saved before, return the atom's current coordinates. :param prop_names: Property names for the reference coordinates. Default REFERENCE_COORD_PROPERTIES. """ ret_val = [atom.x, atom.y, atom.z] if set(prop_names).issubset(set(atom.property)): for i, p in enumerate(prop_names): ret_val[i] = atom.property[p] return ret_val
[docs]def set_ct_reference_coordinates(ct: Structure, prop_names=constants.REFERENCE_COORD_PROPERTIES ): """ Set reference coordinates for all atoms in `ct` using current coordinates. :param prop_names: Property names for the reference coordinates. Default REFERENCE_COORD_PROPERTIES. """ for atom in ct.atom: set_atom_reference_coordinates(atom, atom.xyz, prop_names)
[docs]def get_reference_ct( ct: Structure, prop_names=constants.REFERENCE_COORD_PROPERTIES) -> Structure: """ Returns a copy of the input structure. If the input structure contains previously saved reference coordinates, these coordinates are recovered in the return structure. :param prop_names: Property names for the reference coordinates. Default REFERENCE_COORD_PROPERTIES. """ ref_ct = ct.copy() for a in ref_ct.atom: a.xyz = get_atom_reference_coordinates(a, prop_names) return ref_ct
[docs]def fixup_duplicate_properties(cts: List[Structure]) -> List[Structure]: """ Return a copy of the original structures. In the returned structures, properties that only differ by the type are deleted. """ cts = list(ct.copy() for ct in cts) ct_properties = [] for ct in cts: ct_properties.extend(ct.property.keys()) ct_props_to_remove = _get_duplicate_properties(ct_properties) atom_properties = [] for ct in cts: # Need to loop through all atoms since the property # may be 'null' and not show up on the first atom for a in ct.atom: atom_properties.extend(a.property.keys()) atom_props_to_remove = _get_duplicate_properties(atom_properties) delete_structure_properties(cts, ct_props_to_remove) delete_atom_properties(cts, atom_props_to_remove) return cts
def _get_duplicate_properties(typed_props: List[str]) -> List[str]: """ Given a list of typed properties, return a list of properties that only differ by the type. :param typed_props: List of typed properties, e.g., ['i_sd_chiral_flag', 'b_sd_chiral_flag'] """ duplicate_properties = [] # map from property name to typed properties prop_name_to_typed_props = defaultdict(set) for typed_prop in typed_props: # typed_prop: t_prop_name prop_name_to_typed_props[typed_prop[2:]].add(typed_prop) for typed_props in prop_name_to_typed_props.values(): if len(typed_props) > 1: typed_props.pop() duplicate_properties.extend(typed_props) return duplicate_properties
[docs]def get_res_ct_and_atom_idx(res: _Residue, cap=False) -> Tuple[Structure, Dict[int, int]]: """ Given a `_Residue` object, extract the residue structure and a map of from the original atom index to the residue fragment atom index. :param res: Residue to extract :param cap: Set to True to add capping groups, default is False. These atoms are not included in the atom index. """ # Keep track of the original atom index TMP_IDX = 'i_fep_tmp_idx' for a in res.atom: a.property[TMP_IDX] = a.index # Extract the residue ct res_ct = res.extractStructure(copy_props=True) res_ct.title = res.pdbres # Add cap if requested if cap: build.add_hydrogens(res_ct) cap = captermini.CapTermini(res_ct, frag_min_atoms=0) res_ct = cap.outputStructure() # Map from original to residue atom index atom_idx = { a.property[TMP_IDX]: a.index for a in res_ct.atom if a.property.get(TMP_IDX) is not None } # Clean up the index for a in res.atom: del a.property[TMP_IDX] return res_ct, atom_idx
[docs]def align_cap(ct0: Structure, ct1: Structure) -> None: """ Given two capped peptides, set the coordinates for the cap groups in `ct1` to match those of `ct0`. :param ct0: The reference structure. :param ct1: The structure to modify in place. """ atom_to_coord = {} for a in ct0.atom: if a.pdbres.strip() in ['ACE', 'NMA']: atom_to_coord[(a.pdbname, a.pdbres)] = a.xyz for a in ct1.atom: if a.pdbres.strip() in ['ACE', 'NMA']: a.xyz = atom_to_coord[(a.pdbname, a.pdbres)]
# DEPRECATED!!! Use something like `util.str2hexid(ct.title)` instead.
[docs]def hash_title(ct: Structure) -> str: """ Hash a structure's title string and return the hash. :param ct: Structure to get title from. """ return util.str2hexid(ct.title)
[docs]def struc_iter(*structures): """ Iterates over all structures passed in as the arguments. Containers of `Structure` objects will be flatten out and iterated over. Non-`Structure` objects will be skipped. For example: struc_iter(st0, st1, None, [st2, st3, [st4, None, 1, "2", 3.0, st5]]) The iteration sequence will be `st0, st1, st2, st3, st4, st5`. """ for e in structures: if isinstance(e, Structure): yield e elif isinstance(e, collections.abc.Iterable) and not isinstance(e, str): yield from struc_iter(*e)
[docs]def struc_merge(*structures): """ Merges all given structures and returns a single `Structure` object. For example: struc_merge(st0, st1, None, [st2, st3, [st4, None, 1, "2", 3.0, st5]]) The returned object will be a merged structure containing `st0`, `st1`, `st2`, `st3`, `st4`, and `st5`, in that order. """ merged = create_new_structure(0) for e in structures: if isinstance(e, Structure): merged = structure.Structure( mm.mmffld_extendCTwithCustomCharges(merged.handle, e)) elif isinstance(e, collections.abc.Iterable) and not isinstance(e, str): merged = structure.Structure( mm.mmffld_extendCTwithCustomCharges(merged.handle, struc_merge(*e))) return merged
[docs]class CompStruc: __slots__ = ("solute", "membrane", "ion", "solvent", "cosolvent", "crystal_water", "alchemical_water", "positive_salt", "negative_salt", "receptor", "ligand", "fep_ref", "fep_mut", "gcmc_solvent") # NOTE: Order matters, additional properties should be appended to # this __slots__. # FIXME: Correctly assign ligand # Joe's comment: # It really depends on the fep type. # For protein fep, the ligand would be a stand alone ligand structure # and fep_ref, fep_mut contain the wildtype/mutant protein structures. # For covalent fep, the asl "protein" will match, so ligand will be # empty and the receptor will contain both structures. # Whether this is ok depends on your use case. # But you can't assume that ligands = fep_ref + fep_mut. """ Definitions of the components (tentative for now): - solute CT_TYPE is "solute" - membrane CT_TYPE is "membrane" - ion CT_TYPE is "ion", if such structure is not defined, it will be the sum of the "positive_salt" and the "negative_salt" component structures. - solvent CT_TYPE is "solvent" and any inactive solvent molecules have been removed - gcmc_solvent CT_TYPE is "solvent" and all solvent molecules are included, even inactive ones - cosolvent CT_TYPE is "cosolvent" - crystal_water CT_TYPE is "crystal_water" - alchemical_water The water structure in either the "fep_ref" or the "fep_mut" component structure. - positive_salt CT_TYPE is "positive_salt" - negative_salt CT_TYPE is "negative_salt" - receptor All "solute" structures that contain a "large" (more than 5 residues) protein molecule. - ligand All "solute" structures that are not "receptor". For FEP systems, this component includes both the reference and the mutant species. - fep_ref One of the "ligand" structures whose structure-level property `constants.FEP_FRAGNAME`'s value is `"none"`. - fep_mut One of the "ligand" structures whose structure-level property `constants.FEP_FRAGNAME`'s value is NOT `"none"`. Several things to note: - The value of each component will be either `None`, or a single `Structure` object, or a `tuple` of two (or more) `Structure` objects. If this causes inconveniences of having to check the type, consider using the above `struc_iter` and `struc_merge` functions, which will always return an iterator of structures or a single merged structure. - Some components may have overlaps. For example, the "receptor", "ref", and "mut" component structures are also included in "solute". - Some components may have more than one CTs, e.g., "solute", whereas some components may originally be only part of one CT, e.g., "alchemical_water", and now a new CT which is extracted from the original one. - If there are two or more `Structure` objects in a component, the order of these objects is guaranteed to be the same as the original. - This class is similiar to namedtuple but its attributes can be set after creating the object. """
[docs] def __init__(self, *args): num_slots = len(self.__slots__) for attr, arg in zip_longest(self.__slots__, args[:num_slots]): setattr(self, attr, arg)
def __str__(self): s = "" for name, attr in zip(self.__slots__, self): if attr: s += f"{name}:\n" for st in struc_iter(attr): s += ( f" {st}, {st.title}: {st.mol_total} molecules, " + f"{len(st.residue)} residues, {st.atom_total} atoms\n") return s def __repr__(self): return str(self) def __iter__(self): for name in self.__slots__: yield getattr(self, name)
[docs] def __len__(self): return len(self.__slots__)
def __getitem__(self, index): return getattr(self, self.__slots__[index]) def __setitem__(self, index, value): setattr(self, self.__slots__[index], value)
[docs]def component_structures(model: "Cms") -> CompStruc: def append(c: Optional[List], t: list): return (c and (c + t)) or t ret = CompStruc() has_solvent = False for st in model.comp_ct: ct_type = st.property[CT_TYPE] if ct_type == CT_TYPE.VAL.SOLUTE: ret.solute = append(ret.solute, [st]) elif ct_type == CT_TYPE.VAL.SOLVENT: has_solvent = True ret.gcmc_solvent = append(ret.gcmc_solvent, [st]) elif ct_type == CT_TYPE.VAL.MEMBRANE: ret.membrane = append(ret.membrane, [st]) elif ct_type == CT_TYPE.VAL.ION: ret.ion = append(ret.ion, [st]) elif ct_type == CT_TYPE.VAL.COSOLVENT: ret.cosolvent = append(ret.cosolvent, [st]) elif ct_type == CT_TYPE.VAL.POSITIVE_SALT: ret.positive_salt = append(ret.positive_salt, [st]) elif ct_type == CT_TYPE.VAL.NEGATIVE_SALT: ret.negative_salt = append(ret.negative_salt, [st]) elif ct_type == CT_TYPE.VAL.LIGAND: ret.ligand = append(ret.ligand, [st]) elif ct_type == CT_TYPE.VAL.RECEPTOR: ret.receptor = append(ret.receptor, [st]) if has_solvent: active_model = gcmc_utils.remove_inactive_solvent(model, copy_if_gcmc=True) for st in active_model.comp_ct: if st.property[CT_TYPE] == CT_TYPE.VAL.SOLVENT: ret.solvent = append(ret.solvent, [st]) if not ret.ion: if ret.positive_salt: ret.ion = append(ret.ion, ret.positive_salt) if ret.negative_salt: ret.ion = append(ret.ion, ret.negative_salt) if not ret.receptor and ret.solute: ret.receptor = [] for st in ret.solute: if analyze.evaluate_asl(st, "protein"): # Excludes small peptide. # 5 residue cutoff is chosen mainly because in protein FEPs # the fragment cut out of the protein is practically 5 # residues maximum. if len(st.residue) > 5: ret.receptor.append(st) if not ret.ligand and ret.solute: ret.ligand = [] for st in ret.solute: if st not in ret.receptor: ret.ligand.append(st) if not ret.fep_ref and ret.solute: for st in ret.solute: if st.property.get(constants.FEP_FRAGNAME) == "none": ret.fep_ref = [st] break if not ret.fep_mut and ret.solute: for st in ret.solute: if st.property.get(constants.FEP_FRAGNAME) != "none": ret.fep_mut = [st] break if not ret.alchemical_water: # We do NOT assume which FEP structure the alchemical water is in, # but it has to be in only one of the FEP structures. for st in struc_iter(ret.fep_ref, ret.fep_mut): indices = analyze.evaluate_asl( st, f'a.{constants.ALCHEMICAL_WATER} > 0') if indices: ret.alchemical_water = [st.extract(indices)] break # Converts list into tuples, and reduces one-tuple to the element. for i, val in enumerate(ret): if val: ret[i] = tuple(val) if len(val) > 1 else val[0] return ret
[docs]def write_structures(strucs: Iterable[Optional[Structure]], fname: Optional[str] = None, *, overwrite: bool = True) -> Optional[str]: """ Writes the given `Structure` objects into the same file (or string) in the MAE format. Similar functions: - structure.write_ct_to_string - structure.write_cts :param structures: `Structure` objects to be written. `None`, are allowed to be in the iterable and will be ignored. If the iterable is empty or contains only `None`, for writing to a file this function will have no side effects, for writing to a string an empty string will be returned. :param fname: Name of the file to be written. If it's `None` or an empty string, the structures will be written into a string. :param overwrite: If true, this function will overwrite the file; otherwise, it will append the structures into the existing file. :return: Returns a string which the structures are written into, or `None` if the structures are written into a file. """ strucs = filter(None, strucs) if fname: strucs = list(strucs) if strucs: # Ensures empty `strucs` has no effects on files in disk. with structure.MaestroWriter(fname, overwrite) as writer: for st in strucs: writer.append(st) else: return "\n".join(st.writeToString(structure.MAESTRO) for st in strucs)
[docs]def find_duplicate_titles(strucs: Iterable[Structure]) -> List[Tuple[int, str]]: """ Return a list of tuples, where each tuple gives the duplicate index and title in `strucs`. Return an empty list if no duplicate titles were found. """ seen_titles = set() duplicated_titles = [] for index, st in enumerate(strucs, start=1): if st.title not in seen_titles: seen_titles.add(st.title) else: duplicated_titles.append((index, st.title)) return duplicated_titles
[docs]def get_fep_structures(strucs: Iterable[Structure], struc_tags: List[str]) -> List[Structure]: """ Structures with a `FEP_STRUC_TAG` that match one of the `struc_tags` will be returned. """ return [ struc for struc in strucs if struc.property.get(constants.FEP_STRUC_TAG) in struc_tags ]
[docs]def is_charged_or_formal_charged(st: structure.Structure) -> bool: return st.formal_charge != 0 or any(a.formal_charge != 0 for a in st.atom)
[docs]def find_box(cts: Iterable[structure.Structure]) -> Optional[Tuple[float]]: """ Look for box dimension properties on any of the given structures. :raise ValueError: if there are different box values on different environment strucs :return: a 9-tuple of box vector values or None if no box is found """ from schrodinger.application.desmond import cms box = None for ct in cts: try: cur_box = cms.get_box(ct) except KeyError: continue if box is not None and not np.allclose(cur_box, box, atol=1E-3, rtol=0): # check if cur_box differs from another box raise ValueError("Conflicting boxes on input structures") box = cur_box return box
def _neutralize_atoms(mol: Chem.rdchem.Mol) -> Chem.rdchem.Mol: """ Neutralize the atoms in a molecule. Based on the RDKIT cookbook: https://rdkit.org/docs/Cookbook.html This function is under the https://creativecommons.org/licenses/by-sa/4.0/legalcode. This had been modified from the original form. :param mol: A molecule that may or may not be charged. :return: The neutralized molecule. """ # Get list of atoms with charge pattern = Chem.MolFromSmarts( "[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]") at_matches = mol.GetSubstructMatches(pattern) at_matches_list = [y[0] for y in at_matches] for at_idx in at_matches_list: # Get charged atom from the structure atom = mol.GetAtomWithIdx(at_idx) chg = atom.GetFormalCharge() hcount = atom.GetTotalNumHs() # Neutralize the atom atom.SetFormalCharge(0) # Update hydrogens based on the charge atom.SetNumExplicitHs(hcount - chg) atom.UpdatePropertyCache() return mol
[docs]def get_reduced_smiles(st: structure.Structure) -> str: """ Read in a molecular structure and return a SMILES that has canonicalized the charge and tautomer state. NOTE: Canonicalization is tied to a specific schrodinger release, and can only be compared to other keys generated from this function. Different versions may produce different results as bugs are fixed. :param st: A structure of a molecule that could be in an arbitrary charge or tautomer state. :return: The SMILES of the structure that has a canonical charge and tautomer. """ ORIG_IDX = 'ORIG_IDX' enumerator = rdMolStandardize.TautomerEnumerator() with rdkit_adapter.suppress_rdkit_log(): smile = analyze.generate_smiles(st, unique=True) m = Chem.MolFromSmiles(smile) # Save the chirality tag and formal charge of each atom chirality_chrg = { a.GetIdx(): (a.GetChiralTag(), a.GetFormalCharge()) for a in m.GetAtoms() } for a in m.GetAtoms(): a.SetIntProp(ORIG_IDX, a.GetIdx()) # Neutralize molecule atom by atom: m = _neutralize_atoms(m) # Get the atomic formal charges after neutralization final_charge = { int(a.GetProp(ORIG_IDX)): a.GetFormalCharge() for a in m.GetAtoms() } # Standardize the tautomer m = enumerator.Canonicalize(m) # Re-instate the atomic chirality before being neutralized, EXCEPT if the charge on the atom has changed. for a in m.GetAtoms(): i = int(a.GetProp(ORIG_IDX)) chirality, orig_charge = chirality_chrg[i] if (orig_charge - final_charge[i]) == 0: a.SetChiralTag(chirality) return Chem.MolToSmiles(m)