Source code for schrodinger.application.jaguar.utils

"""
Jaguar utility functions.

Copyright Schrodinger, LLC. All rights reserved.

"""

import base64
import hashlib
import itertools
import os
import re
import sys
from collections import Counter
from collections import namedtuple

import numpy as np

from schrodinger.utils.fileutils import WINDOWS_EXTENDED_PATH_TAG
from schrodinger.utils.units import BOLTZMANN_HARTREE_PER_KELVIN as BOLTZ
from schrodinger.utils.units import MOLAR_GAS_CONSTANT as IDEAL_GAS_CONSTANT_SI
from schrodinger.utils.units import KCAL_PER_MOL_PER_HARTREE as KCAL
from schrodinger.utils.units import PASCALS_PER_ATM as PASCAL_IN_ATM
from schrodinger.application.jaguar.constants import STD_CONC_FREE_ENERGY
from schrodinger.application.jaguar.constants import \
    STD_CONC_INF_SEP_FREE_ENERGY
from schrodinger.application.jaguar.constants import TOTAL_FREE_ENERGY
from schrodinger.application.jaguar.constants import TOTAL_INF_SEP_FREE_ENERGY
from schrodinger.infra import mm
from schrodinger.structure import Structure

FreeEnergy = namedtuple('FreeEnergy', ['temp', 'gibbs', 'property_key'])
LewisStructure = namedtuple('LewisStructure',
                            ['charges', 'bonds', 'score', 'unpaired'])
ChgAt = namedtuple('ChgAt', ['index', 'charge'])
STD = 0
PRINT = 1
RINGCHAIN = 2
THOROUGH = 3
FINDALL = 4
LEWIS_MODES = (STD, PRINT, RINGCHAIN, THOROUGH, FINDALL)

# Search for a posteriori model suffixes,
# see base_functional
DISP_RE = re.compile(
    r"""-( 
                        d2? # earlier d models
                        |d3m?(\(bj\))? #d3 models
                        |ulg|mm|loc(-mm)? # other corrections
                        )$""", re.VERBOSE | re.IGNORECASE)


[docs]def append_outfiles_to_recover_file(recover_file, outfiles): """ Append list of output file paths to a YAML-format .recover file. :type recover_file: str :param recover_file: .recover file name :type outfiles: list of str :param outfiles: list of output file paths """ with open(recover_file, 'a') as fh: for outfile in outfiles: tmp = f"- '{outfile}'\n" fh.write(tmp)
[docs]def get_jobname(prefix, str_to_hash): """ Construct a jobname based on the given string prefix (typically the backend script name) and a string to be hashed (typically based on the cmdline being used to invoke the job.) """ hasher = hashlib.sha1(str_to_hash.encode('utf-8')) suffix = base64.urlsafe_b64encode(hasher.digest()[:6]).decode('utf-8') jobname = prefix + '.' + suffix return jobname
def _extract_basename(filename): """ Get basename of file, removing path and extension Only removes rightmost extension, e.g. `_extract_basename("inp.mae.gz")->"inp.mae"` :type filename: str :param filename: full path of file with extensions """ return os.path.splitext(os.path.basename(filename))[0]
[docs]def restart_name(input_name, mae_name=None): """Wrapper to extract restartname from mmjag routine Allows for calls where `mae_name` is not defined or where input name is an empty string (would cause segfault in mmjag function) """ if input_name == "": return ".01" if mae_name is None: mae_name = " " restartname = mm.mmjag_restartname(input_name, mae_name) return restartname
[docs]def basic_windows_path(dos_path): """ Convert extended length Windows path to standard. Does nothing on other OS's. :type dos_path: string :param dos_path: a (Windows) file path, which may have WINDOWS_EXTENDED_PATH_TAG to indicate extended path length :rtype: string :return: A file path which has not extended length tags """ if sys.platform == 'win32' and os.path.isabs( dos_path) and dos_path.startswith(WINDOWS_EXTENDED_PATH_TAG): dos_path = dos_path.replace(WINDOWS_EXTENDED_PATH_TAG, '', 1) return dos_path
[docs]def get_stoichiometry_string(atom_list): """ Take atom list and return stoichiometry string. For example, atom_list = ['H', 'H', 'O'] yields stoichimetry string = 'H2O'. :param atom_list: list of strings :type atom_list: list :return: stoichiometry string :rtype: str """ c = list(Counter(atom_list).items()) sc = sorted(c) stoich = '' for atom_name, num in sc: if num == 1: index = '' else: index = str(num) if atom_name: stoich += atom_name + index return stoich
def _get_atom_elements(jag_input): """ This function returns list of elements for atoms stored in a given JaguarInput object. This function is only used when calling validate_stoichiometry function. :return: list of atom elements :rtype: list """ atoms = [] if jag_input.hasStructure(): st = jag_input.getStructure() atoms = [atom.element for atom in st.atom] return atoms def _get_atoms_and_charge(inputs): """ This function iterates over the list of given JaguarInput objects and creates a list of all atom element names as well as total charge. This function is only used when calling validate_stoichiometry function. :return: tuple that contains list of atom elements and total charge :rtype: tuple """ atoms = [] charge = 0 for jag_input in inputs: atoms += _get_atom_elements(jag_input) charge += jag_input.getValue('molchg') return atoms, charge
[docs]def validate_stoichiometry(reactants, products): """ This function validates stoichiometry for a reaction defined by the list of reactants and products. If stoichiometry is not valid this function return text string explaining what was wrong. In case of valid stoichiometry returns None. :param reactants: list of `JaguarInput` objects for reactants :type reactants: list :param products: list of `JaguarInput` objects products :type products: list :return: string with warning message or None :rtype: str or None """ reactant_atoms, reactant_charge = _get_atoms_and_charge(reactants) reactant_stoich = get_stoichiometry_string(reactant_atoms) product_atoms, product_charge = _get_atoms_and_charge(products) product_stoich = get_stoichiometry_string(product_atoms) msg = None if len(reactant_atoms) != len(product_atoms): msg = ("Numbers of reactants atoms (%d) and products atoms (%d)" " differ." % (len(reactant_atoms), len(product_atoms))) if reactant_stoich != product_stoich: msg = ("Total stoichiometries of reactants (%s) and products (%s)" " differ." % (reactant_stoich, product_stoich)) if reactant_charge != product_charge: msg = ("Total charges of reactants (%d) and products (%d)" " differ." % (reactant_charge, product_charge)) return msg
[docs]def base_functional(func): """ Extract basename of DFT functional Removes a posteriori model suffixes from the functional, e.g. B3LYP-D3 -> B3LYP PBE0-d -> PBE0 b3lyp-loc-MM -> b3lyp :param func: full name of functional, case insensitive :type func: string :return: functional name, with corrections (e.g. dispersion, LOC) removed :rtype: str """ return re.sub(DISP_RE, '', func)
[docs]def get_number_electrons(st): """ Count the number of electrons disregarding charges. :type st: Structure instance :param st: the structure :rtype: int :return: the number of electrons """ n = sum([at.atomic_number for at in st.atom]) return n
[docs]def get_total_charge(structure): """ Return the total charge of the structure If the property i_m_Molecular_charge is defined we use that, else we sum the formal charges :param structure: whose total charge must be calculated :type structure: Structure object :return: total charge of structure :rtype: int """ return structure.property.get( 'i_m_Molecular_charge', sum([a.formal_charge for a in structure.atom]))
[docs]def elmnt_mult_dict(): """ make a dictionary of element:multiplicity for all neutral elements up to Lawrencium The values are from the ground state term symbol as reported by NIST at http://physics.nist.gov/PhysRefData/Elements/index.html as of 4.2014 """ # elmnt name, multiplicity elmnt_mult = { 'H': 2, 'He': 1, 'Li': 2, 'Be': 1, 'B': 2, 'C': 3, 'N': 4, 'O': 3, 'F': 2, 'Ne': 1, 'Na': 2, 'Mg': 1, 'Al': 2, 'Si': 3, 'P': 4, 'S': 3, 'Cl': 2, 'Ar': 1, 'K': 2, 'Ca': 1, 'Sc': 2, 'Ti': 3, 'V': 4, 'Cr': 7, 'Mn': 6, 'Fe': 3, 'Co': 4, 'Ni': 3, 'Cu': 2, 'Zn': 1, 'Ga': 2, 'Ge': 3, 'As': 4, 'Se': 3, 'Br': 2, 'Kr': 1, 'Rb': 2, 'Sr': 1, 'Y': 2, 'Zr': 3, 'Nb': 6, 'Mo': 7, 'Tc': 6, 'Ru': 5, 'Rh': 4, 'Pd': 1, 'Ag': 2, 'Cd': 1, 'In': 2, 'Sn': 3, 'Sb': 4, 'Te': 3, 'I': 2, 'Xe': 1, 'Cs': 2, 'Ba': 1, 'La': 2, 'Ce': 1, 'Pr': 4, 'Nd': 5, 'Pm': 6, 'Sm': 7, 'Eu': 8, 'Gd': 9, 'Tb': 6, 'Dy': 5, 'Ho': 4, 'Er': 3, 'Tm': 2, 'Yb': 1, 'Lu': 2, 'Hf': 3, 'Ta': 4, 'W': 5, 'Re': 6, 'Os': 5, 'Ir': 4, 'Pt': 3, 'Au': 2, 'Hg': 1, 'Tl': 2, 'Pb': 3, 'Bi': 4, 'Po': 3, 'At': 2, 'Rn': 1, 'Fr': 2, 'Ra': 1, 'Ac': 2, 'Th': 3, 'Pa': 4, 'U': 5, 'Np': 6, 'Pu': 7, 'Am': 8, 'Cm': 9, 'Bk': 6, 'Cf': 5, 'Es': 4, 'Fm': 3, 'Md': 2, 'No': 1, 'Lr': 2 } # in case we want to support these at some point # 'Rf':3} # mmlibs seem to have stopped here # 'Db':4, # 'Sg':5, # not listed on NIST - use previous row to guess # 'Bh':6, # 'Hs':5, # 'Mt':4, # 'Ds':1, # 'Rg':2, # 'Cn':2, # 'Uut':2, # 'Fl':3, # 'Uup':4, # 'Lv':3, # 'Uus':2, # 'Uuo':1} return elmnt_mult
[docs]def remove_gibbs_energies(st, allowed_temps=()): """ Remove gibbs energy properties from a structure but allow some exceptions. This allows one to 'unclutter' the project table. :type st: Structure :param st: structure containing gibbs energy properties :type allowed_temps: tuple :param allowed_temps: temperatures that are allowed to remain as properties """ for b1, b2 in itertools.product([True, False], [True, False]): for t in parse_gibbs_energies(st, inf_sep=b1, std_conc=b2): if t not in allowed_temps: key = gibbs_energy_property_string(t, inf_sep=b1, std_conc=b2) st.property.pop(key)
[docs]def parse_gibbs_energies(st, inf_sep=False, std_conc=False): """ Extract the temperature, gibbs energy and property keys for free energy and store these in a dict relating temperature to FreeEnergy instances. :type st: Structure :param st: the structure :type std_con: bool :param std_con: True indicates Gibbs energy at std state concentration :return: a dict relating temperature to a FreeEnergy instance with attributes storing these data """ key = _gibbs_energy_property_string_prefix(inf_sep, std_conc) regex = key + r"([0-9]+\.[0-9]{2})K_\(au\)" gibbs = {} for key in st.property.keys(): m = re.match(regex, key) if m is not None: temp = float(m.groups()[0]) g = st.property[key] gibbs[temp] = FreeEnergy(temp, g, key) return gibbs
[docs]def gibbs_energy_property_string(temp, inf_sep=False, std_conc=False): """ Construct the property key string for Gibbs energy at a particular temperature :type temp: float :param temp: temperature in Kelvin :type inf_sep: bool :param inf_sep: True indicates infinitely separated energy :type std_conc: bool :param std_conc: True indicates Gibbs energy at std state concentration :return: a property key string """ key = _gibbs_energy_property_string_prefix(inf_sep, std_conc) return key + "%.2fK_(au)" % temp
def _gibbs_energy_property_string_prefix(inf_sep, std_conc): """ Retrieve the prefix property key string for Gibbs energy :type inf_sep: bool :param inf_sep: True indicates infinitely separated energy :type std_conc: bool :param std_conc: True indicates Gibbs energy at std state concentration :return: the first part of the property key string """ if std_conc: if inf_sep: key = STD_CONC_INF_SEP_FREE_ENERGY else: key = STD_CONC_FREE_ENERGY else: if inf_sep: key = TOTAL_INF_SEP_FREE_ENERGY else: key = TOTAL_FREE_ENERGY return key
[docs]def convert_gibbs_energy_to_std_conc(st): """ Convert std state (1 atm) Gibbs energies to a std state of 1 Molar concentration. The energies are returned as a dict relating temperature to FreeEnergy instances. The energies are also stored as structure level properties. This is intended to be used with AutoTS for rate calculations and we assume the free energies were computed at 1 atm of pressure. :type st: Structure :param st: the structure :return: a dict relating temperature to a FreeEnergy instance with attributes storing these data """ std_p_gibbs_data = parse_gibbs_energies(st) std_c_gibbs_data = {} for t, free_energy in std_p_gibbs_data.items(): std_c_gibbs = compute_std_conc_gibbs_energy( free_energy.gibbs, t, 1.0, 1.0) if free_energy.gibbs is not None else None std_c_gibbs_key = gibbs_energy_property_string(t, inf_sep=False, std_conc=True) std_c_gibbs_data[t] = FreeEnergy(t, std_c_gibbs, std_c_gibbs_key) st.property[std_c_gibbs_key] = std_c_gibbs return std_c_gibbs_data
[docs]def compute_std_conc_gibbs_energy(gibbs, temp, press, con): """ Convert Gibbs energy which was computed at a pressure of press to a concentration of con using the formula G = G_0 + kT log(CRT/P_0) where we've used the ideal gas law to relate P = CRT :type gibbs: float :param gibbs: Gibbs free energy in a.u. evaluated at a pressure of press :type press: float :param press: Pressure at which the Gibbs energy was evaluated in atm :type temp: float :param temp: Temperature at which the Gibbs energy was evaluated in Kelvin :type con: float :param con: Concentration which defines the standard state in moles/Liter """ # For computing TST rate constants is convenient to convert # the Gibbs energy from a standard pressure (1 atm) to # a standard concentration of 1 mol/Liter # see Ch. 22 of Physical Chemistry by Ira Levine beta = beta_au(temp) liter_in_m3 = 1.0e3 CRT = con * IDEAL_GAS_CONSTANT_SI * temp * liter_in_m3 P0 = press * PASCAL_IN_ATM correction = (1.0 / beta) * np.log(CRT / P0) return gibbs + correction
[docs]def group_items(items, comparator, *args): """ Put items into groups using a comparator. These will be returned as a list of lists, each list representing a group. The first item of the first group will be the first item in the list items. The groups have the property that Comparator(item1, item2, args) returns True for all pairs in a group. :type items: list :param items: a list of items to group :type comparator: function :param comparator: this function compares two items and returns a boolean indicating whether or not they are equivalent. :type args: argument list :param args: arguments passed to the comparator which is called as comparator(item1, item2, `*args`) :return: a list of lists of items """ groups = [] for item in items: found_group = False for group in groups: if all(comparator(item, item2, *args) for item2 in group): group.append(item) found_group = True break if not found_group: groups.append([item]) return groups
[docs]def beta_au(temp): """ Beta = 1/kT in a.u. """ kT = BOLTZ * temp return 1.0 / kT
[docs]def beta_kcalmol(temp): """ Beta = 1/kT in kcal/mol """ k_kcal = BOLTZ * KCAL beta = 1.0 / (k_kcal * temp) return beta
[docs]def copy_structure_bonding(ref_st, updated_st): """ Copy all bonding and formal charges from reference structure onto a structure instance we want to update. Also update FF atom-typing. Assumes number of atoms and atom numbering is the same in both structures. :type ref_st: Structure object :param ref_st: structure to copy bonding from :type updated_st: Structure object :param updated_st: structure to copy bonding to """ if ref_st.atom_total != updated_st.atom_total: msg = 'Structures have different numbers of atoms.' msg += f'\n{ref_st.atom_total} != {updated_st.atom_total}' raise RuntimeError(msg) # Update relevant bonding and structure-level properties for bond in list(updated_st.bond): updated_st.deleteBond(bond.atom1.index, bond.atom2.index) for bond in ref_st.bond: updated_st.addBond(bond.atom1.index, bond.atom2.index, bond.order) # Update atom-level bonding info for ref_at, updated_at in zip(ref_st.atom, updated_st.atom): assert ref_at.atomic_number == updated_at.atomic_number if ref_at.atomic_number > 0: assert np.allclose(ref_at.xyz, updated_at.xyz, atol=1e-5) updated_at.formal_charge = ref_at.formal_charge # This is intended to copy over the FF atom-typing updated by mmjag # to keep the code consistent with legacy behaviour. Apparently this # gives different results from using .retype() for ref_at, at in zip(ref_st.atom, updated_st.atom): at.atom_type = ref_at.atom_type
[docs]def sync_dummy_atoms(ref_st, st): """ Sync dummy atoms in ref_st and st. This is useful after calling mmjag_connect to reset bonding. We check atomic number and XYZ coordinates to classify which atoms are the "same" or "different". But note for dummy atoms, we do not require XYZ to be the same. We then try to sync the two structures by handling two possibilities: (1) Existence of new dummy atoms at the end of the atom list in ref_st compared to st; they will be added to the end of st. (2) Non-existence of dummy atoms at any location in ref_st compared to st; they will be deleted from st. Otherwise, an AssertionError is raised. :type ref_st: Structure object :param ref_st: structure to copy atoms from :type st: Structure object :param st: structure to copy atoms to """ # First pass, delete any dummy atoms from st not present in ref_st ir = 1 to_delete = [] for iu in range(1, st.atom_total + 1): # This loop range assumes only dummy atoms differ if (ref_st.atom[ir].atomic_number == st.atom[iu].atomic_number) and \ np.allclose(ref_st.atom[ir].xyz, st.atom[iu].xyz, atol=1e-5): # Atoms are the same, move on to next pair ir += 1 else: # Atoms are the not the same. if (ref_st.atom[ir].atomic_number == st.atom[iu].atomic_number) and \ (st.atom[iu].atomic_number <= 0): # dummy atom position has changed; we allow it ir += 1 elif st.atom[iu].atomic_number <= 0: # st atom is an extra dummy, so mark it for deletion to_delete.append(iu) elif ref_st.atom[iu].atomic_number <= 0: msg = 'Extra dummy atom not added to end of ref_st' raise AssertionError(msg) else: msg = 'Unexpected non-dummy atoms differ. Aborting...' raise AssertionError(msg) st.deleteAtoms(to_delete) # Second pass, add any dummy atoms from ref_st not present in st new_atoms = [] assert ref_st.atom_total >= st.atom_total for ref_at, at in itertools.zip_longest(ref_st.atom, st.atom): # We assume any extra dummy atoms are added to the end of the atom list if at is not None: # Double-check that the atoms are the same when present in both assert ref_at.atomic_number == at.atomic_number if at.atomic_number > 0: assert np.allclose(ref_at.xyz, at.xyz, atol=1e-5) else: # Check that we're only adding a dummy atom, then add it assert ref_at.atomic_number <= 0 x, y, z = ref_at.xyz new_atoms.append(ref_at.index) st.addAtom('Du', x, y, z, color=ref_at.color, atom_type=ref_at.atom_type) # 'Du' element type is not guaranteed to have the same properties as # "mmjag dummy" atoms, so we manually set some. for i in new_atoms: st.atom[i].atomic_number = ref_st.atom[i].atomic_number st.atom[i].name = ref_st.atom[i].name # Double-check atom ordering and numbers are now the same assert ref_st.atom_total == st.atom_total for ref_at, at in zip(ref_st.atom, st.atom): assert ref_at.atomic_number == at.atomic_number if at.atomic_number > 0: assert np.allclose(ref_at.xyz, at.xyz, atol=1e-5)
[docs]def mmjag_reset_connectivity(st): """ Reset connectivity and Lewis structure using mmjag algorithm. i.e input st is modified to have new bonds, bond orders and formal charges. All other properties should be preserved. :type st: Structure object :param st: structure to clean up """ charge = get_total_charge(st) # Create temporary new structure mm.mmjag_initialize(mm.error_handler) # Pass in copy just in case there are unknown side-effects to calling mmjag st_tmp = st.copy() handle = mm.mmjag_connect(st_tmp, charge) new_st = Structure(handle) mm.mmjag_terminate() # Sync dummy atoms added or removed by mmjag onto the input Structure sync_dummy_atoms(new_st, st) # Copy new bonding and charges onto input structure copy_structure_bonding(new_st, st) assert get_total_charge(st) == charge assert sum([a.formal_charge for a in st.atom]) == charge
[docs]def mmjag_update_lewis(st, mode=STD): """ Update Lewis structure for a given connectivity using mmjag algorithm. This can be used instead of, or to complement, e.g. the mmlewis code. Unlike mmjag_reset_connectivity, the connectivity will be preserved. If mode is PRINT, the lewis structure determination information will be returned If mode is RINGCHAIN, special Lewis structure scoring for ring-chain tautomers will be used (and lewis structure information will be returned) If mode if THOROUGH, the lewis structure search will be lengthened If mode if FINDALL, the lewis structure search will be lengthened and suboptimal results will be included in the output data (which is sorted to have the best first). If the mmjag Lewis code fails to update the bonding, the original bonding will be preserved. :type st: Structure object :param st: structure to clean up :type mode: int :param mode: specifies if std or ring-chain scoring should be used and whether or not the data is returned from lewis.cpp. Only STD, PRINT, RINGCHAIN, THOROUGH, and FINDALL are acceptable values :rtype: list of LewisStructure namedtuples :return: If mode is PRINT, RINGCHAIN, THOROUGH, or FINDALL, parsed lewis structure data is returned as namedtuple; an empty list is returned if mode is STD. """ assert mode in LEWIS_MODES # Store the total charge before resetting it. charge = get_total_charge(st) # We need to reset the Lewis structure before calling the mmjag # code to trigger it to do something useful. # i.e. make all bonds single bonds and remove all charges. new_st = st.copy() for bond in new_st.bond: bond.order = 1 for at in new_st.atom: at.formal_charge = 0 # Update Lewis structure, preserving connectivity mm.mmjag_initialize(mm.error_handler) try: handle, lewis_data = mm.mmjag_create_lewis(new_st, charge, mode) new_st = Structure(handle) except mm.MmException as e: if e.rc == mm.MMJAG_LEWIS_FAIL: print('WARNING: mmjag_update_lewis failed!') return else: raise e finally: mm.mmjag_terminate() # Loosely check that atom ordering is preserved, ignoring dummy atoms assert [at.atomic_number for at in st.atom if at.atomic_number > 0] == [ at.atomic_number for at in new_st.atom if at.atomic_number > 0 ] # Copy new Lewis structure onto old structure copy_structure_bonding(new_st, st) assert get_total_charge(st) == charge assert sum([a.formal_charge for a in st.atom]) == charge return parse_lewis_data(lewis_data)
[docs]def parse_lewis_data(lewis_data): """ Parse the string-formatted Lewis data from mmjag into a data structure for easy use. :type lewis_data: list of strings: :param lewis_data: the line-by-line output of mmjag/lewis.cpp run on a structure :rtype: list of LewisStructure namedtuples :return: Namedtuples containing charge, multiple bond, and score """ charges, unpaired = get_charges(lewis_data) bonds = get_bonds(lewis_data) scores = get_scores(lewis_data) assert len(charges) == len(unpaired) assert len(charges) == len(bonds) assert len(charges) == len(scores) parsed_lewis_structures = [] for c, u, b, s in zip(charges, unpaired, bonds, scores): new_lewis = LewisStructure(charges=c, unpaired=u, bonds=b, score=s) parsed_lewis_structures.append(new_lewis) return parsed_lewis_structures
[docs]def get_charges(lewis_output): """ Parse charges from lewis code string output :type lewis_output: list of strings :param lewis_output: the line-by-line output of mmjag/lewis.cpp run on a structure :rtype: (list of list of ChgAt namedtuples, list of bools) :return: For each structure in the lewis_output (the length of the list), list of the charged atom and their charges, whether or not structure contains unpaired spins """ charges = [] unpaired = [] charge_line = [line for line in lewis_output if "charges:" in line] for line in charge_line: unpaired.append("(.)" in line) matches = re.findall(r'(\d+)\((-?\d+)\)', line) charges.append([ChgAt(int(i), int(j)) for i, j in matches]) return charges, unpaired
[docs]def get_bonds(lewis_output): """ Parse bonds from lewis code string output :type lewis_output: list of strings :param lewis_output: the line-by-line output of mmjag/lewis.cpp run on a structure :rtype: list of list of 2-tuples of ints :return: For each structure in the lewis_output (the length of the list), list of the multiple bonds in the structure (NB: a triple bond is noted by being present in the list twice) """ bonds = [] bond_line = [line for line in lewis_output if "bonds:" in line] for line in bond_line: matches = re.findall(r'(\d+)-(\d+)', line) bonds.append([(int(i), int(j)) for i, j in matches]) return bonds
[docs]def get_scores(lewis_output): """ Parse scores from lewis code string output :type lewis_output: list of strings :param lewis_output: the line-by-line output of mmjag/lewis.cpp run on a structure :rtype: list of floats :return: For each structure in the lewis_output (the length of the list), that structures score according to the mmjag/lewis.cpp code (see there for more details) """ scores = [] score_line = [line for line in lewis_output if "score = " in line] for line in score_line: matches = re.search(r'score = (\d+.\d+)', line) scores.append(float(matches[1])) return scores
[docs]def apply_lewis(st, lewis_st): """ Return a copy of a structure with a Lewis structure as described by the LewisStructure given. :type st: Structure instance :param st: structure on which to apply Lewis structure :type lewis_st: LewisStructure namedtuple :param lewis_st: Namedtuple containing charge, multiple bond, and score :rtype: Structure instance :return: a structure with the desired Lewis structure applied """ new_st = st.copy() #make clean copy for bond in new_st.bond: bond.order = 1 for at in new_st.atom: at.formal_charge = 0 for i, j in lewis_st.bonds: new_st.getBond(i, j).order += 1 for at, chg in lewis_st.charges: new_st.atom[at].formal_charge = chg return new_st