Source code for schrodinger.application.jaguar.utils

"""
Jaguar utility functions.

Copyright Schrodinger, LLC. All rights reserved.

"""

import base64
import hashlib
import itertools
import os
import re
import sys
from collections import Counter
from collections import namedtuple

import numpy as np

from schrodinger.utils.fileutils import WINDOWS_EXTENDED_PATH_TAG
from schrodinger.utils.units import BOLTZMANN_HARTREE_PER_KELVIN as BOLTZ
from schrodinger.utils.units import MOLAR_GAS_CONSTANT as IDEAL_GAS_CONSTANT_SI
from schrodinger.utils.units import KCAL_PER_MOL_PER_HARTREE as KCAL
from schrodinger.utils.units import PASCALS_PER_ATM as PASCAL_IN_ATM
from schrodinger.application.jaguar.constants import STD_CONC_FREE_ENERGY
from schrodinger.application.jaguar.constants import \
    STD_CONC_INF_SEP_FREE_ENERGY
from schrodinger.application.jaguar.constants import TOTAL_FREE_ENERGY
from schrodinger.application.jaguar.constants import TOTAL_INF_SEP_FREE_ENERGY
from schrodinger.infra import mm
from schrodinger.structure import Structure

FreeEnergy = namedtuple('FreeEnergy', ['temp', 'gibbs', 'property_key'])
LewisStructure = namedtuple('LewisStructure',
                            ['charges', 'bonds', 'score', 'unpaired'])
ChgAt = namedtuple('ChgAt', ['index', 'charge'])
STD = 0
PRINT = 1
RINGCHAIN = 2
THOROUGH = 3
FINDALL = 4
LEWIS_MODES = (STD, PRINT, RINGCHAIN, THOROUGH, FINDALL)

# Search for a posteriori model suffixes,
# see base_functional
DISP_RE = re.compile(
    r"""-( 
                        d2? # earlier d models
                        |d3m?(\(bj\))? #d3 models
                        |ulg|mm|loc(-mm)? # other corrections
                        )$""", re.VERBOSE | re.IGNORECASE)


[docs]def append_outfiles_to_recover_file(recover_file, outfiles):
    """
    Append list of output file paths to a YAML-format .recover file.

    :type  recover_file: str
    :param recover_file: .recover file name

    :type  outfiles: list of str
    :param outfiles: list of output file paths
    """

    with open(recover_file, 'a') as fh:
        for outfile in outfiles:
            tmp = f"- '{outfile}'\n"
            fh.write(tmp)


[docs]def get_jobname(prefix, str_to_hash):
    """
    Construct a jobname based on the given string prefix (typically the
    backend script name) and a string to be hashed (typically based on the
    cmdline being used to invoke the job.)
    """

    hasher = hashlib.sha1(str_to_hash.encode('utf-8'))
    suffix = base64.urlsafe_b64encode(hasher.digest()[:6]).decode('utf-8')
    jobname = prefix + '.' + suffix
    return jobname


def _extract_basename(filename):
    """
    Get basename of file, removing path and extension

    Only removes rightmost extension, e.g. 
    `_extract_basename("inp.mae.gz")->"inp.mae"`

    :type filename: str
    :param filename: full path of file with extensions

    """
    return os.path.splitext(os.path.basename(filename))[0]


[docs]def restart_name(input_name, mae_name=None):
    """Wrapper to extract restartname from mmjag routine

    Allows for calls where `mae_name` is not defined or where
    input name is an empty string (would cause segfault in mmjag function)
    """
    if input_name == "":
        return ".01"
    if mae_name is None:
        mae_name = " "
    restartname = mm.mmjag_restartname(input_name, mae_name)
    return restartname


[docs]def basic_windows_path(dos_path):
    """
    Convert extended length Windows path to standard. Does nothing on other OS's.

    :type dos_path: string
    :param dos_path: a (Windows) file path, which may have WINDOWS_EXTENDED_PATH_TAG
                     to indicate extended path length
    :rtype: string
    :return: A file path which has not extended length tags
    """
    if sys.platform == 'win32' and os.path.isabs(
            dos_path) and dos_path.startswith(WINDOWS_EXTENDED_PATH_TAG):
        dos_path = dos_path.replace(WINDOWS_EXTENDED_PATH_TAG, '', 1)
    return dos_path


[docs]def get_stoichiometry_string(atom_list):
    """
    Take atom list and return stoichiometry string. For example,
    atom_list = ['H', 'H', 'O'] yields stoichimetry string = 'H2O'.

    :param atom_list: list of strings
    :type atom_list: list

    :return: stoichiometry string
    :rtype: str
    """

    c = list(Counter(atom_list).items())
    sc = sorted(c)
    stoich = ''
    for atom_name, num in sc:
        if num == 1:
            index = ''
        else:
            index = str(num)
        if atom_name:
            stoich += atom_name + index
    return stoich


def _get_atom_elements(jag_input):
    """
    This function returns list of elements for atoms stored in a
    given JaguarInput object. This function is only used when calling
    validate_stoichiometry function.

    :return: list of atom elements
    :rtype: list
    """

    atoms = []
    if jag_input.hasStructure():
        st = jag_input.getStructure()
        atoms = [atom.element for atom in st.atom]
    return atoms


def _get_atoms_and_charge(inputs):
    """
    This function iterates over the list of given JaguarInput objects
    and creates a list of all atom element names as well as total
    charge. This function is only used when calling validate_stoichiometry
    function.

    :return: tuple that contains list of atom elements and total charge
    :rtype: tuple
    """

    atoms = []
    charge = 0
    for jag_input in inputs:
        atoms += _get_atom_elements(jag_input)
        charge += jag_input.getValue('molchg')
    return atoms, charge


[docs]def validate_stoichiometry(reactants, products):
    """
    This function validates stoichiometry for a reaction defined by
    the list of reactants and products. If stoichiometry is not valid
    this function return text string explaining what was wrong. In
    case of valid stoichiometry returns None.

    :param reactants: list of `JaguarInput` objects for reactants
    :type reactants: list

    :param products: list of `JaguarInput` objects products
    :type products: list

    :return: string with warning message or None
    :rtype: str or None
    """

    reactant_atoms, reactant_charge = _get_atoms_and_charge(reactants)
    reactant_stoich = get_stoichiometry_string(reactant_atoms)
    product_atoms, product_charge = _get_atoms_and_charge(products)
    product_stoich = get_stoichiometry_string(product_atoms)

    msg = None

    if len(reactant_atoms) != len(product_atoms):
        msg = ("Numbers of reactants atoms (%d) and products atoms (%d)"
               " differ." % (len(reactant_atoms), len(product_atoms)))

    if reactant_stoich != product_stoich:
        msg = ("Total stoichiometries of reactants (%s) and products (%s)"
               " differ." % (reactant_stoich, product_stoich))

    if reactant_charge != product_charge:
        msg = ("Total charges of reactants (%d) and products (%d)"
               " differ." % (reactant_charge, product_charge))

    return msg


[docs]def base_functional(func):
    """
    Extract basename of DFT functional

    Removes a posteriori model suffixes from the functional, e.g.
    B3LYP-D3 -> B3LYP
    PBE0-d -> PBE0
    b3lyp-loc-MM -> b3lyp
    
    
    :param func: full name of functional, case insensitive
    :type func: string

    :return: functional name, with corrections 
             (e.g. dispersion, LOC) removed
    :rtype: str
    """

    return re.sub(DISP_RE, '', func)


[docs]def get_number_electrons(st):
    """
    Count the number of electrons disregarding charges.

    :type st: Structure instance
    :param st: the structure
    :rtype: int
    :return: the number of electrons
    """
    n = sum([at.atomic_number for at in st.atom])
    return n


[docs]def get_total_charge(structure):
    """
    Return the total charge of the structure
    If the property i_m_Molecular_charge is defined we use
    that, else we sum the formal charges

    :param structure: whose total charge must be calculated
    :type structure: Structure object

    :return: total charge of structure
    :rtype: int
    """
    return structure.property.get(
        'i_m_Molecular_charge', sum([a.formal_charge for a in structure.atom]))


[docs]def elmnt_mult_dict():
    """
    make a dictionary of element:multiplicity
    for all neutral elements up to Lawrencium

    The values are from the ground state term symbol
    as reported by NIST at
    http://physics.nist.gov/PhysRefData/Elements/index.html
    as of 4.2014

    """

    # elmnt name, multiplicity
    elmnt_mult = {
        'H': 2,
        'He': 1,
        'Li': 2,
        'Be': 1,
        'B': 2,
        'C': 3,
        'N': 4,
        'O': 3,
        'F': 2,
        'Ne': 1,
        'Na': 2,
        'Mg': 1,
        'Al': 2,
        'Si': 3,
        'P': 4,
        'S': 3,
        'Cl': 2,
        'Ar': 1,
        'K': 2,
        'Ca': 1,
        'Sc': 2,
        'Ti': 3,
        'V': 4,
        'Cr': 7,
        'Mn': 6,
        'Fe': 3,
        'Co': 4,
        'Ni': 3,
        'Cu': 2,
        'Zn': 1,
        'Ga': 2,
        'Ge': 3,
        'As': 4,
        'Se': 3,
        'Br': 2,
        'Kr': 1,
        'Rb': 2,
        'Sr': 1,
        'Y': 2,
        'Zr': 3,
        'Nb': 6,
        'Mo': 7,
        'Tc': 6,
        'Ru': 5,
        'Rh': 4,
        'Pd': 1,
        'Ag': 2,
        'Cd': 1,
        'In': 2,
        'Sn': 3,
        'Sb': 4,
        'Te': 3,
        'I': 2,
        'Xe': 1,
        'Cs': 2,
        'Ba': 1,
        'La': 2,
        'Ce': 1,
        'Pr': 4,
        'Nd': 5,
        'Pm': 6,
        'Sm': 7,
        'Eu': 8,
        'Gd': 9,
        'Tb': 6,
        'Dy': 5,
        'Ho': 4,
        'Er': 3,
        'Tm': 2,
        'Yb': 1,
        'Lu': 2,
        'Hf': 3,
        'Ta': 4,
        'W': 5,
        'Re': 6,
        'Os': 5,
        'Ir': 4,
        'Pt': 3,
        'Au': 2,
        'Hg': 1,
        'Tl': 2,
        'Pb': 3,
        'Bi': 4,
        'Po': 3,
        'At': 2,
        'Rn': 1,
        'Fr': 2,
        'Ra': 1,
        'Ac': 2,
        'Th': 3,
        'Pa': 4,
        'U': 5,
        'Np': 6,
        'Pu': 7,
        'Am': 8,
        'Cm': 9,
        'Bk': 6,
        'Cf': 5,
        'Es': 4,
        'Fm': 3,
        'Md': 2,
        'No': 1,
        'Lr': 2
    }
    # in case we want to support these at some point
    #                  'Rf':3} # mmlibs seem to have stopped here
    #                  'Db':4,
    #                  'Sg':5, # not listed on NIST - use previous row to guess
    #                  'Bh':6,
    #                  'Hs':5,
    #                  'Mt':4,
    #                  'Ds':1,
    #                  'Rg':2,
    #                  'Cn':2,
    #                  'Uut':2,
    #                  'Fl':3,
    #                  'Uup':4,
    #                  'Lv':3,
    #                  'Uus':2,
    #                  'Uuo':1}

    return elmnt_mult


[docs]def remove_gibbs_energies(st, allowed_temps=()):
    """
    Remove gibbs energy properties from a structure
    but allow some exceptions.  This allows one to
    'unclutter' the project table.

    :type st: Structure
    :param st: structure containing gibbs energy properties
    :type allowed_temps: tuple
    :param allowed_temps: temperatures that are allowed to
           remain as properties
    """

    for b1, b2 in itertools.product([True, False], [True, False]):
        for t in parse_gibbs_energies(st, inf_sep=b1, std_conc=b2):
            if t not in allowed_temps:
                key = gibbs_energy_property_string(t, inf_sep=b1, std_conc=b2)
                st.property.pop(key)


[docs]def parse_gibbs_energies(st, inf_sep=False, std_conc=False):
    """
    Extract the temperature, gibbs energy and property keys for free energy
    and store these in a dict relating temperature to FreeEnergy instances.

    :type st: Structure
    :param st: the structure
    :type std_con: bool
    :param std_con: True indicates Gibbs energy at std state concentration
    :return: a dict relating temperature to a FreeEnergy instance with
             attributes storing these data
    """

    key = _gibbs_energy_property_string_prefix(inf_sep, std_conc)
    regex = key + r"([0-9]+\.[0-9]{2})K_\(au\)"
    gibbs = {}
    for key in st.property.keys():
        m = re.match(regex, key)
        if m is not None:
            temp = float(m.groups()[0])
            g = st.property[key]
            gibbs[temp] = FreeEnergy(temp, g, key)

    return gibbs


[docs]def gibbs_energy_property_string(temp, inf_sep=False, std_conc=False):
    """
    Construct the property key string for Gibbs energy
    at a particular temperature

    :type temp: float
    :param temp: temperature in Kelvin
    :type inf_sep: bool
    :param inf_sep: True indicates infinitely separated energy
    :type std_conc: bool
    :param std_conc: True indicates Gibbs energy at std state concentration
    :return: a property key string
    """

    key = _gibbs_energy_property_string_prefix(inf_sep, std_conc)
    return key + "%.2fK_(au)" % temp


def _gibbs_energy_property_string_prefix(inf_sep, std_conc):
    """
    Retrieve the prefix property key string for Gibbs energy

    :type inf_sep: bool
    :param inf_sep: True indicates infinitely separated energy
    :type std_conc: bool
    :param std_conc: True indicates Gibbs energy at std state concentration
    :return: the first part of the property key string
    """

    if std_conc:
        if inf_sep:
            key = STD_CONC_INF_SEP_FREE_ENERGY
        else:
            key = STD_CONC_FREE_ENERGY
    else:
        if inf_sep:
            key = TOTAL_INF_SEP_FREE_ENERGY
        else:
            key = TOTAL_FREE_ENERGY

    return key


[docs]def convert_gibbs_energy_to_std_conc(st):
    """
    Convert std state (1 atm) Gibbs energies
    to a std state of 1 Molar concentration.
    The energies are returned as a dict relating
    temperature to FreeEnergy instances.
    The energies are also stored as structure level properties.
    This is intended to be used with AutoTS for rate calculations
    and we assume the free energies were computed at 1 atm of pressure.

    :type st: Structure
    :param st: the structure
    :return: a dict relating temperature to a FreeEnergy instance with
             attributes storing these data
    """

    std_p_gibbs_data = parse_gibbs_energies(st)
    std_c_gibbs_data = {}

    for t, free_energy in std_p_gibbs_data.items():
        std_c_gibbs = compute_std_conc_gibbs_energy(
            free_energy.gibbs, t, 1.0,
            1.0) if free_energy.gibbs is not None else None
        std_c_gibbs_key = gibbs_energy_property_string(t,
                                                       inf_sep=False,
                                                       std_conc=True)
        std_c_gibbs_data[t] = FreeEnergy(t, std_c_gibbs, std_c_gibbs_key)
        st.property[std_c_gibbs_key] = std_c_gibbs

    return std_c_gibbs_data


[docs]def compute_std_conc_gibbs_energy(gibbs, temp, press, con):
    """
    Convert Gibbs energy which was computed at a pressure of press
    to a concentration of con using the formula
    G = G_0 + kT log(CRT/P_0)
    where we've used the ideal gas law to relate P = CRT

    :type gibbs: float
    :param gibbs: Gibbs free energy in a.u. evaluated at a pressure of press
    :type press: float
    :param press: Pressure at which the Gibbs energy was evaluated in atm
    :type temp: float
    :param temp: Temperature at which the Gibbs energy was evaluated in Kelvin
    :type con: float
    :param con: Concentration which defines the standard state in moles/Liter
    """
    # For computing TST rate constants is convenient to convert
    # the Gibbs energy from a standard pressure (1 atm) to
    # a standard concentration of 1 mol/Liter
    # see Ch. 22 of Physical Chemistry by Ira Levine

    beta = beta_au(temp)

    liter_in_m3 = 1.0e3
    CRT = con * IDEAL_GAS_CONSTANT_SI * temp * liter_in_m3
    P0 = press * PASCAL_IN_ATM
    correction = (1.0 / beta) * np.log(CRT / P0)

    return gibbs + correction


[docs]def group_items(items, comparator, *args):
    """
    Put items into groups using a comparator.

    These will be returned as a list of lists, each list representing a group.
    The first item of the first group will be the first item in the list items.
    The groups have the property that Comparator(item1, item2, args) returns
    True for all pairs in a group.

    :type items: list
    :param items: a list of items to group

    :type comparator: function
    :param comparator: this function compares two items and returns a boolean
        indicating whether or not they are equivalent.

    :type args: argument list
    :param args: arguments passed to the comparator which is called as
        comparator(item1, item2, `*args`)

    :return: a list of lists of items
    """

    groups = []
    for item in items:
        found_group = False
        for group in groups:
            if all(comparator(item, item2, *args) for item2 in group):
                group.append(item)
                found_group = True
                break
        if not found_group:
            groups.append([item])

    return groups


[docs]def beta_au(temp):
    """
    Beta = 1/kT in a.u.
    """

    kT = BOLTZ * temp
    return 1.0 / kT


[docs]def beta_kcalmol(temp):
    """
    Beta = 1/kT in kcal/mol
    """
    k_kcal = BOLTZ * KCAL
    beta = 1.0 / (k_kcal * temp)
    return beta


[docs]def copy_structure_bonding(ref_st, updated_st):
    """
    Copy all bonding and formal charges from reference structure
    onto a structure instance we want to update.  Also update FF
    atom-typing.  Assumes number of atoms and atom numbering is the
    same in both structures.

    :type  ref_st: Structure object
    :param ref_st: structure to copy bonding from

    :type  updated_st: Structure object
    :param updated_st: structure to copy bonding to
    """

    if ref_st.atom_total != updated_st.atom_total:
        msg = 'Structures have different numbers of atoms.'
        msg += f'\n{ref_st.atom_total} != {updated_st.atom_total}'
        raise RuntimeError(msg)

    # Update relevant bonding and structure-level properties
    for bond in list(updated_st.bond):
        updated_st.deleteBond(bond.atom1.index, bond.atom2.index)
    for bond in ref_st.bond:
        updated_st.addBond(bond.atom1.index, bond.atom2.index, bond.order)

    # Update atom-level bonding info
    for ref_at, updated_at in zip(ref_st.atom, updated_st.atom):
        assert ref_at.atomic_number == updated_at.atomic_number
        if ref_at.atomic_number > 0:
            assert np.allclose(ref_at.xyz, updated_at.xyz, atol=1e-5)
        updated_at.formal_charge = ref_at.formal_charge
    # This is intended to copy over the FF atom-typing updated by mmjag
    # to keep the code consistent with legacy behaviour.  Apparently this
    # gives different results from using .retype()
    for ref_at, at in zip(ref_st.atom, updated_st.atom):
        at.atom_type = ref_at.atom_type


[docs]def sync_dummy_atoms(ref_st, st):
    """
    Sync dummy atoms in ref_st and st. This is useful after calling
    mmjag_connect to reset bonding.  We check atomic number and XYZ coordinates
    to classify which atoms are the "same" or "different".  But note for dummy
    atoms, we do not require XYZ to be the same.

    We then try to sync the two structures by handling two possibilities:

    (1) Existence of new dummy atoms at the end of the atom list in ref_st
        compared to st; they will be added to the end of st.
    (2) Non-existence of dummy atoms at any location in ref_st compared
        to st; they will be deleted from st.

    Otherwise, an AssertionError is raised.

    :type  ref_st: Structure object
    :param ref_st: structure to copy atoms from

    :type  st: Structure object
    :param st: structure to copy atoms to
    """

    # First pass, delete any dummy atoms from st not present in ref_st
    ir = 1
    to_delete = []
    for iu in range(1, st.atom_total + 1):
        # This loop range assumes only dummy atoms differ
        if (ref_st.atom[ir].atomic_number == st.atom[iu].atomic_number) and \
            np.allclose(ref_st.atom[ir].xyz, st.atom[iu].xyz, atol=1e-5):
            # Atoms are the same, move on to next pair
            ir += 1
        else:
            # Atoms are the not the same.
            if (ref_st.atom[ir].atomic_number == st.atom[iu].atomic_number) and \
                (st.atom[iu].atomic_number <= 0):
                # dummy atom position has changed; we allow it
                ir += 1
            elif st.atom[iu].atomic_number <= 0:
                # st atom is an extra dummy, so mark it for deletion
                to_delete.append(iu)
            elif ref_st.atom[iu].atomic_number <= 0:
                msg = 'Extra dummy atom not added to end of ref_st'
                raise AssertionError(msg)
            else:
                msg = 'Unexpected non-dummy atoms differ. Aborting...'
                raise AssertionError(msg)
    st.deleteAtoms(to_delete)

    # Second pass, add any dummy atoms from ref_st not present in st
    new_atoms = []
    assert ref_st.atom_total >= st.atom_total
    for ref_at, at in itertools.zip_longest(ref_st.atom, st.atom):
        # We assume any extra dummy atoms are added to the end of the atom list
        if at is not None:
            # Double-check that the atoms are the same when present in both
            assert ref_at.atomic_number == at.atomic_number
            if at.atomic_number > 0:
                assert np.allclose(ref_at.xyz, at.xyz, atol=1e-5)
        else:
            # Check that we're only adding a dummy atom, then add it
            assert ref_at.atomic_number <= 0
            x, y, z = ref_at.xyz
            new_atoms.append(ref_at.index)
            st.addAtom('Du',
                       x,
                       y,
                       z,
                       color=ref_at.color,
                       atom_type=ref_at.atom_type)

    # 'Du' element type is not guaranteed to have the same properties as
    # "mmjag dummy" atoms, so we manually set some.
    for i in new_atoms:
        st.atom[i].atomic_number = ref_st.atom[i].atomic_number
        st.atom[i].name = ref_st.atom[i].name

    # Double-check atom ordering and numbers are now the same
    assert ref_st.atom_total == st.atom_total
    for ref_at, at in zip(ref_st.atom, st.atom):
        assert ref_at.atomic_number == at.atomic_number
        if at.atomic_number > 0:
            assert np.allclose(ref_at.xyz, at.xyz, atol=1e-5)


[docs]def mmjag_reset_connectivity(st):
    """
    Reset connectivity and Lewis structure using mmjag algorithm.
    i.e input st is modified to have new bonds, bond orders and
    formal charges.  All other properties should be preserved.

    :type  st: Structure object
    :param st: structure to clean up
    """

    charge = get_total_charge(st)

    # Create temporary new structure
    mm.mmjag_initialize(mm.error_handler)
    # Pass in copy just in case there are unknown side-effects to calling mmjag
    st_tmp = st.copy()
    handle = mm.mmjag_connect(st_tmp, charge)
    new_st = Structure(handle)
    mm.mmjag_terminate()

    # Sync dummy atoms added or removed by mmjag onto the input Structure
    sync_dummy_atoms(new_st, st)

    # Copy new bonding and charges onto input structure
    copy_structure_bonding(new_st, st)
    assert get_total_charge(st) == charge
    assert sum([a.formal_charge for a in st.atom]) == charge


[docs]def mmjag_update_lewis(st, mode=STD):
    """
    Update Lewis structure for a given connectivity using mmjag algorithm.
    This can be used instead of, or to complement, e.g. the mmlewis code.
    Unlike mmjag_reset_connectivity, the connectivity will be preserved.

    If mode is PRINT, the lewis structure determination information will be
    returned

    If mode is RINGCHAIN, special Lewis structure scoring for ring-chain
    tautomers will be used (and lewis structure information will be returned)

    If mode if THOROUGH, the lewis structure search will be lengthened

    If mode if FINDALL, the lewis structure search will be lengthened and
    suboptimal results will be included in the output data (which is sorted to
    have the best first).

    If the mmjag Lewis code fails to update the bonding, the original bonding
    will be preserved.

    :type  st: Structure object
    :param st: structure to clean up
    :type  mode: int
    :param mode: specifies if std or ring-chain scoring should be used
                 and whether or not the data is returned from lewis.cpp.
                 Only STD, PRINT, RINGCHAIN, THOROUGH, and FINDALL are
                 acceptable values
    :rtype: list of LewisStructure namedtuples
    :return: If mode is PRINT, RINGCHAIN, THOROUGH, or FINDALL, parsed
             lewis structure data is returned as namedtuple; an empty
             list is returned if mode is STD.
    """

    assert mode in LEWIS_MODES
    # Store the total charge before resetting it.
    charge = get_total_charge(st)

    # We need to reset the Lewis structure before calling the mmjag
    # code to trigger it to do something useful.
    # i.e. make all bonds single bonds and remove all charges.
    new_st = st.copy()
    for bond in new_st.bond:
        bond.order = 1
    for at in new_st.atom:
        at.formal_charge = 0

    # Update Lewis structure, preserving connectivity
    mm.mmjag_initialize(mm.error_handler)
    try:
        handle, lewis_data = mm.mmjag_create_lewis(new_st, charge, mode)
        new_st = Structure(handle)
    except mm.MmException as e:
        if e.rc == mm.MMJAG_LEWIS_FAIL:
            print('WARNING: mmjag_update_lewis failed!')
            return
        else:
            raise e
    finally:
        mm.mmjag_terminate()

    # Loosely check that atom ordering is preserved, ignoring dummy atoms
    assert [at.atomic_number for at in st.atom if at.atomic_number > 0] == [
        at.atomic_number for at in new_st.atom if at.atomic_number > 0
    ]

    # Copy new Lewis structure onto old structure
    copy_structure_bonding(new_st, st)
    assert get_total_charge(st) == charge
    assert sum([a.formal_charge for a in st.atom]) == charge
    return parse_lewis_data(lewis_data)


[docs]def parse_lewis_data(lewis_data):
    """
    Parse the string-formatted Lewis data from mmjag into
    a data structure for easy use.

    :type lewis_data: list of strings:
    :param lewis_data: the line-by-line output of mmjag/lewis.cpp run
                       on a structure
    :rtype: list of LewisStructure namedtuples
    :return: Namedtuples containing charge, multiple bond, and score
    """

    charges, unpaired = get_charges(lewis_data)
    bonds = get_bonds(lewis_data)
    scores = get_scores(lewis_data)

    assert len(charges) == len(unpaired)
    assert len(charges) == len(bonds)
    assert len(charges) == len(scores)

    parsed_lewis_structures = []
    for c, u, b, s in zip(charges, unpaired, bonds, scores):
        new_lewis = LewisStructure(charges=c, unpaired=u, bonds=b, score=s)
        parsed_lewis_structures.append(new_lewis)

    return parsed_lewis_structures


[docs]def get_charges(lewis_output):
    """
    Parse charges from lewis code string output

    :type lewis_output: list of strings
    :param lewis_output: the line-by-line output of mmjag/lewis.cpp run
                         on a structure
    :rtype: (list of list of ChgAt namedtuples, list of bools)
    :return: For each structure in the lewis_output (the length of the list),
             list of the charged atom and their charges, whether or not structure
             contains unpaired spins
    """
    charges = []
    unpaired = []
    charge_line = [line for line in lewis_output if "charges:" in line]
    for line in charge_line:
        unpaired.append("(.)" in line)
        matches = re.findall(r'(\d+)\((-?\d+)\)', line)
        charges.append([ChgAt(int(i), int(j)) for i, j in matches])
    return charges, unpaired


[docs]def get_bonds(lewis_output):
    """
    Parse bonds from lewis code string output

    :type lewis_output: list of strings
    :param lewis_output: the line-by-line output of mmjag/lewis.cpp run
                         on a structure
    :rtype: list of list of 2-tuples of ints
    :return: For each structure in the lewis_output (the length of the list),
             list of the multiple bonds in the structure (NB: a triple bond
             is noted by being present in the list twice)
    """
    bonds = []
    bond_line = [line for line in lewis_output if "bonds:" in line]
    for line in bond_line:
        matches = re.findall(r'(\d+)-(\d+)', line)
        bonds.append([(int(i), int(j)) for i, j in matches])
    return bonds


[docs]def get_scores(lewis_output):
    """
    Parse scores from lewis code string output

    :type lewis_output: list of strings
    :param lewis_output: the line-by-line output of mmjag/lewis.cpp run
                         on a structure
    :rtype: list of floats
    :return: For each structure in the lewis_output (the length of the list),
             that structures score according to the mmjag/lewis.cpp code (see
             there for more details)
    """
    scores = []
    score_line = [line for line in lewis_output if "score = " in line]
    for line in score_line:
        matches = re.search(r'score = (\d+.\d+)', line)
        scores.append(float(matches[1]))
    return scores


[docs]def apply_lewis(st, lewis_st):
    """
    Return a copy of a structure with a Lewis structure as described
    by the LewisStructure given.

    :type st: Structure instance
    :param st: structure on which to apply Lewis structure
    :type lewis_st: LewisStructure namedtuple
    :param lewis_st: Namedtuple containing charge, multiple bond, and score
    :rtype: Structure instance
    :return: a structure with the desired Lewis structure applied
    """
    new_st = st.copy()
    #make clean copy
    for bond in new_st.bond:
        bond.order = 1
    for at in new_st.atom:
        at.formal_charge = 0

    for i, j in lewis_st.bonds:
        new_st.getBond(i, j).order += 1
    for at, chg in lewis_st.charges:
        new_st.atom[at].formal_charge = chg
    return new_st