Source code for schrodinger.application.mopac.results_main

"""
This module parses and stores the results of a MOPAC calculation.
A MopacResultsMain class is populated with data from the output files
from MOPAC_MAIN using a text parser.
"""

# Contributor: Mark A. Watson

import contextlib
import os
import re
import sys
from collections import OrderedDict
from io import StringIO
from past.utils import old_div

import numpy

import schrodinger.infra.mopac as mopaclib
from schrodinger.application.mopac import structure_launchers
from schrodinger.application.mopac.utils import convert_sparse_dict_to_list
from schrodinger.utils.fileutils import get_next_filename

MOPAC_MAIN = 'MOPAC2016'
MOPAC_HEAT_OF_FORMATION_PROP = 'r_mopac_MOPAC_Heat_of_Formation'
MOPAC_TOTAL_ENERGY_PROP = 'r_mopac_MOPAC_Total_Energy_EV'

# Memory use (in GB) for an object before a Warning is raised.
MEMORY_WARNING = 10

# Text to distinguish results from multiple jobs in a single .out file
OUTFILE_JOB_TEXT_SEPARATOR = '**                                '
OUTFILE_JOB_TEXT_SEPARATOR += f'{MOPAC_MAIN}'
OUTFILE_JOB_TEXT_SEPARATOR += '                                  **'
# Text to distinguish results from multiple jobs in a single .aux file
AUXFILE_JOB_TEXT_SEPARATOR = 'START OF MOPAC FILE'


[docs]class MopacPropertyError(Exception):
    pass


[docs]class MopacNumericalError(Exception):
    pass


[docs]class MopacBasisError1(Exception):
    pass


[docs]class MopacBasisError2(Exception):
    pass


[docs]class MopacBasisError3(Exception):
    pass


[docs]class MopacBasisError4(Exception):
    pass


[docs]class MopacBasisError5(Exception):
    pass


[docs]class MopacGradientParserError(Exception):
    pass


def _write_buffer_to_file(buff, filename):
    """
    Write a StringIO buffer to disk with <filename>.

    :type  buff: StringIO instance
    :param buff: memory buffer

    :type  filename: str
    :param filename: name of file to write
    """

    with open(filename, 'w') as fh:
        fh.write(buff.getvalue())


def _partition_files(outfile, separator, skip_first=True):
    """
    Split up <outfile> at each occurrence of line.strip() == <separator>
    and write each section as a new file.

    The leading lines up to the first instance of <separator> are discarded
    by default.

    :type  outfile: str
    :param outfile: name of file to split.

    :type  separator: str
    :param separator: pattern to split file by.

    :type  skip_first: bool
    :param skip_first: if True, skip all lines before before first separator.

    :return: list containing output file names for results that were found and
        "None"s for results that are missing
    """

    if not os.path.isfile(outfile):
        return []

    outfiles = []
    buff = StringIO()
    newname = get_next_filename(outfile, midfix='_')
    expected_index = 1

    with open(outfile, 'r') as fh:
        line = True
        while line:  # True until EOF
            # Create a StringIO buffer for each segment
            with contextlib.closing(StringIO()) as buff:
                while line:
                    # Loop over all lines in the file in buffered chunks
                    line = fh.readline()
                    if skip_first and line.strip() == separator:
                        skip_first = False
                    elif (structure_launchers.TITLE_START in line and
                          structure_launchers.TITLE_END in line):
                        regex = f'{structure_launchers.TITLE_START}(\\d+),'
                        index = int(re.search(regex, line).group(1))
                        if index > expected_index:
                            # Add a None for each index that was skipped
                            outfiles += [None] * (index - expected_index)
                        expected_index = index + 1
                    elif line.strip() == separator:
                        # Dump buffered lines to new file and reset buffer
                        _write_buffer_to_file(buff, newname)
                        outfiles.append(newname)
                        newname = get_next_filename(outfile, midfix='_')
                        break
                    # Save line in current buffer
                    buff.write(line)

                if not line:
                    # Empty out buffer at EOF
                    _write_buffer_to_file(buff, newname)
                    outfiles.append(newname)

    # We want to work with the original file when there's just one to
    # avoid unnecessary suffices etc.
    if len(outfiles) > 1:
        return outfiles
    else:
        return [outfile]


[docs]def split_outfile(outfile):
    """
    Process .out or .aux output files, splitting them up into
    subfiles if more than one output deck is found per file.

    :type  outfile: str
    :param outfile: name of output file to process

    :return: list containing output file names for results that were found and
        "None"s for results that are missing
    """

    suffix = os.path.splitext(outfile)[1]
    if suffix == '.out':
        separator = OUTFILE_JOB_TEXT_SEPARATOR
    elif suffix == '.aux':
        separator = AUXFILE_JOB_TEXT_SEPARATOR
    else:
        raise RuntimeError(f'Outfile suffix "{suffix}" not recognized.')
    outfiles = _partition_files(outfile, separator)

    return outfiles


[docs]class MopacMainTextParser(object):
    """
    Parser for MOPAC_MAIN's .out and .aux files.
    Using decorators, the callback() function below populates the "callbacks"
    dictionary in this class with key-value pairs of the form (regex,func)
    where "regex" is a regular expression to catch patterns in the output
    file, and "func" is the callback function which is triggered when the
    regex is matched.

    In this way, each specific callback is triggered by a given pattern
    such as 'HOMO LUMO ENERGIES (EV) =', and the callback function
    processes the matched lines.

    Note that multiple regex's can be associated with a given callback
    function by associating it with two or more decorators.  In this way,
    the same callback can be triggered multiple times. e.g. for both
    the .aux and .out files, perhaps for consistency checking etc.

    Typically, however, only one regex is associated with each callback
    so that data for a given property is grepped from either the .out
    file OR the .aux file, but not both.
    """

    callbacks = OrderedDict()
    callback_minimal = OrderedDict()

[docs]    def __init__(self, file_iter):
        self.file_iter = file_iter

[docs]    def parse(self):
        """
        Loop over self.file_iter and trigger the callbacks.

        :return: OrderedDict of structure properties
        """

        # The following try/except is needed for an empty file
        try:
            line = next(self.file_iter)
        except StopIteration:
            line = ''

        # Test each line in the file against the regexs
        properties = OrderedDict()
        for line in self.file_iter:
            # Loop over all patterns and gather any recognized data
            for pattern, func in self.callbacks.items():
                m = pattern.search(line)
                if m:
                    func(properties, m, self.file_iter)

        return properties

[docs]    def parse_multiple_jobs(self, minimal=False):
        """
        Loop over self.file_iter and trigger the callbacks.
        This is intended for use with output files that have results
        from multiple jobs in them (MOPAC-224).

        The optional arg <minimal> can be used to accelerate the
        parsing when only a few important (hard-coded) properties are
        required, as indicated by the @callback_minimal decorators.
        This is useful for large file and fast screening by energy.

        :type  minimal: bool
        :param minimal: parse only a minimal subset of properties

        :return: list of OrderedDict of structure properties
        """

        # The following try/except is needed for an empty file
        try:
            line = next(self.file_iter)
        except StopIteration:
            line = ''

        if minimal:
            callbacks = self.callback_minimal
        else:
            callbacks = self.callbacks

        # Test each line in the file against the regexs
        all_properties = []
        properties = OrderedDict()
        for line in self.file_iter:
            # Assume we've hit a new set of results and reinitialize results
            if line.strip() == OUTFILE_JOB_TEXT_SEPARATOR:
                all_properties.append(properties)
                properties = OrderedDict()
            # Loop over all patterns and gather any recognized data
            for pattern, func in callbacks.items():
                m = pattern.search(line)
                if m:
                    func(properties, m, self.file_iter)
        all_properties.append(properties)

        return all_properties[1:]


[docs]def fortfloat(val):
    """
    Convert a floating point number in Fortran notation to a regular float.
    """
    val = val.replace('D', 'E')
    return float(val)


[docs]def check_memory_use(nvals, name):
    """
    Check memory requirment for a long list of floats.
    Print message if size exceeds MEMORY_WARNING.

    :type nvals: int
    :type nvals: number of floats in list
    """

    # size of float
    flt = sys.getsizeof(3.141)
    # size of empty list
    overhead = sys.getsizeof([])
    # size of each reference in a list
    ref = sys.getsizeof([1.0]) - sys.getsizeof([])

    # Total predicted size in bytes of large list
    total = overhead + nvals * (flt + ref)
    total = old_div(total, 1000000000.0)

    if total > MEMORY_WARNING:
        print("WARNING: parsing %s from the MOPAC .aux file" % name)
        print("uses approximately %d GB of memory." % total)


[docs]def update_props(props, key, value, precision=5):
    """
    Update the props dictionary with the key-value pair, performing
    consistency checks if the key already exists.
    """

    if key in props:
        # Compare old and new values and raise an exception
        # unless it's an insignificant numerical difference.
        try:
            numpy.testing.assert_approx_equal(value,
                                              props[key],
                                              significant=precision)
        except AssertionError:
            print('property = ', key)
            print('value1 = ', props[key])
            print('value2 = ', value)
            msg = 'MOPAC output values differ within %d digits' % precision
            raise MopacNumericalError(msg)
        except ValueError:
            # Assume not floats, therefore must be exactly the same.
            msg = 'Error: Non-float MOPAC output values inconsistent.'
            if value != props[key]:
                print('property = ', key)
                print('value1 = ', props[key])
                print('value2 = ', value)
                raise MopacPropertyError(msg)

    else:
        props[key] = value


[docs]def callback(regex):
    """
    This decorator is just a convenient way to populate the
    callbacks dictionary in the MopacMainTextParser.  The associated
    functions themselves are not actually decorated.
    """

    def deco(func):
        regexp = re.compile(regex)
        if regexp in MopacMainTextParser.callbacks:
            raise KeyError("This regex is already being handled!")
        else:
            MopacMainTextParser.callbacks[regexp] = func
        return func

    return deco


[docs]def callback_minimal(regex):
    """
    This decorator is just a convenient way to populate the
    callbacks dictionary in the MopacMainTextParser.  The associated
    functions themselves are not actually decorated.
    """

    def deco(func):
        regexp = re.compile(regex)
        if regexp in MopacMainTextParser.callbacks:
            raise KeyError("This regex is already being handled!")
        else:
            MopacMainTextParser.callbacks[regexp] = func
            MopacMainTextParser.callback_minimal[regexp] = func
        return func

    return deco


#-----------------------------------------------------------------------------
#
# Callbacks
#
# We can supply an arbitrary number of regular expressions using
# additional decorators; e.g. this allows for parsing multiple files
# which may output the information in different formats.

SPACE = r'\s+'
# Regex for integer
INUM_REGEX = r'(\s*\d+)'
# Regex for regular decimal float
DNUM_REGEX = r'(\s*\+?\-?\d+\.\d+)'
# Regex for Fortan decimal
FNUM_REGEX = r'(\s*\+?\-?\d+\.\d+D[\+,\-]\d+)'


[docs]@callback_minimal(r'FINAL HEAT OF FORMATION\s+=' + DNUM_REGEX)
@callback_minimal(r'HEAT_OF_FORMATION:KCAL/MOL=' + FNUM_REGEX)
def heat_of_formation(props, match, it):
    key = ('mae', MOPAC_HEAT_OF_FORMATION_PROP)
    update_props(props, key, fortfloat(match.group(1)))


# in MOPAC-267, the upgrade to the newest version of MOPAC
# has a different printout. The total energy now seems to be
# only output with the ENPART keyword and has a different regex.
# Commenting out old regexes, but keeping around in case just in case
#@callback(r'TOTAL ENERGY\s+=' + DNUM_REGEX + r'\s+EV')
#@callback(r'TOTAL_ENERGY:EV=' + FNUM_REGEX)
[docs]@callback(r'ETOT \(EONE \+ ETWO\)\s+' + DNUM_REGEX + r'\s+EV')
def total_energy(props, match, it):
    key = ('mae', MOPAC_TOTAL_ENERGY_PROP)
    update_props(props, key, fortfloat(match.group(1)))


# commented out as MOPAC sometimes overwrites the fixed width
# which breaks the extraction with regex
#@callback(r'^ GRADIENTS:KCAL\/MOL\/ANGSTROM\[(\d+)\]=')
#def func(props, match, it):
#    """ Parse gradient data from .aux """
#    ndim = int(match.group(1))
#    natom = ndim//3

# extract
#    gradients = []
#    while len(gradients) < ndim:
#        line = next(it)
#        gradients.extend([float(x) for x in line.split()])

# split into XYZ
#    for idir, direction in enumerate('XYZ'):
#        grad = [gradients[3*iat + idir] for iat in range(natom)]
#        key = ('atomic_mae', 'r_mopac_GRADIENT_%s_Kcal_Angstrom' % direction)
#        update_props(props, key, grad)


[docs]@callback(r'FINAL  POINT  AND  DERIVATIVES')
def func(props, match, it):
    "Parse gradient data from .out"

    # consume the next two lines
    blank_line = next(it)
    title_line = next(it)

    # extract atom number, XY or Z, gradient and position
    # example of line we are parsing
    #       6          2  C    CARTESIAN Z     0.000000    -4.262741  KCAL/ANGSTROM
    regex = re.compile(
        r'^\s+\d+\s+' + INUM_REGEX + r'\s+[A-Z,a-z]+\s+CARTESIAN\s+([X,Y,Z])\s+' \
        + DNUM_REGEX + r'\s+' + DNUM_REGEX + r'\s+KCAL\/ANGSTROM$'
    )

    # this count keeps track of which atom we expect
    cnt = 1
    atomic_gradients = {}
    while True:
        line = next(it)
        m = regex.search(line)
        if m:
            at_no, direction, position, gradient = m.groups()

            # test that atoms are ordered
            # gradients are ordered as atom 1 x, atom 1 y, atom 1 z, atom2 x, ...
            if str((cnt - 1) // 3 + 1) != at_no:
                raise MopacGradientParserError(
                    'atom indexes in gradient are not in increasing order')

            atomic_gradients.setdefault(direction,
                                        []).append(fortfloat(gradient))
            cnt += 1

        else:
            break

    # store gradient data
    for direction in atomic_gradients:
        key = ('atomic_mae', 'r_mopac_GRADIENT_%s_Kcal_Angstrom' % direction)
        update_props(props, key, atomic_gradients[direction])


#FIXME method names printed differently in .out and .aux file by MOPAC2016
#      .out has '"METHOD" CALCULATION RESULTS', .aux has 'METHOD="METHOD"'
#@callback_minimal(r'The\s+(.*)\s+Hamiltonian')
[docs]@callback_minimal(r'                              (.*) CALCULATION')
#@callback_minimal(r'METHOD=(\S+)')
def method(props, match, it):
    key = ('mae', 's_mopac_SemiEmpirical_Method')
    update_props(props, key, match.group(1))
    update_props(props, 'method', match.group(1))


[docs]@callback(r'IONIZATION_POTENTIAL:EV=' + FNUM_REGEX)
def ionization_potential(props, match, it):
    key = ('mae', 'r_mopac_Ionization_Energy')
    update_props(props, key, fortfloat(match.group(1)))


[docs]@callback(r'HOMO LUMO ENERGIES \(EV\) =' + DNUM_REGEX + SPACE + DNUM_REGEX)
def homo_lumo_energy(props, match, it):
    key = ('mae', 'r_mopac_HOMO_Energy')
    update_props(props, key, float(match.group(1)))
    key = ('mae', 'r_mopac_LUMO_Energy')
    update_props(props, key, float(match.group(2)))


[docs]@callback(r'ALPHA SOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX)
def alpha_somo_lumo_energy(props, match, it):
    key = ('mae', 'r_mopac_Alpha_SOMO_Energy')
    update_props(props, key, float(match.group(1)))
    key = ('mae', 'r_mopac_Alpha_LUMO_Energy')
    update_props(props, key, float(match.group(2)))


[docs]@callback(r'ALPHA HOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX)
def alpa_homo_lumo_energy(props, match, it):
    key = ('mae', 'r_mopac_Alpha_HOMO_Energy')
    update_props(props, key, float(match.group(1)))
    key = ('mae', 'r_mopac_Alpha_LUMO_Energy')
    update_props(props, key, float(match.group(2)))


[docs]@callback(r'BETA\s+SOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX)
def beta_somo_lumo_energy(props, match, it):
    key = ('mae', 'r_mopac_Beta_SOMO_Energy')
    update_props(props, key, float(match.group(1)))
    key = ('mae', 'r_mopac_Beta_LUMO_Energy')
    update_props(props, key, float(match.group(2)))


[docs]@callback(r'BETA\s+HOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX)
def beta_homo_lumo_energy(props, match, it):
    key = ('mae', 'r_mopac_Beta_HOMO_Energy')
    update_props(props, key, float(match.group(1)))
    key = ('mae', 'r_mopac_Beta_LUMO_Energy')
    update_props(props, key, float(match.group(2)))


[docs]@callback(r'Parr & Pople absolute hardness:' + DNUM_REGEX)
def parr_and_pople(props, match, it):
    key = ('mae', 'r_mopac_Parr_Pople_Hardness')
    update_props(props, key, float(match.group(1)))
    # According to MOPAC author James Stewart, these are the same.
    key = ('mae', 'r_mopac_Molecular_Hardness')
    update_props(props, key, float(match.group(1)))


[docs]@callback(r'Mulliken electronegativity:' + DNUM_REGEX)
def mulliken_electronegativity(props, match, it):
    key = ('mae', 'r_mopac_Mulliken_Electronegativity')
    update_props(props, key, float(match.group(1)))
    # According to MOPAC author James Stewart, these are the same
    # except for a sign change.
    key = ('mae', 'r_mopac_Molecular_Electronegativity')
    update_props(props, key, -float(match.group(1)))


[docs]@callback(r'DIPOLE:DEBYE=' + FNUM_REGEX)
def dipole(props, match, it):
    key = ('mae', 'r_mopac_Dipole')
    update_props(props, key, fortfloat(match.group(1)))


[docs]@callback(r'DIP_VEC:DEBYE\[3\]=' + FNUM_REGEX + SPACE + FNUM_REGEX + SPACE +
          FNUM_REGEX)
def dipole_xyz(props, match, it):
    key = ('mae', 'r_mopac_Dipole_X')
    update_props(props, key, fortfloat(match.group(1)))
    key = ('mae', 'r_mopac_Dipole_Y')
    update_props(props, key, fortfloat(match.group(2)))
    key = ('mae', 'r_mopac_Dipole_Z')
    update_props(props, key, fortfloat(match.group(3)))


[docs]@callback(r'DIPOLE MOMENT EVALUATED FROM THE POINT CHARGES')
def dipole_esp(props, match, it):
    regex = re.compile(DNUM_REGEX + SPACE + DNUM_REGEX + SPACE + DNUM_REGEX +
                       SPACE + DNUM_REGEX)
    while True:
        line = next(it)
        m = regex.search(line)
        if m:
            key = ('mae', 'r_mopac_Dipole_ESP_X')
            update_props(props, key, float(m.group(1)))
            key = ('mae', 'r_mopac_Dipole_ESP_Y')
            update_props(props, key, float(m.group(2)))
            key = ('mae', 'r_mopac_Dipole_ESP_Z')
            update_props(props, key, float(m.group(3)))
            key = ('mae', 'r_mopac_Dipole_ESP')
            update_props(props, key, float(m.group(4)))
            break


#
# Atomic properties
#
[docs]@callback(r'ATOM_X:ANGSTROMS\[(\d+)\]=')
def atom_x_angstroms(props, match, it):
    natoms = old_div(int(match.group(1)), 3)
    coords = []
    for i in range(natoms):
        line = next(it)
        coords.append([float(x) for x in line.split()])
    key = 'initial_geometry'
    update_props(props, key, coords)


[docs]@callback(r'ATOM_X_OPT:ANGSTROMS\[(\d+)\]=')
def atom_x_opt(props, match, it):
    natoms = old_div(int(match.group(1)), 3)
    coords = []
    for i in range(natoms):
        line = next(it)
        coords.append([float(x) for x in line.split()])
    key = 'final_geometry'
    update_props(props, key, coords)


[docs]@callback(r'^ ATOM_CHARGES\[(\d+)\]=')
def atom_charges(props, match, it):
    natoms = int(match.group(1))
    charges = []
    while len(charges) < natoms:
        line = next(it)
        charges.extend([float(x) for x in line.split()])
    key = ('atomic_mae', 'r_mopac_NDDO_Charge')
    update_props(props, key, charges)


[docs]@callback(r'^ MULLIKEN_ATOM_CHARGES\[(\d+)\]=')
def mulliken_charge(props, match, it):
    natoms = int(match.group(1))
    charges = []
    while len(charges) < natoms:
        line = next(it)
        charges.extend([float(x) for x in line.split()])
    key = ('atomic_mae', 'r_mopac_Mulliken_Charge')
    update_props(props, key, charges)


[docs]@callback(r'^ ELECTOSTATIC_POTENTIAL_CHARGES\[(\d+)\]=')
def esp_charge(props, match, it):
    natoms = int(match.group(1))
    charges = []
    while len(charges) < natoms:
        line = next(it)
        charges.extend([float(x) for x in line.split()])
    key = ('atomic_mae', 'r_mopac_ESP_Charge')
    update_props(props, key, charges)


[docs]@callback(r'a\s+n\s+Dn\(r\)\s+De\(r\)\s+q\(r\) - Z\(r\)')
def total_nucleophilic_superdelocalizability(props, match, it):
    regex1 = re.compile(r'\S+' + INUM_REGEX + SPACE + DNUM_REGEX + SPACE +
                        DNUM_REGEX + SPACE + DNUM_REGEX)
    regex2 = re.compile(r'Total:' + DNUM_REGEX + SPACE + DNUM_REGEX)
    Dn = OrderedDict()
    De = OrderedDict()
    while True:
        line = next(it)
        m1 = regex1.search(line)
        m2 = regex2.search(line)
        if m1:
            Dn[int(m1.group(1))] = float(m1.group(2))
            De[int(m1.group(1))] = float(m1.group(3))
        elif m2:
            key = ('mae', 'r_mopac_Total_Nucleophilic_Superdelocalizability')
            update_props(props, key, float(m2.group(1)))
            key = ('mae', 'r_mopac_Total_Electrophilic_Superdelocalizability')
            update_props(props, key, float(m2.group(2)))
            break
    key = ('atomic_mae', 'r_mopac_Nucleophilic_Superdelocalizability')
    Dn = convert_sparse_dict_to_list(Dn)
    update_props(props, key, Dn)
    key = ('atomic_mae', 'r_mopac_Electrophilic_Superdelocalizability')
    De = convert_sparse_dict_to_list(De)
    update_props(props, key, De)


[docs]@callback(r'a\s+n\s+piS\(r\)')
def self_polarizability(props, match, it):
    regex1 = re.compile(r'\S+' + INUM_REGEX + SPACE + DNUM_REGEX)
    regex2 = re.compile(r'Total:' + DNUM_REGEX)
    piS = OrderedDict()
    while True:
        line = next(it)
        m1 = regex1.search(line)
        m2 = regex2.search(line)
        if m1:
            piS[int(m1.group(1))] = float(m1.group(2))
        elif m2:
            key = ('mae', 'r_mopac_Total_Atom_Self_Polarizability')
            update_props(props, key, float(m2.group(1)))
            break
    key = ('atomic_mae', 'r_mopac_Atom_Self_Polarizability')
    piS = convert_sparse_dict_to_list(piS)
    update_props(props, key, piS)


#
# Stoichiometry and charge
#
[docs]@callback(r'^ ATOM_EL\[(\d+)\]=')
def atom_el(props, match, it):
    natoms = int(match.group(1))
    update_props(props, 'natoms', natoms)
    update_props(props, 'numat', natoms)
    #FIXME when is numat and natoms different? Real atoms, dummy atoms??


[docs]@callback(r'^ NUM_ELECTRONS=(\d+)')
def num_electrons(props, match, it):
    nelecs = int(match.group(1))
    key = 'nelecs'
    update_props(props, key, nelecs)


#
# Basis function data
#
[docs]@callback(r'^ AO_ATOMINDEX\[(\d+)\]=')
def ao_index(props, match, it):
    norbs = int(match.group(1))
    update_props(props, 'norbs', norbs)
    ao_atomindex = []
    while len(ao_atomindex) < norbs:
        line = next(it)
        ao_atomindex.extend([int(x) for x in line.split()])
    update_props(props, 'ao_atomindex', ao_atomindex)


[docs]@callback(r'^ ATOM_SYMTYPE\[(\d+)\]=')
def atom_symtype(props, match, it):
    norbs = int(match.group(1))
    ao_syms = []
    while len(ao_syms) < norbs:
        line = next(it)
        ao_syms.extend(line.split())
    update_props(props, 'ao_syms', ao_syms)


[docs]@callback(r'^ AO_ZETA\[(\d+)\]=')
def ao_zeta(props, match, it):
    norbs = int(match.group(1))
    ao_zeta = []
    while len(ao_zeta) < norbs:
        line = next(it)
        ao_zeta.extend([float(x) for x in line.split()])
    update_props(props, 'ao_zeta', ao_zeta)


[docs]@callback(r'^ ATOM_PQN\[(\d+)\]=')
def atom_pqn(props, match, it):
    norbs = int(match.group(1))
    atom_pqn = []
    while len(atom_pqn) < norbs:
        line = next(it)
        atom_pqn.extend([int(x) for x in line.split()])
    update_props(props, 'atom_pqn', atom_pqn)


#
# Orbital data
#
[docs]@callback(r'^ EIGENVALUES\[(\d+)\]=')
def eigenvalues(props, match, it):
    norbs = int(match.group(1))
    eigs = []
    while len(eigs) < norbs:
        line = next(it)
        eigs.extend([float(x) for x in line.split()])
    update_props(props, 'eigenvalues', eigs)


[docs]@callback(r'^ MOLECULAR_ORBITAL_OCCUPANCIES\[(\d+)\]=')
def mo_occupancies(props, match, it):
    norbs = int(match.group(1))
    mo_occs = []
    while len(mo_occs) < norbs:
        line = next(it)
        if not line.strip().startswith('#'):
            mo_occs.extend([float(x) for x in line.split()])
    update_props(props, 'mo_occs', mo_occs)


#
#FIXME these quantities are maybe too big to load into core by
#      default?  This maybe should be optimized?
#
[docs]@callback(r'^ OVERLAP_MATRIX\[(\d+)\]=')
def overlap_matrix(props, match, it):
    nvals = int(match.group(1))
    check_memory_use(nvals, 'overlap_matrix')
    line = next(it)  # skip line printing "Lower half triangle only"
    overlap = []
    while len(overlap) < nvals:
        line = next(it)
        overlap.extend([float(x) for x in line.split()])
    update_props(props, 'overlap_matrix', overlap)


[docs]@callback(r'^ EIGENVECTORS\[(\d+)\]=')
def eigenvectors(props, match, it):
    nvals = int(match.group(1))
    check_memory_use(nvals, 'eigenvectors')
    evecs = []
    while len(evecs) < nvals:
        line = next(it)
        evecs.extend([float(x) for x in line.split()])
    update_props(props, 'eigenvectors', evecs)


[docs]class MopacResultsMain():
    """
    A class to parse and store the results of a calculation from MOPAC_MAIN
    """

[docs]    def __init__(self, outfile, auxfile=None, struct=None):
        """
        :type  outfile: str
        :param outfile: name of .out file generated by MOPAC binary

        :type  auxfile: str
        :param auxfile: name of .aux file generated by MOPAC binary

        :type  struct: structure.Structure
        :param struct: structure instance to populate with data
        """

        self.properties = OrderedDict()
        self._structure = struct
        self._method = None
        self._status = False
        self._name = None
        self._output_file = outfile

        # Note we prioritize data from the .aux file ahead of the .out file
        if auxfile is not None:
            self._populate_from_file(auxfile)
        self._populate_from_file(outfile)
        self._name, suffix = os.path.splitext(outfile)
        if struct is not None:
            self._populate_structure()

    @property
    def name(self):
        return self._name

    @property
    def structure(self):
        return self._structure

    @property
    def method(self):
        return self._method

    @property
    def output_file(self):
        return self._output_file

    @property
    def statusOk(self):
        return self._status

    def _populate_from_file(self, filename):
        """
        Parse "filename" containing MOPAC_MAIN's versions's output data.
        """

        if os.path.exists(filename):
            with open(filename, 'r') as fh:
                parser = MopacMainTextParser(fh)

                # only update properties we don't have yet
                properties = parser.parse()
                for k in properties:
                    if k not in self.properties:
                        self.properties[k] = properties[k]

            if filename.endswith('.out'):
                self._output_file = filename
                self._status = self._verify_output_file()

        else:
            print('Error: file %s to parse does not exist.\n' % filename)
            print(f"\nWARNING: {MOPAC_MAIN} did not generate file\n%s" %
                  filename)
            print("Output results may be incomplete.\n")
            self._status = False

    def _verify_output_file(self):
        """
        Look for error messages and print tail of output file if any errors found.

        :return True if no errors found, otherwise False.
        """

        outfile = self._output_file
        buff = StringIO()
        ok = False
        if os.path.isfile(outfile):
            ok = True
            with open(outfile, 'rb') as fh:
                collect = False
                for line in fh:
                    text = line.decode('utf8')
                    if 'errors detected' in text.lower():
                        ok = False
                    if 'error and normal termination messages reported' in text.lower(
                    ):
                        ok = False
                    if 'JOB ENDED' in text:
                        collect = True
                    if collect:
                        buff.write(text)

                if not ok:
                    print("\nTail of outfile showing possible errors:\n")
                    print(buff.getvalue())
                    print()
        return ok

    def _populate_structure(self):
        """
        Compile parsed results into the structure.properties dictionary.
        """

        self.properties['atomic_number'] = []
        for at in self._structure.atom:
            self.properties['atomic_number'].append(at.atomic_number)

        for key, val in self.properties.items():
            if key == 'initial_geometry':
                xyz = self._structure.getXYZ(copy=False)
                if not numpy.allclose(numpy.array(val), xyz, atol=1e-4):
                    print('MOPAC initial geometry =\n', numpy.array(val))
                    print('.mae  initial geometry =\n', xyz)
                    msg = 'Initial geometries in MOPAC and input .mae ' \
                          'file are inconsistent'
                    raise ValueError(msg)
            elif key == 'final_geometry':
                self._structure.setXYZ(numpy.array(val))
            elif key[0] == 'atomic_mae':
                # Atomic Maestro property
                name = key[1]
                for atom, val2 in zip(self._structure.atom, val):
                    if val2 is not None:
                        atom.property[name] = val2
            elif key[0] == 'mae':
                # Molecular Maestro property
                name = key[1]
                self._structure.property[name] = val

    def _get_basis_functions(self):
        """
        Get basis function exponents for each atom
        Note 0 implies no such basis function on that atom.

        :rtype: ( list, list, list )
        :return:  zs = s-function exponents indexed by atom
                  zp = p-function exponents indexed by atom
                  zd = d-function exponents indexed by atom
        """
        # unittested

        # Initialize s,p,d exponents by atom.
        numat = self.properties['numat']
        zs = [0.0] * numat
        zpx = [0.0] * numat
        zpy = [0.0] * numat
        zpz = [0.0] * numat
        zdx2 = [0.0] * numat
        zdxz = [0.0] * numat
        zdz2 = [0.0] * numat
        zdyz = [0.0] * numat
        zdxy = [0.0] * numat

        # Process data parsed from MOPAC output files.
        ao_atomindex = self.properties['ao_atomindex']
        ao_zeta = self.properties['ao_zeta']
        ao_syms = self.properties['ao_syms']

        if len(ao_zeta) != len(ao_syms):
            raise MopacBasisError1(f"{MOPAC_MAIN} basis not read in correctly.")
        if len(ao_zeta) != len(ao_atomindex):
            raise MopacBasisError2(f"{MOPAC_MAIN} basis not read in correctly.")
        if max(ao_atomindex) != numat:
            raise MopacBasisError3(f"{MOPAC_MAIN} basis not read in correctly.")

        for i in range(len(ao_atomindex)):
            iatom = ao_atomindex[i] - 1
            # s functions
            if ao_syms[i] == 'S':
                zs[iatom] = ao_zeta[i]
            # p functions
            elif ao_syms[i] == 'PX':
                zpx[iatom] = ao_zeta[i]
            elif ao_syms[i] == 'PY':
                zpy[iatom] = ao_zeta[i]
            elif ao_syms[i] == 'PZ':
                zpz[iatom] = ao_zeta[i]
            # d functions
            elif ao_syms[i] == 'X2':
                zdx2[iatom] = ao_zeta[i]
            elif ao_syms[i] == 'XZ':
                zdxz[iatom] = ao_zeta[i]
            elif ao_syms[i] == 'Z2':
                zdz2[iatom] = ao_zeta[i]
            elif ao_syms[i] == 'YZ':
                zdyz[iatom] = ao_zeta[i]
            elif ao_syms[i] == 'XY':
                zdxy[iatom] = ao_zeta[i]
            else:
                raise MopacBasisError4('Unsupported basis function ang. mom.')

        # Assume all Px,Py,Pz components are the same
        zp = zpx
        if zpx != zpy:
            raise MopacBasisError5('Px, Py not equivalent!')
        if zpx != zpz:
            raise MopacBasisError5('Px, Pz not equivalent!')

        # Assume all D-function components are the same
        zd = zdx2
        if zdx2 != zdxz:
            raise MopacBasisError5('Dx2 and Dxz components not equivalent!')
        if zdx2 != zdz2:
            raise MopacBasisError5('Dx2 and Dz2 components not equivalent!')
        if zdx2 != zdyz:
            raise MopacBasisError5('Dx2 and Dyz components not equivalent!')
        if zdx2 != zdxy:
            raise MopacBasisError5('Dx2 and Dxy components not equivalent!')

        return zs, zp, zd

[docs]    def write_vis_files(self, nplot, gridres, gridext):
        """
        This function will call the Fortran routines to generate the
        3D data and .vis files for plotting surfaces.

        :type nplot: int
        :param nplot: number of MOs to plot around HOMO/LUMO gap.
        """

        # from .aux file
        nelecs = self.properties['nelecs']
        numat = self.properties['numat']
        natoms = self.properties['natoms']
        #FIXME when is numat and natoms different?

        # coord = [ [x,y,z], [x,y,z], [x,y,z], ... ]
        coord = self.properties['final_geometry']

        # nat = atomic numbers of real atoms in system
        nat = [at.atomic_number for at in self.structure.atom]

        # natorb = number of orbitals on each atom
        ao_atomindex = self.properties['ao_atomindex']
        natorb = [ao_atomindex.count(i + 1) for i in range(numat)]
        # norbs = total number of orbitals
        norbs = self.properties['norbs']

        # basis function exponents
        zs, zp, zd = self._get_basis_functions()

        # number of doubly-occupied MOs
        nclose = 0
        for occ in self.properties['mo_occs']:
            if occ == 2.0:
                nclose = nclose + 1

        # eigs = [ e-value for MO1, e-value for MO2, ... ]
        eigs = self.properties['eigenvalues']

        # overlap = overlap matrix in upper-triangular packed form
        overlap = self.properties['overlap_matrix']

        # evecs = [ [MO1], [MO2], [MO3], ... ]
        evecs = []
        for i in range(norbs):
            mo = []
            for j in range(norbs):
                mo.append(self.properties['eigenvectors'][j * norbs + i])
            evecs.append(mo)

        # for diagnostics, create pqn array from output data
        pqn = self.properties['atom_pqn']

        mopaclib.surfaces.plot_3d_data(self.name, nplot, gridres, gridext,
                                       nelecs, numat, natoms, coord, nat, norbs,
                                       nclose, natorb, overlap, eigs, evecs, zs,
                                       zp, zd, pqn)