Source code for schrodinger.application.mopac.results_main

"""
This module parses and stores the results of a MOPAC calculation.
A MopacResultsMain class is populated with data from the output files
from MOPAC_MAIN using a text parser.
"""

# Contributor: Mark A. Watson

import contextlib
import os
import re
import sys
from collections import OrderedDict
from io import StringIO
from past.utils import old_div

import numpy

import schrodinger.infra.mopac as mopaclib
from schrodinger.application.mopac import structure_launchers
from schrodinger.application.mopac.utils import convert_sparse_dict_to_list
from schrodinger.utils.fileutils import get_next_filename

MOPAC_MAIN = 'MOPAC2016'
MOPAC_HEAT_OF_FORMATION_PROP = 'r_mopac_MOPAC_Heat_of_Formation'
MOPAC_TOTAL_ENERGY_PROP = 'r_mopac_MOPAC_Total_Energy_EV'

# Memory use (in GB) for an object before a Warning is raised.
MEMORY_WARNING = 10

# Text to distinguish results from multiple jobs in a single .out file
OUTFILE_JOB_TEXT_SEPARATOR = '**                                '
OUTFILE_JOB_TEXT_SEPARATOR += f'{MOPAC_MAIN}'
OUTFILE_JOB_TEXT_SEPARATOR += '                                  **'
# Text to distinguish results from multiple jobs in a single .aux file
AUXFILE_JOB_TEXT_SEPARATOR = 'START OF MOPAC FILE'


[docs]class MopacPropertyError(Exception): pass
[docs]class MopacNumericalError(Exception): pass
[docs]class MopacBasisError1(Exception): pass
[docs]class MopacBasisError2(Exception): pass
[docs]class MopacBasisError3(Exception): pass
[docs]class MopacBasisError4(Exception): pass
[docs]class MopacBasisError5(Exception): pass
[docs]class MopacGradientParserError(Exception): pass
def _write_buffer_to_file(buff, filename): """ Write a StringIO buffer to disk with <filename>. :type buff: StringIO instance :param buff: memory buffer :type filename: str :param filename: name of file to write """ with open(filename, 'w') as fh: fh.write(buff.getvalue()) def _partition_files(outfile, separator, skip_first=True): """ Split up <outfile> at each occurrence of line.strip() == <separator> and write each section as a new file. The leading lines up to the first instance of <separator> are discarded by default. :type outfile: str :param outfile: name of file to split. :type separator: str :param separator: pattern to split file by. :type skip_first: bool :param skip_first: if True, skip all lines before before first separator. :return: list containing output file names for results that were found and "None"s for results that are missing """ if not os.path.isfile(outfile): return [] outfiles = [] buff = StringIO() newname = get_next_filename(outfile, midfix='_') expected_index = 1 with open(outfile, 'r') as fh: line = True while line: # True until EOF # Create a StringIO buffer for each segment with contextlib.closing(StringIO()) as buff: while line: # Loop over all lines in the file in buffered chunks line = fh.readline() if skip_first and line.strip() == separator: skip_first = False elif (structure_launchers.TITLE_START in line and structure_launchers.TITLE_END in line): regex = f'{structure_launchers.TITLE_START}(\\d+),' index = int(re.search(regex, line).group(1)) if index > expected_index: # Add a None for each index that was skipped outfiles += [None] * (index - expected_index) expected_index = index + 1 elif line.strip() == separator: # Dump buffered lines to new file and reset buffer _write_buffer_to_file(buff, newname) outfiles.append(newname) newname = get_next_filename(outfile, midfix='_') break # Save line in current buffer buff.write(line) if not line: # Empty out buffer at EOF _write_buffer_to_file(buff, newname) outfiles.append(newname) # We want to work with the original file when there's just one to # avoid unnecessary suffices etc. if len(outfiles) > 1: return outfiles else: return [outfile]
[docs]def split_outfile(outfile): """ Process .out or .aux output files, splitting them up into subfiles if more than one output deck is found per file. :type outfile: str :param outfile: name of output file to process :return: list containing output file names for results that were found and "None"s for results that are missing """ suffix = os.path.splitext(outfile)[1] if suffix == '.out': separator = OUTFILE_JOB_TEXT_SEPARATOR elif suffix == '.aux': separator = AUXFILE_JOB_TEXT_SEPARATOR else: raise RuntimeError(f'Outfile suffix "{suffix}" not recognized.') outfiles = _partition_files(outfile, separator) return outfiles
[docs]class MopacMainTextParser(object): """ Parser for MOPAC_MAIN's .out and .aux files. Using decorators, the callback() function below populates the "callbacks" dictionary in this class with key-value pairs of the form (regex,func) where "regex" is a regular expression to catch patterns in the output file, and "func" is the callback function which is triggered when the regex is matched. In this way, each specific callback is triggered by a given pattern such as 'HOMO LUMO ENERGIES (EV) =', and the callback function processes the matched lines. Note that multiple regex's can be associated with a given callback function by associating it with two or more decorators. In this way, the same callback can be triggered multiple times. e.g. for both the .aux and .out files, perhaps for consistency checking etc. Typically, however, only one regex is associated with each callback so that data for a given property is grepped from either the .out file OR the .aux file, but not both. """ callbacks = OrderedDict() callback_minimal = OrderedDict()
[docs] def __init__(self, file_iter): self.file_iter = file_iter
[docs] def parse(self): """ Loop over self.file_iter and trigger the callbacks. :return: OrderedDict of structure properties """ # The following try/except is needed for an empty file try: line = next(self.file_iter) except StopIteration: line = '' # Test each line in the file against the regexs properties = OrderedDict() for line in self.file_iter: # Loop over all patterns and gather any recognized data for pattern, func in self.callbacks.items(): m = pattern.search(line) if m: func(properties, m, self.file_iter) return properties
[docs] def parse_multiple_jobs(self, minimal=False): """ Loop over self.file_iter and trigger the callbacks. This is intended for use with output files that have results from multiple jobs in them (MOPAC-224). The optional arg <minimal> can be used to accelerate the parsing when only a few important (hard-coded) properties are required, as indicated by the @callback_minimal decorators. This is useful for large file and fast screening by energy. :type minimal: bool :param minimal: parse only a minimal subset of properties :return: list of OrderedDict of structure properties """ # The following try/except is needed for an empty file try: line = next(self.file_iter) except StopIteration: line = '' if minimal: callbacks = self.callback_minimal else: callbacks = self.callbacks # Test each line in the file against the regexs all_properties = [] properties = OrderedDict() for line in self.file_iter: # Assume we've hit a new set of results and reinitialize results if line.strip() == OUTFILE_JOB_TEXT_SEPARATOR: all_properties.append(properties) properties = OrderedDict() # Loop over all patterns and gather any recognized data for pattern, func in callbacks.items(): m = pattern.search(line) if m: func(properties, m, self.file_iter) all_properties.append(properties) return all_properties[1:]
[docs]def fortfloat(val): """ Convert a floating point number in Fortran notation to a regular float. """ val = val.replace('D', 'E') return float(val)
[docs]def check_memory_use(nvals, name): """ Check memory requirment for a long list of floats. Print message if size exceeds MEMORY_WARNING. :type nvals: int :type nvals: number of floats in list """ # size of float flt = sys.getsizeof(3.141) # size of empty list overhead = sys.getsizeof([]) # size of each reference in a list ref = sys.getsizeof([1.0]) - sys.getsizeof([]) # Total predicted size in bytes of large list total = overhead + nvals * (flt + ref) total = old_div(total, 1000000000.0) if total > MEMORY_WARNING: print("WARNING: parsing %s from the MOPAC .aux file" % name) print("uses approximately %d GB of memory." % total)
[docs]def update_props(props, key, value, precision=5): """ Update the props dictionary with the key-value pair, performing consistency checks if the key already exists. """ if key in props: # Compare old and new values and raise an exception # unless it's an insignificant numerical difference. try: numpy.testing.assert_approx_equal(value, props[key], significant=precision) except AssertionError: print('property = ', key) print('value1 = ', props[key]) print('value2 = ', value) msg = 'MOPAC output values differ within %d digits' % precision raise MopacNumericalError(msg) except ValueError: # Assume not floats, therefore must be exactly the same. msg = 'Error: Non-float MOPAC output values inconsistent.' if value != props[key]: print('property = ', key) print('value1 = ', props[key]) print('value2 = ', value) raise MopacPropertyError(msg) else: props[key] = value
[docs]def callback(regex): """ This decorator is just a convenient way to populate the callbacks dictionary in the MopacMainTextParser. The associated functions themselves are not actually decorated. """ def deco(func): regexp = re.compile(regex) if regexp in MopacMainTextParser.callbacks: raise KeyError("This regex is already being handled!") else: MopacMainTextParser.callbacks[regexp] = func return func return deco
[docs]def callback_minimal(regex): """ This decorator is just a convenient way to populate the callbacks dictionary in the MopacMainTextParser. The associated functions themselves are not actually decorated. """ def deco(func): regexp = re.compile(regex) if regexp in MopacMainTextParser.callbacks: raise KeyError("This regex is already being handled!") else: MopacMainTextParser.callbacks[regexp] = func MopacMainTextParser.callback_minimal[regexp] = func return func return deco
#----------------------------------------------------------------------------- # # Callbacks # # We can supply an arbitrary number of regular expressions using # additional decorators; e.g. this allows for parsing multiple files # which may output the information in different formats. SPACE = r'\s+' # Regex for integer INUM_REGEX = r'(\s*\d+)' # Regex for regular decimal float DNUM_REGEX = r'(\s*\+?\-?\d+\.\d+)' # Regex for Fortan decimal FNUM_REGEX = r'(\s*\+?\-?\d+\.\d+D[\+,\-]\d+)'
[docs]@callback_minimal(r'FINAL HEAT OF FORMATION\s+=' + DNUM_REGEX) @callback_minimal(r'HEAT_OF_FORMATION:KCAL/MOL=' + FNUM_REGEX) def heat_of_formation(props, match, it): key = ('mae', MOPAC_HEAT_OF_FORMATION_PROP) update_props(props, key, fortfloat(match.group(1)))
# in MOPAC-267, the upgrade to the newest version of MOPAC # has a different printout. The total energy now seems to be # only output with the ENPART keyword and has a different regex. # Commenting out old regexes, but keeping around in case just in case #@callback(r'TOTAL ENERGY\s+=' + DNUM_REGEX + r'\s+EV') #@callback(r'TOTAL_ENERGY:EV=' + FNUM_REGEX)
[docs]@callback(r'ETOT \(EONE \+ ETWO\)\s+' + DNUM_REGEX + r'\s+EV') def total_energy(props, match, it): key = ('mae', MOPAC_TOTAL_ENERGY_PROP) update_props(props, key, fortfloat(match.group(1)))
# commented out as MOPAC sometimes overwrites the fixed width # which breaks the extraction with regex #@callback(r'^ GRADIENTS:KCAL\/MOL\/ANGSTROM\[(\d+)\]=') #def func(props, match, it): # """ Parse gradient data from .aux """ # ndim = int(match.group(1)) # natom = ndim//3 # extract # gradients = [] # while len(gradients) < ndim: # line = next(it) # gradients.extend([float(x) for x in line.split()]) # split into XYZ # for idir, direction in enumerate('XYZ'): # grad = [gradients[3*iat + idir] for iat in range(natom)] # key = ('atomic_mae', 'r_mopac_GRADIENT_%s_Kcal_Angstrom' % direction) # update_props(props, key, grad)
[docs]@callback(r'FINAL POINT AND DERIVATIVES') def func(props, match, it): "Parse gradient data from .out" # consume the next two lines blank_line = next(it) title_line = next(it) # extract atom number, XY or Z, gradient and position # example of line we are parsing # 6 2 C CARTESIAN Z 0.000000 -4.262741 KCAL/ANGSTROM regex = re.compile( r'^\s+\d+\s+' + INUM_REGEX + r'\s+[A-Z,a-z]+\s+CARTESIAN\s+([X,Y,Z])\s+' \ + DNUM_REGEX + r'\s+' + DNUM_REGEX + r'\s+KCAL\/ANGSTROM$' ) # this count keeps track of which atom we expect cnt = 1 atomic_gradients = {} while True: line = next(it) m = regex.search(line) if m: at_no, direction, position, gradient = m.groups() # test that atoms are ordered # gradients are ordered as atom 1 x, atom 1 y, atom 1 z, atom2 x, ... if str((cnt - 1) // 3 + 1) != at_no: raise MopacGradientParserError( 'atom indexes in gradient are not in increasing order') atomic_gradients.setdefault(direction, []).append(fortfloat(gradient)) cnt += 1 else: break # store gradient data for direction in atomic_gradients: key = ('atomic_mae', 'r_mopac_GRADIENT_%s_Kcal_Angstrom' % direction) update_props(props, key, atomic_gradients[direction])
#FIXME method names printed differently in .out and .aux file by MOPAC2016 # .out has '"METHOD" CALCULATION RESULTS', .aux has 'METHOD="METHOD"' #@callback_minimal(r'The\s+(.*)\s+Hamiltonian')
[docs]@callback_minimal(r' (.*) CALCULATION') #@callback_minimal(r'METHOD=(\S+)') def method(props, match, it): key = ('mae', 's_mopac_SemiEmpirical_Method') update_props(props, key, match.group(1)) update_props(props, 'method', match.group(1))
[docs]@callback(r'IONIZATION_POTENTIAL:EV=' + FNUM_REGEX) def ionization_potential(props, match, it): key = ('mae', 'r_mopac_Ionization_Energy') update_props(props, key, fortfloat(match.group(1)))
[docs]@callback(r'HOMO LUMO ENERGIES \(EV\) =' + DNUM_REGEX + SPACE + DNUM_REGEX) def homo_lumo_energy(props, match, it): key = ('mae', 'r_mopac_HOMO_Energy') update_props(props, key, float(match.group(1))) key = ('mae', 'r_mopac_LUMO_Energy') update_props(props, key, float(match.group(2)))
[docs]@callback(r'ALPHA SOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX) def alpha_somo_lumo_energy(props, match, it): key = ('mae', 'r_mopac_Alpha_SOMO_Energy') update_props(props, key, float(match.group(1))) key = ('mae', 'r_mopac_Alpha_LUMO_Energy') update_props(props, key, float(match.group(2)))
[docs]@callback(r'ALPHA HOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX) def alpa_homo_lumo_energy(props, match, it): key = ('mae', 'r_mopac_Alpha_HOMO_Energy') update_props(props, key, float(match.group(1))) key = ('mae', 'r_mopac_Alpha_LUMO_Energy') update_props(props, key, float(match.group(2)))
[docs]@callback(r'BETA\s+SOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX) def beta_somo_lumo_energy(props, match, it): key = ('mae', 'r_mopac_Beta_SOMO_Energy') update_props(props, key, float(match.group(1))) key = ('mae', 'r_mopac_Beta_LUMO_Energy') update_props(props, key, float(match.group(2)))
[docs]@callback(r'BETA\s+HOMO LUMO \(EV\)\s+=' + DNUM_REGEX + SPACE + DNUM_REGEX) def beta_homo_lumo_energy(props, match, it): key = ('mae', 'r_mopac_Beta_HOMO_Energy') update_props(props, key, float(match.group(1))) key = ('mae', 'r_mopac_Beta_LUMO_Energy') update_props(props, key, float(match.group(2)))
[docs]@callback(r'Parr & Pople absolute hardness:' + DNUM_REGEX) def parr_and_pople(props, match, it): key = ('mae', 'r_mopac_Parr_Pople_Hardness') update_props(props, key, float(match.group(1))) # According to MOPAC author James Stewart, these are the same. key = ('mae', 'r_mopac_Molecular_Hardness') update_props(props, key, float(match.group(1)))
[docs]@callback(r'Mulliken electronegativity:' + DNUM_REGEX) def mulliken_electronegativity(props, match, it): key = ('mae', 'r_mopac_Mulliken_Electronegativity') update_props(props, key, float(match.group(1))) # According to MOPAC author James Stewart, these are the same # except for a sign change. key = ('mae', 'r_mopac_Molecular_Electronegativity') update_props(props, key, -float(match.group(1)))
[docs]@callback(r'DIPOLE:DEBYE=' + FNUM_REGEX) def dipole(props, match, it): key = ('mae', 'r_mopac_Dipole') update_props(props, key, fortfloat(match.group(1)))
[docs]@callback(r'DIP_VEC:DEBYE\[3\]=' + FNUM_REGEX + SPACE + FNUM_REGEX + SPACE + FNUM_REGEX) def dipole_xyz(props, match, it): key = ('mae', 'r_mopac_Dipole_X') update_props(props, key, fortfloat(match.group(1))) key = ('mae', 'r_mopac_Dipole_Y') update_props(props, key, fortfloat(match.group(2))) key = ('mae', 'r_mopac_Dipole_Z') update_props(props, key, fortfloat(match.group(3)))
[docs]@callback(r'DIPOLE MOMENT EVALUATED FROM THE POINT CHARGES') def dipole_esp(props, match, it): regex = re.compile(DNUM_REGEX + SPACE + DNUM_REGEX + SPACE + DNUM_REGEX + SPACE + DNUM_REGEX) while True: line = next(it) m = regex.search(line) if m: key = ('mae', 'r_mopac_Dipole_ESP_X') update_props(props, key, float(m.group(1))) key = ('mae', 'r_mopac_Dipole_ESP_Y') update_props(props, key, float(m.group(2))) key = ('mae', 'r_mopac_Dipole_ESP_Z') update_props(props, key, float(m.group(3))) key = ('mae', 'r_mopac_Dipole_ESP') update_props(props, key, float(m.group(4))) break
# # Atomic properties #
[docs]@callback(r'ATOM_X:ANGSTROMS\[(\d+)\]=') def atom_x_angstroms(props, match, it): natoms = old_div(int(match.group(1)), 3) coords = [] for i in range(natoms): line = next(it) coords.append([float(x) for x in line.split()]) key = 'initial_geometry' update_props(props, key, coords)
[docs]@callback(r'ATOM_X_OPT:ANGSTROMS\[(\d+)\]=') def atom_x_opt(props, match, it): natoms = old_div(int(match.group(1)), 3) coords = [] for i in range(natoms): line = next(it) coords.append([float(x) for x in line.split()]) key = 'final_geometry' update_props(props, key, coords)
[docs]@callback(r'^ ATOM_CHARGES\[(\d+)\]=') def atom_charges(props, match, it): natoms = int(match.group(1)) charges = [] while len(charges) < natoms: line = next(it) charges.extend([float(x) for x in line.split()]) key = ('atomic_mae', 'r_mopac_NDDO_Charge') update_props(props, key, charges)
[docs]@callback(r'^ MULLIKEN_ATOM_CHARGES\[(\d+)\]=') def mulliken_charge(props, match, it): natoms = int(match.group(1)) charges = [] while len(charges) < natoms: line = next(it) charges.extend([float(x) for x in line.split()]) key = ('atomic_mae', 'r_mopac_Mulliken_Charge') update_props(props, key, charges)
[docs]@callback(r'^ ELECTOSTATIC_POTENTIAL_CHARGES\[(\d+)\]=') def esp_charge(props, match, it): natoms = int(match.group(1)) charges = [] while len(charges) < natoms: line = next(it) charges.extend([float(x) for x in line.split()]) key = ('atomic_mae', 'r_mopac_ESP_Charge') update_props(props, key, charges)
[docs]@callback(r'a\s+n\s+Dn\(r\)\s+De\(r\)\s+q\(r\) - Z\(r\)') def total_nucleophilic_superdelocalizability(props, match, it): regex1 = re.compile(r'\S+' + INUM_REGEX + SPACE + DNUM_REGEX + SPACE + DNUM_REGEX + SPACE + DNUM_REGEX) regex2 = re.compile(r'Total:' + DNUM_REGEX + SPACE + DNUM_REGEX) Dn = OrderedDict() De = OrderedDict() while True: line = next(it) m1 = regex1.search(line) m2 = regex2.search(line) if m1: Dn[int(m1.group(1))] = float(m1.group(2)) De[int(m1.group(1))] = float(m1.group(3)) elif m2: key = ('mae', 'r_mopac_Total_Nucleophilic_Superdelocalizability') update_props(props, key, float(m2.group(1))) key = ('mae', 'r_mopac_Total_Electrophilic_Superdelocalizability') update_props(props, key, float(m2.group(2))) break key = ('atomic_mae', 'r_mopac_Nucleophilic_Superdelocalizability') Dn = convert_sparse_dict_to_list(Dn) update_props(props, key, Dn) key = ('atomic_mae', 'r_mopac_Electrophilic_Superdelocalizability') De = convert_sparse_dict_to_list(De) update_props(props, key, De)
[docs]@callback(r'a\s+n\s+piS\(r\)') def self_polarizability(props, match, it): regex1 = re.compile(r'\S+' + INUM_REGEX + SPACE + DNUM_REGEX) regex2 = re.compile(r'Total:' + DNUM_REGEX) piS = OrderedDict() while True: line = next(it) m1 = regex1.search(line) m2 = regex2.search(line) if m1: piS[int(m1.group(1))] = float(m1.group(2)) elif m2: key = ('mae', 'r_mopac_Total_Atom_Self_Polarizability') update_props(props, key, float(m2.group(1))) break key = ('atomic_mae', 'r_mopac_Atom_Self_Polarizability') piS = convert_sparse_dict_to_list(piS) update_props(props, key, piS)
# # Stoichiometry and charge #
[docs]@callback(r'^ ATOM_EL\[(\d+)\]=') def atom_el(props, match, it): natoms = int(match.group(1)) update_props(props, 'natoms', natoms) update_props(props, 'numat', natoms)
#FIXME when is numat and natoms different? Real atoms, dummy atoms??
[docs]@callback(r'^ NUM_ELECTRONS=(\d+)') def num_electrons(props, match, it): nelecs = int(match.group(1)) key = 'nelecs' update_props(props, key, nelecs)
# # Basis function data #
[docs]@callback(r'^ AO_ATOMINDEX\[(\d+)\]=') def ao_index(props, match, it): norbs = int(match.group(1)) update_props(props, 'norbs', norbs) ao_atomindex = [] while len(ao_atomindex) < norbs: line = next(it) ao_atomindex.extend([int(x) for x in line.split()]) update_props(props, 'ao_atomindex', ao_atomindex)
[docs]@callback(r'^ ATOM_SYMTYPE\[(\d+)\]=') def atom_symtype(props, match, it): norbs = int(match.group(1)) ao_syms = [] while len(ao_syms) < norbs: line = next(it) ao_syms.extend(line.split()) update_props(props, 'ao_syms', ao_syms)
[docs]@callback(r'^ AO_ZETA\[(\d+)\]=') def ao_zeta(props, match, it): norbs = int(match.group(1)) ao_zeta = [] while len(ao_zeta) < norbs: line = next(it) ao_zeta.extend([float(x) for x in line.split()]) update_props(props, 'ao_zeta', ao_zeta)
[docs]@callback(r'^ ATOM_PQN\[(\d+)\]=') def atom_pqn(props, match, it): norbs = int(match.group(1)) atom_pqn = [] while len(atom_pqn) < norbs: line = next(it) atom_pqn.extend([int(x) for x in line.split()]) update_props(props, 'atom_pqn', atom_pqn)
# # Orbital data #
[docs]@callback(r'^ EIGENVALUES\[(\d+)\]=') def eigenvalues(props, match, it): norbs = int(match.group(1)) eigs = [] while len(eigs) < norbs: line = next(it) eigs.extend([float(x) for x in line.split()]) update_props(props, 'eigenvalues', eigs)
[docs]@callback(r'^ MOLECULAR_ORBITAL_OCCUPANCIES\[(\d+)\]=') def mo_occupancies(props, match, it): norbs = int(match.group(1)) mo_occs = [] while len(mo_occs) < norbs: line = next(it) if not line.strip().startswith('#'): mo_occs.extend([float(x) for x in line.split()]) update_props(props, 'mo_occs', mo_occs)
# #FIXME these quantities are maybe too big to load into core by # default? This maybe should be optimized? #
[docs]@callback(r'^ OVERLAP_MATRIX\[(\d+)\]=') def overlap_matrix(props, match, it): nvals = int(match.group(1)) check_memory_use(nvals, 'overlap_matrix') line = next(it) # skip line printing "Lower half triangle only" overlap = [] while len(overlap) < nvals: line = next(it) overlap.extend([float(x) for x in line.split()]) update_props(props, 'overlap_matrix', overlap)
[docs]@callback(r'^ EIGENVECTORS\[(\d+)\]=') def eigenvectors(props, match, it): nvals = int(match.group(1)) check_memory_use(nvals, 'eigenvectors') evecs = [] while len(evecs) < nvals: line = next(it) evecs.extend([float(x) for x in line.split()]) update_props(props, 'eigenvectors', evecs)
[docs]class MopacResultsMain(): """ A class to parse and store the results of a calculation from MOPAC_MAIN """
[docs] def __init__(self, outfile, auxfile=None, struct=None): """ :type outfile: str :param outfile: name of .out file generated by MOPAC binary :type auxfile: str :param auxfile: name of .aux file generated by MOPAC binary :type struct: structure.Structure :param struct: structure instance to populate with data """ self.properties = OrderedDict() self._structure = struct self._method = None self._status = False self._name = None self._output_file = outfile # Note we prioritize data from the .aux file ahead of the .out file if auxfile is not None: self._populate_from_file(auxfile) self._populate_from_file(outfile) self._name, suffix = os.path.splitext(outfile) if struct is not None: self._populate_structure()
@property def name(self): return self._name @property def structure(self): return self._structure @property def method(self): return self._method @property def output_file(self): return self._output_file @property def statusOk(self): return self._status def _populate_from_file(self, filename): """ Parse "filename" containing MOPAC_MAIN's versions's output data. """ if os.path.exists(filename): with open(filename, 'r') as fh: parser = MopacMainTextParser(fh) # only update properties we don't have yet properties = parser.parse() for k in properties: if k not in self.properties: self.properties[k] = properties[k] if filename.endswith('.out'): self._output_file = filename self._status = self._verify_output_file() else: print('Error: file %s to parse does not exist.\n' % filename) print(f"\nWARNING: {MOPAC_MAIN} did not generate file\n%s" % filename) print("Output results may be incomplete.\n") self._status = False def _verify_output_file(self): """ Look for error messages and print tail of output file if any errors found. :return True if no errors found, otherwise False. """ outfile = self._output_file buff = StringIO() ok = False if os.path.isfile(outfile): ok = True with open(outfile, 'rb') as fh: collect = False for line in fh: text = line.decode('utf8') if 'errors detected' in text.lower(): ok = False if 'error and normal termination messages reported' in text.lower( ): ok = False if 'JOB ENDED' in text: collect = True if collect: buff.write(text) if not ok: print("\nTail of outfile showing possible errors:\n") print(buff.getvalue()) print() return ok def _populate_structure(self): """ Compile parsed results into the structure.properties dictionary. """ self.properties['atomic_number'] = [] for at in self._structure.atom: self.properties['atomic_number'].append(at.atomic_number) for key, val in self.properties.items(): if key == 'initial_geometry': xyz = self._structure.getXYZ(copy=False) if not numpy.allclose(numpy.array(val), xyz, atol=1e-4): print('MOPAC initial geometry =\n', numpy.array(val)) print('.mae initial geometry =\n', xyz) msg = 'Initial geometries in MOPAC and input .mae ' \ 'file are inconsistent' raise ValueError(msg) elif key == 'final_geometry': self._structure.setXYZ(numpy.array(val)) elif key[0] == 'atomic_mae': # Atomic Maestro property name = key[1] for atom, val2 in zip(self._structure.atom, val): if val2 is not None: atom.property[name] = val2 elif key[0] == 'mae': # Molecular Maestro property name = key[1] self._structure.property[name] = val def _get_basis_functions(self): """ Get basis function exponents for each atom Note 0 implies no such basis function on that atom. :rtype: ( list, list, list ) :return: zs = s-function exponents indexed by atom zp = p-function exponents indexed by atom zd = d-function exponents indexed by atom """ # unittested # Initialize s,p,d exponents by atom. numat = self.properties['numat'] zs = [0.0] * numat zpx = [0.0] * numat zpy = [0.0] * numat zpz = [0.0] * numat zdx2 = [0.0] * numat zdxz = [0.0] * numat zdz2 = [0.0] * numat zdyz = [0.0] * numat zdxy = [0.0] * numat # Process data parsed from MOPAC output files. ao_atomindex = self.properties['ao_atomindex'] ao_zeta = self.properties['ao_zeta'] ao_syms = self.properties['ao_syms'] if len(ao_zeta) != len(ao_syms): raise MopacBasisError1(f"{MOPAC_MAIN} basis not read in correctly.") if len(ao_zeta) != len(ao_atomindex): raise MopacBasisError2(f"{MOPAC_MAIN} basis not read in correctly.") if max(ao_atomindex) != numat: raise MopacBasisError3(f"{MOPAC_MAIN} basis not read in correctly.") for i in range(len(ao_atomindex)): iatom = ao_atomindex[i] - 1 # s functions if ao_syms[i] == 'S': zs[iatom] = ao_zeta[i] # p functions elif ao_syms[i] == 'PX': zpx[iatom] = ao_zeta[i] elif ao_syms[i] == 'PY': zpy[iatom] = ao_zeta[i] elif ao_syms[i] == 'PZ': zpz[iatom] = ao_zeta[i] # d functions elif ao_syms[i] == 'X2': zdx2[iatom] = ao_zeta[i] elif ao_syms[i] == 'XZ': zdxz[iatom] = ao_zeta[i] elif ao_syms[i] == 'Z2': zdz2[iatom] = ao_zeta[i] elif ao_syms[i] == 'YZ': zdyz[iatom] = ao_zeta[i] elif ao_syms[i] == 'XY': zdxy[iatom] = ao_zeta[i] else: raise MopacBasisError4('Unsupported basis function ang. mom.') # Assume all Px,Py,Pz components are the same zp = zpx if zpx != zpy: raise MopacBasisError5('Px, Py not equivalent!') if zpx != zpz: raise MopacBasisError5('Px, Pz not equivalent!') # Assume all D-function components are the same zd = zdx2 if zdx2 != zdxz: raise MopacBasisError5('Dx2 and Dxz components not equivalent!') if zdx2 != zdz2: raise MopacBasisError5('Dx2 and Dz2 components not equivalent!') if zdx2 != zdyz: raise MopacBasisError5('Dx2 and Dyz components not equivalent!') if zdx2 != zdxy: raise MopacBasisError5('Dx2 and Dxy components not equivalent!') return zs, zp, zd
[docs] def write_vis_files(self, nplot, gridres, gridext): """ This function will call the Fortran routines to generate the 3D data and .vis files for plotting surfaces. :type nplot: int :param nplot: number of MOs to plot around HOMO/LUMO gap. """ # from .aux file nelecs = self.properties['nelecs'] numat = self.properties['numat'] natoms = self.properties['natoms'] #FIXME when is numat and natoms different? # coord = [ [x,y,z], [x,y,z], [x,y,z], ... ] coord = self.properties['final_geometry'] # nat = atomic numbers of real atoms in system nat = [at.atomic_number for at in self.structure.atom] # natorb = number of orbitals on each atom ao_atomindex = self.properties['ao_atomindex'] natorb = [ao_atomindex.count(i + 1) for i in range(numat)] # norbs = total number of orbitals norbs = self.properties['norbs'] # basis function exponents zs, zp, zd = self._get_basis_functions() # number of doubly-occupied MOs nclose = 0 for occ in self.properties['mo_occs']: if occ == 2.0: nclose = nclose + 1 # eigs = [ e-value for MO1, e-value for MO2, ... ] eigs = self.properties['eigenvalues'] # overlap = overlap matrix in upper-triangular packed form overlap = self.properties['overlap_matrix'] # evecs = [ [MO1], [MO2], [MO3], ... ] evecs = [] for i in range(norbs): mo = [] for j in range(norbs): mo.append(self.properties['eigenvectors'][j * norbs + i]) evecs.append(mo) # for diagnostics, create pqn array from output data pqn = self.properties['atom_pqn'] mopaclib.surfaces.plot_3d_data(self.name, nplot, gridres, gridext, nelecs, numat, natoms, coord, nat, norbs, nclose, natorb, overlap, eigs, evecs, zs, zp, zd, pqn)