Source code for schrodinger.application.jaguar.jaguar_keyword_utils

"""
Functions to help grab keywords from xml and jaguar input files

Copyright Schrodinger, LLC. All rights reserved.
"""

import csv
import os
import sys

from schrodinger.application.jaguar.input import JaguarInput as jinp
from schrodinger.application.jaguar.keywordDB import load_keywords
from schrodinger.utils import csv_unicode
from schrodinger.utils import fileutils

# Path to the CSV file describing the available levels of theory; it is read
# by dftnames_markup() and all_dftnames() in this module.
LEVELS_OF_THEORY = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                                'levels_of_theory.csv')
# Path to the solvents CSV file in the same mmshare 'jaguar' data directory.
# NOTE(review): not referenced by any function visible in this module.
SOLVENTS = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                        'solvents.csv')

# Maps flag column names of levels_of_theory.csv to the human-readable type
# labels used as keys of the dictionary returned by dftnames_markup(). A row
# is filed under a label when the corresponding column holds the string '1'.
DFT_TYPES = {
    'is_recommended': 'Recommended',
    'is_dispersion_corrected_dft': 'Aposteriori-corrected',
    'is_long_range_corrected_dft': 'Long-range-corrected',
    'is_hybrid_dft': 'Hybrid',
    'is_meta_gga_dft': 'Meta-GGA',
    'is_gga_dft': 'Gradient-corrected (GGA)',
    'is_lda_dft': 'Local',
}


def jaguar_keywords_xml_filename():
    """
    Locate the jaguar_keywords.xml file in the mmshare data directory.

    :return: path to ``<mmshare data dir>/jaguar/jaguar_keywords.xml``
    :raise IOError: if nothing exists at that path
    """
    xml_path = os.path.join(fileutils.get_mmshare_data_dir(), 'jaguar',
                            'jaguar_keywords.xml')
    if os.path.exists(xml_path):
        return xml_path
    raise IOError("%s not found!" % xml_path)
def all_keywords():
    """
    Return the names of all keywords defined in jaguar_keywords.xml.

    :return: list of the keys of the keyword dictionary built by
        load_keywords() from the file found by jaguar_keywords_xml_filename()
    """
    # load_keywords() returns a (list, dict) pair; only the dict is needed.
    _, keywords_by_name = load_keywords(jaguar_keywords_xml_filename())
    return list(keywords_by_name)
def all_meaningful_keywords():
    """
    Return all keywords except the numbered ip/iopt/opt/cut families.

    A keyword is left out when it starts with one of the prefixes ``ip``,
    ``iopt``, ``opt`` or ``cut`` and the rest of its name can be parsed by
    ``int()`` (a name of the form ``ip12``, for instance). All other
    keywords are kept, in the order returned by all_keywords().

    :return: list of keyword names
    """
    numbered_prefixes = ('ip', 'iopt', 'opt', 'cut')

    def has_int_suffix(name, prefix):
        # True when `name` is `prefix` followed by text that int() accepts.
        if not name.startswith(prefix):
            return False
        try:
            int(name[len(prefix):])
        except ValueError:
            return False
        return True

    return [
        name for name in all_keywords()
        if not any(has_int_suffix(name, prefix) for prefix in numbered_prefixes)
    ]
def dftnames_markup():
    """
    Return the DFT functionals listed in levels_of_theory.csv, grouped by
    type.

    Intended for use by the documentation team for auto-doc'ing. See
    JAGUAR-9563.

    The result maps every label in DFT_TYPES to a list of
    ``(name, description, references)`` tuples, one per CSV row whose
    corresponding flag column is '1'. Rows whose ``is_non_dft`` column is '1'
    are skipped. A functional flagged under several types appears in several
    lists. ``description`` is the stripped description text and
    ``references`` is a list of whitespace-separated entries; either one is
    None when the CSV reader supplies None for that field.

    ::

        {'Local': [('HFS', 'a short description', ['doi:1']),
                   ('XALPHA', 'another functional', ['doi:2', 'doi:3']),
                   ('SVWN', None, None),
                   ...],
         'Aposteriori-corrected': [('PBE-ulg', None, None),
                                   ('B3LYP-MM', None, None),
                                   ...],
         'Recommended': [('B3LYP', None, None),
                         ('B3LYP-MM', None, None),
                         ...],
         ...
        }
    """
    by_type = {label: [] for label in DFT_TYPES.values()}
    with csv_unicode.reader_open(LEVELS_OF_THEORY) as fh:
        for row in csv.DictReader(fh):
            if row['is_non_dft'] == '1':
                # Non-DFT methods are not part of this listing.
                continue
            for column, flag in row.items():
                if flag != '1' or column not in DFT_TYPES:
                    continue
                method_name = row['method'].strip()
                description = row['description']
                if description is not None:
                    description = description.strip()
                references = row['references']
                if references is not None:
                    # Several references may share the field, separated by
                    # spaces; always hand back a list.
                    references = references.strip()
                    if ' ' in references:
                        references = references.split()
                    else:
                        references = [references]
                by_type[DFT_TYPES[column]].append(
                    (method_name, description, references))
    return by_type
def all_dftnames():
    """
    Return a sorted list of the method names in levels_of_theory.csv.

    Names are taken from the ``method`` column, stripped and upper-cased.
    Rows are not filtered, so non-DFT methods listed in the file are
    included as well. 'MP2' is appended when the file does not already
    provide it; no other name is added.

    :return: sorted list of upper-case method names
    """
    with csv_unicode.reader_open(LEVELS_OF_THEORY) as fh:
        names = [
            row['method'].strip().upper()
            for row in csv.DictReader(fh)
            if 'method' in row
        ]
    if 'MP2' not in names:
        names.append('MP2')
    names.sort()
    return names
def all_basisnames():
    """
    Return the names of all basis sets reported by ``basis.get_bases()``.

    Each entry is a single string: the basis set's ``name`` followed by one
    ``*`` per ``nstar`` and one ``+`` per ``nplus``, normalized with
    clean_basisname().

    :return: list of basis set name strings
    """
    # Imported here, as in the original, rather than at module level.
    from schrodinger.application.jaguar import basis as basis_module

    return [
        clean_basisname(''.join(
            [bset.name, '*' * bset.nstar, '+' * bset.nplus]))
        for bset in basis_module.get_bases()
    ]
def jaguar_input_keywords(fname):
    """
    Return a dictionary of the non-default keywords in a Jaguar input file.

    On top of what ``JaguarInput.getNonDefault()`` reports, the result
    contains:

    * ``multip`` / ``molchg``, copied from the structure properties
      ``i_m_Spin_multiplicity`` / ``i_m_Molecular_charge`` when present;
    * ``dftname``, always present and upper-cased. When the input does not
      set it, it is 'MP2' if the ``mp2`` keyword is set to a non-zero value
      and 'HF' otherwise;
    * ``basis``, always present and normalized by clean_basisname(). When
      the input does not set it, the default basis is used.

    If the input file cannot be loaded, an error message is printed and the
    process exits with status 1.

    :param fname: path of the Jaguar input file
    :return: dict mapping keyword name to value
    """
    try:
        ji = jinp(input=fname)
    except Exception as e:
        print('Error: cannot create input: %s' % e)
        sys.exit(1)

    keywords = ji.getNonDefault()

    # Charge and multiplicity may live on the structure rather than in the
    # keyword section. The multiplicity property name was previously
    # misspelled ('i_m_Spin_muliplicity'), so it could never match.
    st_properties = ji.getStructure().property
    for prop_name, keyword in (('i_m_Spin_multiplicity', 'multip'),
                               ('i_m_Molecular_charge', 'molchg')):
        if prop_name in st_properties:
            keywords[keyword] = st_properties[prop_name]

    if 'dftname' not in keywords:
        # Without a functional the level of theory is MP2 or plain HF.
        uses_mp2 = 'mp2' in keywords and keywords['mp2'] != 0
        keywords['dftname'] = 'MP2' if uses_mp2 else 'HF'
    if 'basis' not in keywords:
        keywords['basis'] = ji.getDefault('basis')

    # Normalize so the names compare equal to all_basisnames()/all_dftnames()
    keywords['basis'] = clean_basisname(keywords['basis'])
    keywords['dftname'] = keywords['dftname'].upper()
    return keywords
def clean_basisname(basis):
    """
    Return a normalized basis set name of the form ``NAME+++****``.

    The name is upper-cased and every ``+`` and ``*`` is moved to the end:
    first all the pluses, then all the stars, keeping their counts.

    :param basis: basis set name, with ``+``/``*`` in any position
    :return: the normalized name
    """
    stem = ''.join(ch for ch in basis.upper() if ch not in '+*')
    pluses = '+' * basis.count('+')
    stars = '*' * basis.count('*')
    return stem + pluses + stars
def keyword_coverage(input_files, print_report=True):
    """
    Measure how much of Jaguar's keyword space a set of input files uses.

    Keywords are compared against all_meaningful_keywords() and functionals
    against all_dftnames(); both require an exact name match. A basis set
    from all_basisnames() counts as covered when some input file uses a
    basis with the same base name, regardless of ``*``/``+`` (see
    basisname_in_list()).

    :param input_files: iterable of Jaguar input file names
    :param print_report: if True, print a coverage summary followed by the
        uncovered keywords, basis sets and functionals
    :return: a 6-tuple, in this order:

        1. percent of keywords covered
        2. percent of basis sets covered
        3. percent of functionals covered
        4. list of uncovered keywords
        5. list of uncovered basis sets
        6. list of uncovered functionals
    """
    # Start from the full lists and strike out whatever the inputs use.
    uncovered_keywords = all_meaningful_keywords()
    uncovered_functionals = all_dftnames()
    uncovered_basis = all_basisnames()
    n_all_keywords = len(uncovered_keywords)
    n_all_functionals = len(uncovered_functionals)
    n_all_basis = len(uncovered_basis)

    used_keywords = set()
    used_basis = set()
    used_functionals = set()
    for input_file in input_files:
        keywords = jaguar_input_keywords(input_file)
        used_keywords.update(keywords)
        used_basis.add(keywords['basis'])
        used_functionals.add(keywords['dftname'])

    for functional in used_functionals:
        if functional in uncovered_functionals:
            uncovered_functionals.remove(functional)
    for key in used_keywords:
        if key in uncovered_keywords:
            uncovered_keywords.remove(key)
    for basis in used_basis:
        # Removes every variant sharing this base name, in place.
        basisname_in_list(basis, uncovered_basis, True)

    n_covered_keywords = n_all_keywords - len(uncovered_keywords)
    n_covered_functionals = n_all_functionals - len(uncovered_functionals)
    n_covered_basis = n_all_basis - len(uncovered_basis)

    percent_keywords = 100 * float(n_covered_keywords) / n_all_keywords
    percent_functionals = (100 * float(n_covered_functionals) /
                           n_all_functionals)
    percent_basis = 100 * float(n_covered_basis) / n_all_basis

    if print_report:
        print("-------------------------------------")
        print(" Percent Coverage of Jaguar Keywords")
        print("-------------------------------------")
        print(" Keywords %f (%d of %d) \n" %
              (percent_keywords, n_covered_keywords, n_all_keywords))
        # Each line reports its own counts; the basis-set and functional
        # counts used to be swapped between these two lines.
        print(" basis sets %f (%d of %d) \n" %
              (percent_basis, n_covered_basis, n_all_basis))
        print(" functionals %f (%d of %d) \n" %
              (percent_functionals, n_covered_functionals,
               n_all_functionals))
        print("-------------------------------------")
        print("Uncovered Keywords:")
        for key in uncovered_keywords:
            print(key)
        print("Uncovered Basis Sets:")
        for basis in uncovered_basis:
            print(basis)
        print("Uncovered Functionals:")
        for name in uncovered_functionals:
            print(name)

    return (percent_keywords, percent_basis, percent_functionals,
            uncovered_keywords, uncovered_basis, uncovered_functionals)
def supported_basis_sets(functional):
    """
    Return the basis sets supported for a functional.

    Only B3LYP-MM and B3LYP-LOC (matched case-insensitively) have a
    restricted list; for any other functional the full list from
    all_basisnames() is returned.

    :param functional: functional name
    :return: list of basis set names
    """
    restricted = {
        'B3LYP-MM': ['LACVP*', 'CC-PVDZ++'],
        'B3LYP-LOC': ['6-31G*', 'CC-PVDZ', 'CC-PVTZ+', '6-311G-3DF-3PD+'],
    }
    key = functional.upper()
    if key not in restricted:
        return all_basisnames()
    return restricted[key]
def basis_is_supported_for_functional(functional, basis):
    """
    Tell whether a functional/basis set combination is supported.

    Only B3LYP-MM and B3LYP-LOC (matched case-insensitively) restrict the
    basis set; every other functional is reported as supported with any
    basis.

    :param functional: functional name
    :param basis: basis set name; case and the position of ``+``/``*`` do
        not matter, since it is normalized with clean_basisname() first
    :return: True if the combination is supported, False otherwise
    """
    if functional.upper() not in ('B3LYP-MM', 'B3LYP-LOC'):
        return True
    # Compare the normalized name: the supported lists are stored in
    # normalized form, so the raw user spelling (e.g. 'lacvp*') would
    # otherwise be rejected.
    return clean_basisname(basis) in supported_basis_sets(functional)
def basisname_in_list(basis, lst, remove=False):
    """
    Check whether a basis set is in a list, comparing base names only.

    The base name is the name with every ``*`` and ``+`` dropped, normalized
    by clean_basisname(); polarization and diffuse markers are therefore
    ignored in the comparison.

    :param basis: basis set name to look for
    :param lst: list of basis set names to inspect
    :param remove: if True, every matching entry is removed from ``lst``
        in place
    :return: bool, True if at least one entry of ``lst`` matched
    """

    def base_of(name):
        return clean_basisname(name.replace('*', '').replace('+', ''))

    wanted = base_of(basis)
    # Collect the matches first so that ``lst`` is not mutated while it is
    # being scanned.
    matches = [name for name in lst if base_of(name) == wanted]
    if remove:
        for name in matches:
            lst.remove(name)
    return bool(matches)