# Source code for schrodinger.application.matsci.espresso.ppdb

"""
Classes and functions to deal with pseudopotentials database.

Copyright Schrodinger, LLC. All rights reserved."""

import glob
import hashlib
import os
import sqlite3

from schrodinger.application.matsci import msutils
from schrodinger.application.matsci.espresso import utils as qeu
from schrodinger.infra import mm

# Directory holding the pseudopotential (PP) database and the PP files: the
# 'pp_db' subdirectory of the matsci user data directory.
PP_DIR = os.path.join(msutils.get_matsci_user_data_dir(), 'pp_db')
# Path of the sqlite database file inside PP_DIR.
PP_DB_FN = os.path.join(PP_DIR, 'ppdb.sqlite')
# Name of the table that stores one row per pseudopotential file.
PP_TABLE_NAME = 'matsci_pp_table'
# Pseudopotential types accepted by the DB; these are the same values as in
# the CHECK constraint on the "type" column of PP_CREATE_TABLE. Presumably
# norm-conserving, ultrasoft and projector augmented wave -- TODO confirm.
PP_TYPES = ('NC', 'US', 'PAW')

# SQL statement that creates the PP table (named PP_TABLE_NAME). Rows are
# keyed by the SHA-1 checksum of the PP file; "type" and "accuracy" are
# restricted to fixed sets of values by CHECK constraints, "accuracy"
# defaults to 'none' and "priority" defaults to 1.
PP_CREATE_TABLE = """
CREATE TABLE %s (
                sha1_ppfile_checksum text primary key,
                pp_fn text not null,
                atomic_number int not null,
                type text check("type" in ('NC', 'US', 'PAW')),
                ecutwfc real not null,
                ecutrho real not null,
                z_valence real not null,
                l_max real not null,
                dft_functional text not null,
                accuracy text check("accuracy" in ('none', 'accurate', 'medium', 'quick')) default 'none',
                priority int not null default 1,
                family text not null,
                description text
                );
""" % PP_TABLE_NAME

# Parameterized INSERT for one PP row. Values are bound through named
# placeholders (:checksum, :pp_fn, :anumber, :type, :ecutwfc, :ecutrho,
# :z_val, :l_max, :func, :family), so the statement must be executed with a
# mapping that provides all of these keys. The accuracy, priority and
# description columns are not listed here, so they are not set by this
# statement.
PP_TABLE_INSERT = """
INSERT INTO %s(sha1_ppfile_checksum, pp_fn, atomic_number, type,
               ecutwfc, ecutrho, z_valence, l_max, dft_functional, family)
       VALUES(:checksum, :pp_fn, :anumber, :type, :ecutwfc, :ecutrho,
              :z_val, :l_max, :func, :family);
""" % PP_TABLE_NAME

# Supported pseudopotential families, one row per family. Each row holds, in
# this order: directory, family, ecutwfc, ecutrho, functional.
SUPPORTED_PP_FAMILIES = [
    ['all_pbe_UPF_v1.5', 'GBRV', 40.0, 200.0, 'PBE'],
]


def get_checksum(file_fn):
    """
    Calculate the SHA-1 checksum of a file.

    The file is read in fixed-size chunks, so memory use stays bounded
    regardless of the file size.

    :type file_fn: str
    :param file_fn: File name

    :rtype: str
    :return: Hexadecimal SHA-1 digest of the file contents
    """
    chunk_size = 65536
    sha1 = hashlib.sha1()
    with open(file_fn, 'rb') as file_fh:
        # iter() with a sentinel keeps calling read() until it returns b'',
        # i.e. until the end of the file is reached
        for chunk in iter(lambda: file_fh.read(chunk_size), b''):
            sha1.update(chunk)
    return sha1.hexdigest()
def populate_ppdb_from_dir(conn, cur, dirpath, family, ecutwfc, ecutrho,
                           dft_func):
    """
    Populate PPDB using files from one of supported directories (see
    SUPPORTED_PP_FAMILIES).

    Files whose checksum is already present in the DB are skipped. Files for
    which the element could not be read, the pseudopotential type is not in
    PP_TYPES, or the Z valence is missing/zero are reported with a printed
    message and skipped, so that the remaining files in the directory are
    still processed. The cutoffs stored for every file are the `ecutwfc` and
    `ecutrho` arguments, not values read from the file. Nothing is done if
    the directory does not exist.

    :type conn: `sqlite3.Connection`
    :param conn: Handler to the sqlite3 connection

    :type cur: `sqlite3.Cursor`
    :param cur: Handler to the sqlite3 cursor

    :type dirpath: str
    :param dirpath: Relative directory path to the PP folder

    :type family: str
    :param family: Family of the PP. PPs within the same family are expected
        to be homogeneous

    :type ecutwfc: float
    :param ecutwfc: Energy cutoff

    :type ecutrho: float
    :param ecutrho: Density cutoff

    :type dft_func: str
    :param dft_func: Density functional name
    """
    abs_dirpath = os.path.join(PP_DIR, dirpath)
    if not os.path.isdir(abs_dirpath):
        return

    select_sql = ('SELECT count(*) FROM %s WHERE sha1_ppfile_checksum = ?' %
                  PP_TABLE_NAME)

    try:
        for upf_fn in glob.iglob(os.path.join(abs_dirpath, qeu.UPF_GLOB)):
            checksum = get_checksum(upf_fn)

            cur.execute(select_sql, (checksum,))
            # Skip, if checksum is already present in the DB
            if cur.fetchone()[0]:
                continue

            upf = qeu.UPFParser(upf_fn).getPseudo()
            element, pp_type, zval = upf.element, upf.pp_type, upf.zval

            if not element:
                print('Failed to read atomic type from %s, skipping...' %
                      upf_fn)
                continue
            anumber = mm.mmelement_get_atomic_number_by_symbol(element)

            if pp_type not in PP_TYPES:
                print('Not allowed pp type (%s) found in %s, skipping...' %
                      (pp_type, upf_fn))
                continue

            if not zval:
                print('Invalid Z valence value found in %s, skipping...' %
                      upf_fn)
                continue

            # A fresh mapping per file guarantees that no value from a
            # previously processed file can leak into this row
            sql_vals = {
                'checksum': checksum,
                # Stored relative to PP_DIR
                'pp_fn': os.path.join(dirpath, os.path.basename(upf_fn)),
                'anumber': anumber,
                'type': pp_type,
                'ecutwfc': ecutwfc,
                'ecutrho': ecutrho,
                'z_val': zval,
                # TODO: Fix l_max parsing
                'l_max': 0.0,
                'func': dft_func,
                'family': family,
            }
            cur.execute(PP_TABLE_INSERT, sql_vals)
    finally:
        # Commit once for the whole directory; doing it in `finally` keeps
        # the rows inserted before an unexpected error
        conn.commit()
def get_ppdb():
    """
    Connect to the database, create the PP table if it is not present, and
    register the supported PPs (see SUPPORTED_PP_FAMILIES).

    If anything fails while the database is being set up, the connection is
    closed before the exception is re-raised.

    :rtype: `sqlite3.Connection`, `sqlite3.Cursor`,
    :return: Handler to the sqlite3 connection and cursor
    """
    conn = sqlite3.connect(PP_DB_FN)
    try:
        cur = conn.cursor()

        # The string literal is single-quoted: in SQL double quotes denote
        # identifiers, and SQLite builds with double-quoted string literals
        # disabled reject type="table"
        cur.execute(
            "SELECT count(*) FROM sqlite_master WHERE type='table' AND "
            "name=?", (PP_TABLE_NAME,))

        # Table is not present in the DB
        if not cur.fetchone()[0]:
            cur.execute(PP_CREATE_TABLE)
            conn.commit()

        # NOTE(review): population is run on every call; files that are
        # already registered are skipped by populate_ppdb_from_dir based on
        # their checksum -- confirm this matches the intended behavior
        for pp_row in SUPPORTED_PP_FAMILIES:
            populate_ppdb_from_dir(conn, cur, *pp_row)
    except Exception:
        # Do not leak the connection when initialization fails
        conn.close()
        raise

    return conn, cur