# Source code for schrodinger.application.vss.database

import csv
import glob
import os

from ruamel import yaml
from voluptuous import Required
from voluptuous import Schema

from schrodinger import structure
from schrodinger.utils import fileutils

# Column names of the SMILES CSV files that belong to a database.
# `Database.validate` requires CID_COL and SMILES_COL in every file header.
CID_COL = 'Title'
# NOTE(review): TRUTH_COL is not referenced in this module; presumably it is
# consumed by other modules -- confirm against callers.
TRUTH_COL = 'Truth'
SMILES_COL = 'SMILES'

# Name of the YAML index file that `Database.__init__` reads from the
# database directory.
INDEX_FILE = 'index.yml'

# Layout of the optional 'shape' section of the index file. Each value is
# handed to `_get_paths` as a glob pattern by `Database.get_shape_data`,
# keyed by shape type. 'pharm' is mandatory, 'atom_color' may be omitted.
SHAPE_DATA_SCHEMA = Schema({
    Required('pharm'): str,
    'atom_color': str,
})

# Layout of the index file as a whole:
#   name   -- stored as `Database.name`;
#   local  -- stored as `Database.local`, filled in as False when absent;
#   smiles -- glob pattern expanded by `Database.get_smiles_csv`;
#   shape  -- optional section described by SHAPE_DATA_SCHEMA.
INDEX_SCHEMA = Schema({
    Required('name'): str,
    Required('local', default=False): bool,
    Required('smiles'): str,
    'shape': SHAPE_DATA_SCHEMA,
})


def _get_paths(pattern):
    '''
    Expand a glob pattern into a sorted list of absolute paths.

    :param pattern: Glob pattern; a relative pattern is matched against
        (and made absolute with) the current working directory.
    :type pattern: str

    :return: Absolute paths of the matching entries in ascending order.
    :rtype: list(str)
    '''

    matches = [os.path.abspath(match) for match in glob.iglob(pattern)]
    matches.sort()
    return matches


class Database:
    '''
    Database metadata.

    Wraps a database directory described by its `INDEX_FILE`: the index
    supplies the database name, the `local` flag and the glob patterns
    that locate the SMILES CSV files and the optional shape data files.
    '''

    def __init__(self, path):
        '''
        Read the index file from the database directory and validate it
        against `INDEX_SCHEMA`.

        :param path: Directory path.
        :type path: str

        :raises OSError: If the index file cannot be opened.
        :raises voluptuous.Invalid: If the index contents do not conform
            to `INDEX_SCHEMA`.
        '''

        self.root = os.path.abspath(path)

        with open(os.path.join(path, INDEX_FILE), 'r') as fp:
            data = yaml.safe_load(fp)

        # Schema validation also fills in the default for 'local'.
        data = INDEX_SCHEMA(data)

        self.name = data['name']
        self.local = data['local']
        self._smiles_csv_glob = data['smiles']
        # The 'shape' section is optional; an empty mapping means that
        # no shape data is available for any shape type.
        self._shape_data_glob = data.get('shape', {})

    def get_smiles_csv(self):
        '''
        :return: Sorted absolute paths of the files that match the
            'smiles' glob pattern of the index.
        :rtype: list(str)
        '''

        # The pattern is evaluated inside the database directory so that
        # relative patterns refer to files of this database.
        with fileutils.chdir(self.root):
            return _get_paths(self._smiles_csv_glob)

    def get_shape_data(self, shape_type):
        '''
        :param shape_type: Key of the 'shape' section of the index
            (see `SHAPE_DATA_SCHEMA`).
        :type shape_type: str

        :return: Sorted absolute paths of the files that match the glob
            pattern registered for `shape_type`; empty list if the index
            has no entry for it.
        :rtype: list(str)
        '''

        with fileutils.chdir(self.root):
            # Only the lookup may legitimately fail with KeyError, so the
            # glob expansion is kept out of the `try` body.
            try:
                pattern = self._shape_data_glob[shape_type]
            except KeyError:
                return []
            return _get_paths(pattern)

    def count_structures(self):
        '''
        :return: Sum of `structure.count_structures()` over all SMILES
            CSV files of the database.
        :rtype: int
        '''

        return sum(
            structure.count_structures(fn) for fn in self.get_smiles_csv())

    def validate(self):
        '''
        Check that the header of every SMILES CSV file has the `CID_COL`
        and `SMILES_COL` columns. The check stops at the first problem;
        a database without any SMILES CSV file passes.

        :return: Validation success and error message.
        :rtype: (bool, str)
        '''

        for fn in self.get_smiles_csv():
            # newline='' lets the csv module do its own newline handling,
            # as its documentation requires for file objects.
            with open(fn, 'r', newline='') as fp:
                reader = csv.DictReader(fp)
                # `fieldnames` is None for an empty file.
                columns = set(reader.fieldnames or [])
            for col in (CID_COL, SMILES_COL):
                if col not in columns:
                    return (False, f"'{fn}': lacks '{col}' column")
        return (True, '')