# Source code for schrodinger.application.vss.pilot

import contextlib
import os

from ruamel import yaml

from schrodinger.application.vss import csvsmiles
from schrodinger.application.vss import database
from schrodinger.job import jobcontrol
from schrodinger.utils import fileutils
from schrodinger.utils import log

# Module-level logger, obtained from the 'vss' output logger.
logger = log.get_output_logger('vss')


def extract_smiles_and_cids(sources, outname):
    '''
    Copies the SMILES and CID columns of every file in `sources` into a
    single CSV file `outname`.

    The output uses `database.SMILES_COL` and `database.CID_COL` as its
    column names regardless of how the columns are named in each source.

    :param sources: List of sources (assumed to be valid).
    :type sources: list(schrodinger.application.vss.csvsmiles.CsvSmilesFile)

    :param outname: Destination file name.
    :type outname: str
    '''

    destination = csvsmiles.CsvSmilesFile(
        filename=outname,
        cid_col=database.CID_COL,
        smiles_col=database.SMILES_COL,
    )

    # One ExitStack is handed to the writer and to every reader; presumably
    # each registers its open file on it so that everything is closed
    # together when the `with` block ends -- confirm against CsvSmilesFile.
    with contextlib.ExitStack() as resources:
        out = destination.get_dict_writer(resources)
        out.writeheader()
        for source in sources:
            for record in source.get_dict_reader(resources):
                # Map the source-specific column names onto the database ones.
                smiles = record[source.smiles_col]
                cid = record[source.cid_col]
                out.writerow({
                    database.SMILES_COL: smiles,
                    database.CID_COL: cid,
                })
def make_database(name, sources, shape_type=None, host=None):
    '''
    Creates VSS "database" from the compounds in `sources`.

    A directory called `name` is created in the current working directory.
    The SMILES/CID data from all sources is written into it as
    `molecules.csv.gz`, optionally followed by a `shape_screen_gpu generate`
    job that produces the shape data, and finally an `index.yml` file
    describing the database contents.

    :param name: Database name. Must be a plain, non-empty name (no
        directory components) that does not exist yet.
    :type name: str

    :param sources: Iterable over source files (CSV). One-shot iterables
        (e.g. generators) are accepted.
    :type sources: iterable over `csvsmiles.CsvSmilesFile`

    :param shape_type: Type of the shape data to generate.
    :type shape_type: str or NoneType

    :param host: Execution host.
    :type host: str or NoneType

    :raises ValueError: If `name` is empty, contains a directory component
        or already exists, or if any of the sources fails validation.
    '''

    # An empty name would slip through the basename/exists checks and only
    # fail later inside os.mkdir() with an unrelated OSError.
    if name == '' or os.path.basename(name) != name or os.path.exists(name):
        raise ValueError(f'invalid database name "{name}"')

    # The sources are traversed twice (validation here, extraction below);
    # materialize them so that a generator is not exhausted by the first
    # pass, which would silently produce an empty database.
    sources = list(sources)

    # Validate everything up front, before anything is created on disk.
    for src in sources:
        valid, msg = src.validate()
        if not valid:
            raise ValueError(f'{src.filename}: {msg}')

    os.mkdir(name)

    smiles = 'molecules.csv.gz'
    extract_smiles_and_cids(sources, os.path.join(name, smiles))

    # Everything below runs inside the database directory, so file names
    # (job input, job output, index.yml) are relative to it.
    with fileutils.chdir(name):
        metadata = {'name': name, 'local': True, 'smiles': smiles}
        if shape_type:
            cmd = [
                'shape_screen_gpu', 'generate',
                '-shape_data_treatment', 'copy',
                '-shape_type', shape_type,
                '-source', smiles,
                '-ligprep',
                '-flex',
                '-JOBNAME', shape_type,
            ]
            if host:
                cmd += ['-HOST', host]
            job = jobcontrol.launch_job(cmd, print_output=True)
            # Block until the job finishes; presumably raises if it failed
            # (throw_on_failure=True) -- confirm against jobcontrol.
            job.wait(throw_on_failure=True)
            # The job is named after the shape type; its output is recorded
            # as '<shape_type>.bin'.
            metadata['shape'] = {shape_type: shape_type + '.bin'}
        with open('index.yml', 'w') as fp:
            yaml.dump(metadata, fp)