# Source code for schrodinger.pipeline.stages.convert

"""
Stage for converting structure files from one format to another.

Copyright Schrodinger, LLC. All rights reserved.

"""
# Contributors: Matvey Adzhigirey

import os

import schrodinger.utils.subprocess as subprocess
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import stage
from schrodinger.utils.fileutils import get_structure_file_format


class ConvertStage(stage.Stage):
    """
    Stage for converting structure files from one format to another.

    Each file is converted using Schrodinger's format-specific conversion
    utility and the number of files is not changed. Input file format is
    determined from the extension of each file.

    Supported conversions:

        smiles <-> maestro
        smilescsv <-> maestro
        sd <-> maestro
        pdb -> maestro
        sd -> smiles
        sd -> smilescsv
        smiles -> smilescsv

    Files with unrecognized extensions are treated as SD (Ev:62575)
    """

    # Placeholders that are replaced by the actual input/output paths when a
    # command line is assembled from one of the templates below. Identity
    # (not equality) is used to recognize them, so no file name can ever be
    # mistaken for a placeholder.
    _INFILE = object()
    _OUTFILE = object()

    # Old-style OUTFORMAT values (format names) mapped to the extension that
    # is used now; kept for backwards-compatibility.
    _LEGACY_OUTFORMATS = {
        'maestro': 'maegz',
        'sd': 'sdf',
        'smiles': 'smi',
        'smilescsv': 'csv',
    }

    # Output formats accepted for OUTFORMAT values other than "csv".
    # NOTE(review): 'pdb' is not listed here, so the ('maestro', 'pdb') entry
    # of _CONVERSIONS below can currently never be reached from operate().
    _VALID_OUTFORMATS = ('sd', 'maestro', 'smiles')

    # Maps (input format, output format) to a tuple of:
    #   * the product whose runtime is required (or None), and
    #   * the list of (utility, argument template) steps to run in order.
    # When there is more than one step, every step but the last one writes
    # an intermediate compressed Maestro (.maegz) file that is fed to the
    # next step.
    _CONVERSIONS = {
        ('sd', 'maestro'): (None, [
            ('sdconvert', ('-isd', _INFILE, '-omae', _OUTFILE)),
        ]),
        ('maestro', 'sd'): (None, [
            ('sdconvert', ('-imae', _INFILE, '-osd', _OUTFILE)),
        ]),
        ('pdb', 'maestro'): (None, [
            ('pdbconvert', ('-ipdb', _INFILE, '-omae', _OUTFILE)),
        ]),
        ('maestro', 'pdb'): (None, [
            ('pdbconvert', ('-imae', _INFILE, '-opdb', _OUTFILE)),
        ]),
        ('smiles', 'maestro'): ('macromodel', [
            ('smiles_to_mae', (_INFILE, _OUTFILE)),
        ]),
        # NOTE: Do not use canvasConvert because it requires Canvas license:
        ('maestro', 'smiles'): ('macromodel', [
            ('uniquesmiles', (_INFILE, _OUTFILE)),
        ]),
        # NOTE: Do not use canvasConvert because it requires Canvas license.
        # For SD -> SMILES conversion, convert to MAESTRO format first:
        ('sd', 'smiles'): ('macromodel', [
            ('sdconvert', ('-isd', _INFILE, '-omae', _OUTFILE)),
            ('uniquesmiles', (_INFILE, _OUTFILE)),
        ]),
        # NOTE: CSV format is supported only with Canvas license.
        # The "-u" flag requests unique SMILES in the output.
        ('maestro', 'smilescsv'): ('canvas', [
            ('canvasConvert', ('-u', '-imae', _INFILE, '-ocsv', _OUTFILE)),
        ]),
        ('smilescsv', 'maestro'): ('canvas', [
            ('canvasConvert', ('-icsv', _INFILE, '-omae', _OUTFILE)),
        ]),
        ('sd', 'smilescsv'): ('canvas', [
            ('canvasConvert', ('-u', '-isd', _INFILE, '-ocsv', _OUTFILE)),
        ]),
        ('smiles', 'smilescsv'): ('canvas', [
            ('canvasConvert', ('-u', '-ismi', _INFILE, '-ocsv', _OUTFILE)),
        ]),
    }

    # Steps used instead of the regular maestro -> sd ones when the "2D"
    # option is set.
    _MAESTRO_TO_2D_SD_STEPS = [
        ('canvasConvert', ('-imae', _INFILE, '-osd', _OUTFILE, '-2D')),
    ]

    def __init__(self, *args, **kwargs):
        """
        See class docstring.
        """
        specs = """
        OUTFORMAT = string(default='maegz')
        2D = boolean(default=False) # Whether to generate 2D SD structures even for 3D input.
        """
        stage.Stage.__init__(self, specs=specs, *args, **kwargs)

        # FIXME: Make OUTFORMAT default to maegz, and have it act as
        # extension instead of format string.
        # This will allow compressed and uncompressed Maestro formats.

        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "structures", True)

    def operate(self):
        """
        Convert every input structure file to the format requested via the
        OUTFORMAT option and set the resulting files as output 1.

        Files that are already in the requested format are passed through
        unchanged; every other file is converted to a new output file, so
        the number of files does not change.

        Raises a RuntimeError if OUTFORMAT is invalid, if the input/output
        format combination is not supported, or if a conversion utility
        exits with a non-zero status.
        """
        outext, outformat = self._getOutputFormat()

        outfiles = []
        # The file number counts every input file, including the ones that
        # are passed through without conversion.
        for filenum, ligfile in enumerate(self.getInput(1).getFiles(),
                                          start=1):
            if ligfile.endswith('.csv'):
                informat = 'smilescsv'
            else:
                # Ev:62575 Unrecognized extensions are treated as SD:
                informat = get_structure_file_format(ligfile) or 'sd'

            if informat == outformat:
                outfiles.append(ligfile)
                continue

            outfile = self._convertFile(ligfile, informat, outformat, outext,
                                        filenum)
            self.debug('Converted file: "%s"' % outfile)
            self.checkFile(outfile)
            outfiles.append(outfile)

        self.info("\nConverted files: %s" % outfiles)
        self.setOutput(1, pipeio.Structures(outfiles))

    def _getOutputFormat(self):
        """
        Work out the output extension and format from the OUTFORMAT option.

        :return: Tuple of (output extension without the dot, output format
            name).

        Raises a RuntimeError if OUTFORMAT does not map to a supported
        output format.
        """
        outext = self['OUTFORMAT']
        # Add support for old options; for backwards-compatibility:
        outext = self._LEGACY_OUTFORMATS.get(outext, outext)

        # Add support for SMILES CSV format:
        if outext == 'csv':
            return outext, 'smilescsv'

        outformat = get_structure_file_format('test.%s' % outext)
        if outformat not in self._VALID_OUTFORMATS:
            msg = "Invalid OUTFORMAT value: %s" % outext
            raise RuntimeError(msg)
        return outext, outformat

    def _convertFile(self, ligfile, informat, outformat, outext, filenum):
        """
        Convert a single file and return the name of the converted file.

        :param ligfile: Path of the file to convert.
        :param informat: Format name of the input file.
        :param outformat: Format name to convert to.
        :param outext: Extension (without the dot) of the output file.
        :param filenum: Number of this file, used to generate output names.

        Raises a RuntimeError if the format combination is not supported or
        if any conversion step fails.
        """
        schrod = os.environ['SCHRODINGER']

        conversion = self._CONVERSIONS.get((informat, outformat))
        if conversion is None:
            msg = 'Invalid input/output format combination: "%s" => "%s"' % (
                informat, outformat)
            raise RuntimeError(msg)

        product, steps = conversion
        if product is not None:
            self.requiredProductRuntime(product)

        if (informat, outformat) == ('maestro', 'sd') and self['2D']:
            steps = self._MAESTRO_TO_2D_SD_STEPS

        # Every step except the last one produces an intermediate Maestro
        # file. Each of them is checked for success, so that a failure is
        # reported for the step that actually failed.
        source = ligfile
        for utility, template in steps[:-1]:
            intermediate = self.genOutputFileName(1,
                                                  filenum=filenum,
                                                  extension='.maegz')
            self._runCommand(
                self._makeCommand(schrod, utility, template, source,
                                  intermediate))
            source = intermediate

        outfile = self.genOutputFileName(1,
                                         filenum=filenum,
                                         extension='.' + outext)
        utility, template = steps[-1]
        self._runCommand(
            self._makeCommand(schrod, utility, template, source, outfile))
        return outfile

    def _makeCommand(self, schrod, utility, template, infile, outfile):
        """
        Assemble the command list for one conversion step.

        The executable is looked up in the "utilities" directory of the
        given installation directory, and the placeholders of the template
        are replaced by the given input and output paths.
        """
        cmd = [os.path.join(schrod, 'utilities', utility)]
        for arg in template:
            if arg is self._INFILE:
                cmd.append(infile)
            elif arg is self._OUTFILE:
                cmd.append(outfile)
            else:
                cmd.append(arg)
        return cmd

    def _runCommand(self, cmd):
        """
        Run the given command and raise a RuntimeError if it exits with a
        non-zero status.
        """
        self.debug('RUNNING: %s' % cmd)
        # Ev:96103 Report problems with conversion:
        retcode = subprocess.call(cmd)
        if retcode != 0:
            msg = "Failed to run command: %s" % subprocess.list2cmdline(cmd)
            raise RuntimeError(msg)