"""
Stage for converting structure files from one format to another.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Matvey Adzhigirey
import os
import schrodinger.utils.subprocess as subprocess
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import stage
from schrodinger.utils.fileutils import get_structure_file_format
class ConvertStage(stage.Stage):
    """
    Stage for converting structure files from one format to another.

    Each file is converted using the matching Schrodinger conversion
    utility, and the number of files is not changed. Files that are already
    in the requested format are passed through untouched.

    Input file format is determined from the extension of each file.

    Supported conversions::

        smiles    <-> maestro
        smilescsv <-> maestro
        sd        <-> maestro
        pdb        -> maestro
        sd         -> smiles
        sd         -> smilescsv
        smiles     -> smilescsv

    Files with unrecognized extensions are treated as SD (Ev:62575).
    """

    # Old OUTFORMAT format names, kept for backwards compatibility, mapped
    # to the file extension that is used in their place.
    _LEGACY_OUTFORMATS = {
        'maestro': 'maegz',
        'sd': 'sdf',
        'smiles': 'smi',
        'smilescsv': 'csv',
    }

    # Conversions done with a single command of the form:
    #   <utility> <input args> INFILE <output args> OUTFILE
    # Key:   (input format, output format)
    # Value: (product passed to requiredProductRuntime() or None,
    #         utility name, input args, output args)
    # NOTE: canvasConvert is avoided for the plain SMILES conversions because
    # it requires a Canvas license; CSV is only supported through it.
    # NOTE: '-u' makes canvasConvert output unique SMILES.
    # NOTE: ('maestro', 'pdb') can not currently be reached from operate(),
    # because 'pdb' is rejected there as an OUTFORMAT value.
    _SIMPLE_CONVERSIONS = {
        ('sd', 'maestro'): (None, 'sdconvert', ('-isd',), ('-omae',)),
        ('pdb', 'maestro'): (None, 'pdbconvert', ('-ipdb',), ('-omae',)),
        ('maestro', 'pdb'): (None, 'pdbconvert', ('-imae',), ('-opdb',)),
        ('smiles', 'maestro'): ('macromodel', 'smiles_to_mae', (), ()),
        ('maestro', 'smiles'): ('macromodel', 'uniquesmiles', (), ()),
        ('maestro', 'smilescsv'): ('canvas', 'canvasConvert',
                                   ('-u', '-imae'), ('-ocsv',)),
        ('smilescsv', 'maestro'): ('canvas', 'canvasConvert',
                                   ('-icsv',), ('-omae',)),
        ('sd', 'smilescsv'): ('canvas', 'canvasConvert',
                              ('-u', '-isd'), ('-ocsv',)),
        ('smiles', 'smilescsv'): ('canvas', 'canvasConvert',
                                  ('-u', '-ismi'), ('-ocsv',)),
    }

    def __init__(self, *args, **kwargs):
        """
        Declare the stage keywords (OUTFORMAT, 2D) and the single
        "structures" input and output. See class docstring.
        """
        specs = """
OUTFORMAT = string(default='maegz')
2D = boolean(default=False) # Whether to generate 2D SD structures even for 3D input.
"""
        stage.Stage.__init__(self, *args, specs=specs, **kwargs)

        # NOTE: OUTFORMAT is interpreted as a file extension by operate();
        # the old format names ('maestro', 'sd', ...) are still accepted.
        self.addExpectedInput(1, "structures", True)
        self.addExpectedOutput(1, "structures", True)

    def operate(self):
        """
        Convert every file of input #1 to the format requested with the
        OUTFORMAT keyword and set the resulting file list as output #1.

        Files that are already in the requested format are added to the
        output as they are.

        :raises RuntimeError: If the OUTFORMAT value is not valid, if the
            input/output format combination is not supported, or if a
            conversion command exits with a non-zero status.
        """
        outext = self['OUTFORMAT']
        # Add support for old options; for backwards-compatibility:
        outext = self._LEGACY_OUTFORMATS.get(outext, outext)

        # Add support for SMILES CSV format:
        if outext == 'csv':
            outformat = 'smilescsv'
        else:
            outformat = get_structure_file_format('test.%s' % outext)
            if outformat not in ['sd', 'maestro', 'smiles']:
                msg = "Invalid OUTFORMAT value: %s" % outext
                raise RuntimeError(msg)

        outfiles = []
        ligfiles = self.getInput(1).getFiles()
        for filenum, ligfile in enumerate(ligfiles, start=1):
            if ligfile.endswith('.csv'):
                informat = 'smilescsv'
            else:
                # Ev:62575 Unrecognized extensions are treated as SD.
                informat = get_structure_file_format(ligfile) or 'sd'

            if informat == outformat:
                # Nothing to convert; pass the file through.
                outfiles.append(ligfile)
                continue

            outfile = self._convertFile(ligfile, informat, outformat, outext,
                                        filenum)
            self.debug('Converted file: "%s"' % outfile)
            self.checkFile(outfile)
            outfiles.append(outfile)

        self.info("\nConverted files: %s" % outfiles)
        self.setOutput(1, pipeio.Structures(outfiles))
        return

    def _convertFile(self, ligfile, informat, outformat, outext, filenum):
        """
        Run the utility (or utilities) converting one file and return the
        name of the file that was written.

        :param ligfile: Path of the file to convert.
        :param informat: Format of `ligfile` ('sd', 'maestro', 'smiles',
            'smilescsv' or 'pdb').
        :param outformat: Format to convert to.
        :param outext: Extension (without the dot) for the output file.
        :param filenum: 1-based number of the file, used to generate the
            output file name.

        :raises RuntimeError: If the format combination is not supported or
            a conversion command fails.
        """
        schrod = os.environ['SCHRODINGER']

        def utility(name):
            return os.path.join(schrod, 'utilities', name)

        def output_name(extension):
            return self.genOutputFileName(1,
                                          filenum=filenum,
                                          extension=extension)

        conversion = (informat, outformat)
        if conversion in self._SIMPLE_CONVERSIONS:
            product, exe_name, in_args, out_args = \
                self._SIMPLE_CONVERSIONS[conversion]
            if product:
                self.requiredProductRuntime(product)
            outfile = output_name('.' + outext)
            cmd = [utility(exe_name)]
            cmd.extend(in_args)
            cmd.append(ligfile)
            cmd.extend(out_args)
            cmd.append(outfile)
        elif conversion == ('maestro', 'sd'):
            outfile = output_name('.' + outext)
            if self['2D']:
                cmd = [
                    utility('canvasConvert'), '-imae', ligfile, '-osd',
                    outfile, '-2D'
                ]
            else:
                cmd = [utility('sdconvert'), '-imae', ligfile, '-osd', outfile]
        elif conversion == ('sd', 'smiles'):
            # NOTE: Do not use canvasConvert because it requires Canvas
            # license:
            self.requiredProductRuntime('macromodel')
            # For SD -> SMILES conversion, convert to MAESTRO format first.
            # A failure of this intermediate step is reported immediately
            # instead of being left for uniquesmiles to trip over.
            maefile = output_name('.maegz')
            self._runCommand(
                [utility('sdconvert'), '-isd', ligfile, '-omae', maefile])
            outfile = output_name('.' + outext)
            cmd = [utility('uniquesmiles'), maefile, outfile]
        else:
            msg = 'Invalid input/output format combination: "%s" => "%s"' % (
                informat, outformat)
            raise RuntimeError(msg)

        self._runCommand(cmd)
        return outfile

    def _runCommand(self, cmd):
        """
        Run `cmd` (a list of arguments) and wait for it to finish.

        :raises RuntimeError: If the command exits with a non-zero status.
        """
        self.debug('RUNNING: %s' % cmd)
        # Ev:96103 Report problems with conversion:
        retcode = subprocess.call(cmd)
        if retcode != 0:
            msg = "Failed to run command: %s" % subprocess.list2cmdline(cmd)
            raise RuntimeError(msg)