# python module
"""
Stages for running LigPrep.
LigPrepStage - runs LigPrep on input structures
PostLigPrepStage - limits stereos, removes penalized states
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Matvey Adzhigirey
import gzip # For compress()
import os
import shutil
import sys
import tarfile # For cleanup
from schrodinger import structure
from schrodinger.infra import mm # noqa: F401 For exceptions
from schrodinger.pipeline import compiled_functions
from schrodinger.pipeline import pipeio
from schrodinger.pipeline import pipeutils
from schrodinger.pipeline import stage
from schrodinger.utils import fileutils
from schrodinger.utils.fileutils import MAESTRO
from schrodinger.utils.fileutils import SD
from schrodinger.utils.fileutils import SMILES
from schrodinger.utils.fileutils import SMILESCSV
def compress(origFile):
    """
    Compress the specified file with gzip and remove the original.

    A file ending in ``.mae`` is compressed to ``.maegz``; any other file
    gets a ``.gz`` suffix appended to its name.

    :param origFile: Path of the file to compress.
    :type origFile: str

    :return: Path of the compressed file.
    :rtype: str

    :raise OSError: If the input cannot be read, the output cannot be
        written, or the original cannot be removed.
    """
    if origFile.endswith('.mae'):
        compFile = origFile + 'gz'  # To produce .maegz
    else:
        compFile = origFile + '.gz'
    # Copy raw bytes: GzipFile.write() only accepts bytes-like data, so
    # feeding it lines read in text mode fails under Python 3. The context
    # managers close both handles even when the copy fails part-way through.
    with open(origFile, 'rb') as orig_handle, \
            gzip.GzipFile(compFile, mode='wb') as gzip_handle:
        shutil.copyfileobj(orig_handle, gzip_handle)
    # Only reached when the compressed copy was written completely:
    os.remove(origFile)
    return compFile
def replace_arg_value(cmd, arg, value, add=True):
    """
    Set the value that follows an option in a command line, in place.

    If ``arg`` occurs in ``cmd`` and is followed by another token, that
    token is overwritten with ``value``. If ``arg`` is the last token, the
    command is left untouched. If ``arg`` does not occur at all, ``arg`` and
    ``value`` are appended, but only when ``add`` is true.

    :param cmd: List of command line arguments (modified in place).
    :type cmd: list(str)

    :param arg: Argument name.
    :type arg: str

    :param value: Argument value.
    :type value: str

    :param add: Add the argument if not present?
    :type add: bool
    """
    if arg not in cmd:
        # Option is absent; append the option/value pair if requested:
        if add:
            cmd.extend([arg, value])
        return
    # Only the first occurrence of the option is considered:
    value_pos = cmd.index(arg) + 1
    if value_pos < len(cmd):
        cmd[value_pos] = value
[docs]class LigPrepStage(stage.Stage):
"""
Pipeline stage for running LigPrep on the input structures
"""
[docs] def __init__(self, *args, **kwargs):
# Declares the stage keywords, registers one "structures" input and one
# "structures" output, declares the products used, and initializes the
# bookkeeping attributes that the other methods of this class fill in.
# All positional and keyword arguments are passed on to
# stage.Stage.__init__().
#
# Keyword specification, one keyword per line in the form
# NAME = type(default=...) # description:
specs = """
UNIQUEFIELD = string(default="NONE") # Field to identify unique compound by
RETITLE = boolean(default=False) # Whether to set OUTCOMPOUNDFIELD
OUTCOMPOUNDFIELD = string(default="s_m_title") # Field where to store the compound codes (if RETITLE is True).
STEREO_SOURCE = string(default="parities") # parities/geometry
USE_EPIK = boolean(default=False) # Whether to use Epik instead of Ionizer.
METAL_BINDING = boolean(default=False) # Use Epik metal binding mode.
RETAIN = boolean(default=True) # Retain input variant for each compound.
PH = float(default=7.0) # Target pH
PHT = float(default=2.0) # pH threshold
IONIZE = boolean(default=True) # Whether to ionize (Ionizer; Epik always ionizes).
GENERATE_TAUTOMERS = boolean(default=True) # Whether to generate tautomers.
MAX_TAUTOMERS = integer(default=8) # Maximum number of tautomers to generate (Ionizer).
NEUTRALIZE = boolean(default=False) # Whether to neutralize before expanding states.
MAX_STATES = integer(default=16) # Maximum number of states to generate (Epik).
NUM_STEREOISOMERS = integer(default=32) # Generate this many stereoisomers per protonation state.
MAX_STEREOISOMERS = integer(default=None) # Keep this many low-energy stereoisomers per protonation state.
NRINGCONFS = integer(default=1) # Ring conformers per ligand
MIXLIGS = boolean(default=False)
RECOMBINE = boolean(default=True) # Whether to recombine input ligand files
COMBINEOUTS = boolean(default=False) # Combine output files
SKIP_BAD_LIGANDS = boolean(default=True)
REGULARIZE = boolean(default=False) # Whether to standardize input structures before preparing
SKIP_NOUNIQUE_LIGANDS = boolean(default=False) # Whether to skip ligands that have no unique field.
TAUT_SPEC_FILE = string(default=None) # Custom tautomerizer spec file to use.
NORMALIZE = boolean(default=False) # Whether to normalize input variants
OUTFORMAT = string(default="mae")
"""
stage.Stage.__init__(self, specs=specs, *args, **kwargs)
# Input #1 and output #1 are both of type "structures":
self.addExpectedInput(1, "structures", True)
self.addExpectedOutput(1, "structures", True)
# 'ligprep' is the main product; 'macromodel' is always required ('epik'
# is added by checkProducts() when USE_EPIK is set):
self.setMainProduct('ligprep')
self.requiredProduct('macromodel')
self.input_files = []
# Files that setupJobs() iterates over; one LigPrep subjob per file:
self.recombined_files = []
# Job names of the subjobs, filled in by setupJobs():
self.ligprep_jobnames = []
# Files of the stage output, filled in by processJobOutputs():
self.output_files = []
# JobDJ instance, obtained from self.getJobDJ() in setupJobs():
self.jobdj = None
# Current phase of operate(); used there to decide where to (re)start:
self.status = "NOT STARTED"
# Value for the ligprep '-px' argument; set by initNonPersistent():
self.px = None # LIGPREP-1732
def initNonPersistent(self, pipeline):
    """
    Initialize the ``px`` attribute from the given pipeline.

    The value returned by ``compiled_functions.pipeline_init_wrapper()``
    for ``pipeline.stagejobs`` is stored in ``self.px``. It is later passed
    to the ligprep '-px' argument to skip ligprep license checkouts.

    :param pipeline: Object providing the ``stagejobs`` attribute.
    """
    stagejobs = pipeline.stagejobs
    px_value = compiled_functions.pipeline_init_wrapper(stagejobs)
    self.px = px_value
[docs] def setupJobs(self):
"""
Setup LigPrep subjobs by building up a command to submit, and passing
that command to JobDJ via addJob().

One 'ligprep' command is built for every file in self.recombined_files.
The job name of each subjob is appended to self.ligprep_jobnames and the
output file each subjob is expected to write is recorded in
self.expected_outputs_by_jobname (job name -> file name).

The stage exits via self.exit() on an unrecognized input file format, an
unsupported OUTFORMAT, METAL_BINDING without USE_EPIK, or
MAX_STEREOISOMERS greater than NUM_STEREOISOMERS.
"""
# FIXME: MAKE IT SO THAT ALL VARIANTS OF BAD ROOT ARE REMOVED???
self.jobdj = self.getJobDJ()
# Start from a clean slate; both containers are rebuilt below:
self.ligprep_jobnames = []
self.expected_outputs_by_jobname = {}
taut_specfile = None
if self['TAUT_SPEC_FILE']:
taut_specfile = self['TAUT_SPEC_FILE']
self.info("Using custom tautomer spec file: %s" % taut_specfile)
else:
self.info("Using built-in tautomer spec file")
# Report the run mode once, and warn about keywords that do not apply
# to a NORMALIZE job:
if self['NORMALIZE']:
self.info("Will normalize input variants (no expansion)")
if self['RETAIN']:
self.warning(
"WARNING RETAIN keyword not supported for NORMALIZE job")
if not self['NEUTRALIZE']:
self.warning(
"WARNING NEUTRALIZE False ignored for NORMALIZE job")
if not self['GENERATE_TAUTOMERS']:
self.warning(
"WARNING GENERATE_TAUTOMERS False ignored for NORMALIZE job"
)
else:
self.info("Will generate new variants (expand)")
# Build one command per input file; subjob file numbers start at 1:
for fileindex, ligfile in enumerate(self.recombined_files):
jobname = self.genFileName(filenum=fileindex + 1)
#ligprep = os.path.join(os.environ['SCHRODINGER'],'ligprep')
# Will automatically find $SCHRODINGER/ligprep:
cmd = ['ligprep']
cmd.append('-kp') # Keep properties
# Pick the input option from the detected file format. A '.csv' file
# whose format was not detected is treated as SMILES CSV:
informat = fileutils.get_structure_file_format(ligfile)
if informat is None and ligfile.endswith('.csv'):
informat = SMILESCSV
if informat == SMILES:
cmd += ['-ismi', ligfile]
elif informat == SMILESCSV:
cmd += ['-ismi', ligfile]
elif informat == MAESTRO:
cmd += ['-imae', ligfile]
elif informat == SD:
cmd += ['-isd', ligfile]
else:
self.exit("ERROR Invalid file format: %s" % ligfile)
outformat = self['OUTFORMAT']
# When NORMALIZING, output files are always merged anyway:
# SD (Ev:71599)
if self['NORMALIZE']:
# Can't have informat and outformat both be SD:
outformat = 'mae'
if outformat == 'mae':
outfile = jobname + '.maegz'
cmd += ['-omae', outfile]
elif outformat == 'sd':
outfile = jobname + '.sdf'
cmd += ['-osd', outfile]
else:
self.exit("ERROR: Unsupported output format: %s" % outformat)
# Remember which file this subjob should produce:
self.expected_outputs_by_jobname[jobname] = outfile
if self['NORMALIZE']:
# Normalize input variants (generates one per structure)
# NOTE(review): the format is compared with the literal 'sd' here but
# with the fileutils SD constant above - confirm they are equal.
if informat == 'sd':
cmd += ['-R', 'c']
# New protocol: Use Epik and keep first stage (lowest penalty):
cmd += ['-R', 'd', '-R', 'h', '-R', 'n', '-R', 'e']
# NOTE no need for -lab option since we will not be running
# PostLigPrepStage
# NOTE(review): the note above contradicts the next statement, which
# does add -lab - confirm which of the two is intended.
cmd.append('-lab') # Label structures
else:
# Expand (generate variants)
if self['RETAIN']:
cmd += ['-retain']
cmd.append('-lab') # Label structures
# LigPrep options:
# -kp = keep properties
# -lab = put labels on structures -> needed for limit stereo stage
# -retain_it = keep input ionization and tautomeric states
# -r = maximum number of ring conformations to generate per ligand
ph = self['PH']
pht = self['PHT']
# Target pH and pH threshold go to Epik ('e') or to Ionizer ('i')
# through the -W option:
if self['USE_EPIK']:
ms = self['MAX_STATES'] # Default for Epik is 16
cmd += [
'-epik', '-W',
'e,-ph,%f,-pht,%f,-ms,%i' % (ph, pht, ms)
]
else:
cmd += ['-W', 'i,-ph,%f,-pht,%f' % (ph, pht)]
# NOTE: Anytime IONIZE is True, neutralizing is automatically
# performed.
# -i 2 when ionizing, -i 1 when only neutralizing, -i 0 for neither:
if self['IONIZE']:
cmd += ['-i', '2']
else:
# Not ionizing
# NEEDED? if self['USE_EPIK']:
# NEEDED? self.warning("WARNING: IONIZE False ignored
# because using Epik")
if self['NEUTRALIZE']:
cmd += ['-i', '1']
else:
cmd += ['-i', '0']
if self['METAL_BINDING']:
if not self['USE_EPIK']:
self.exit(
"ERROR: METAL_BINDING mode is supported only with Epik."
)
cmd.append('-epik_metal_binding')
# -t <n> limits the number of tautomers; -nt is passed when tautomers
# are not generated:
if self['GENERATE_TAUTOMERS']:
max_tauts = self['MAX_TAUTOMERS']
cmd += ['-t', str(max_tauts)]
else:
cmd.append('-nt')
generate_stereos = self['NUM_STEREOISOMERS']
cmd += ['-s', str(generate_stereos)]
# MAX_STEREOISOMERS is optional (default None) and may not exceed
# NUM_STEREOISOMERS:
max_stereos = self.get('MAX_STEREOISOMERS')
if max_stereos is not None:
if generate_stereos < max_stereos:
self.exit(
'MAX_STEREOISOMERS can not be greater than NUM_STEREOISOMERS'
)
cmd += ['-m', str(max_stereos)]
# NOTE: When running an older VSW input file, it's possible
# that results will change, as previously stereo limiting
# was done by PostLigPrepStage and keyword was MAXSTEREO.
if taut_specfile:
cmd += ['-ts', taut_specfile]
if self['STEREO_SOURCE'] == 'geometry':
cmd.append('-g')
# -nc is passed when cleanup was not requested for this stage:
if not self.getCleanupRequested():
cmd.append('-nc')
self.debug("ADDING COMMAND: %s" % cmd)
self.debug(" as string: %s" % " ".join(cmd))
self.jobdj.addJob(cmd)
self.ligprep_jobnames.append(jobname)
def missingOutput(self, logfile):
    """
    Tell whether a LigPrep log file reports that no structures were found.

    :param logfile: Path of the log file; it must exist, as it is opened
        unconditionally.
    :type logfile: str

    :return: True if any line of the file contains the text
        "No structures found in", False otherwise.
    :rtype: bool
    """
    marker = "No structures found in"
    with open(logfile) as log_handle:
        # any() stops reading at the first matching line:
        return any(marker in text for text in log_handle)
def processJobOutputs(self):
    """
    Collect the LigPrep subjob output files and set them as stage output.

    When NORMALIZE is set, or when COMBINEOUTS is set and there is more
    than one subjob, the structures of all subjob output files are written
    to a single combined file. Otherwise every subjob output file is moved
    to a Pipeline-compliant name, keeping its extension.

    Subjobs whose expected output file is not listed in
    self.ligprep_output_files are skipped with a warning.
    """
    self.output_files = []
    merge_requested = self['COMBINEOUTS'] and len(self.ligprep_jobnames) > 1

    if merge_requested or self['NORMALIZE']:
        # NOTE: If normalizing, we always need to keep first output state
        # per compound
        self.info("Combining output files...")
        outformat = self['OUTFORMAT']
        # Anything other than 'sd' is assumed to be Maestro:
        ext = '.sdf' if outformat == 'sd' else '.maegz'
        # Read as textual structures only if writing to Mae format:
        read_as_text = (outformat == 'mae')
        merged_path = None
        written_to_merged = 0
        total_sts = 0
        for filenum, jobname in enumerate(self.ligprep_jobnames, start=1):
            outfile = self.expected_outputs_by_jobname[jobname]
            if outfile not in self.ligprep_output_files:
                # This subjob did not produce output (OK)
                self.warning(
                    "WARNING LigPrep subjob did not produce output: %s" %
                    jobname)
                continue
            for st in pipeutils.get_reader(outfile, astext=read_as_text):
                total_sts += 1
                # Print a progress dot for every 1000 structures:
                if total_sts % 1000 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                if not merged_path:
                    # Named after the first subjob that has structures:
                    merged_path = self.genOutputFileName(
                        1, filenum=filenum, extension=ext)
                    self.output_files.append(merged_path)
                    written_to_merged = 0
                # The first structure creates the file, the rest append:
                if written_to_merged:
                    st.append(merged_path)
                else:
                    st.write(merged_path)
                written_to_merged += 1
        self.setOutput(1, pipeio.Structures(self.output_files, total_sts))
        return

    # Do NOT combine; rename the files:
    for filenum, jobname in enumerate(self.ligprep_jobnames, start=1):
        outfile = self.expected_outputs_by_jobname[jobname]
        if outfile not in self.ligprep_output_files:
            self.warning(
                "WARNING LigPrep subjob did not produce output: %s" %
                jobname)
            # This subjob did not produce output (OK)
            continue
        # Since just renaming the file, keep the same extension:
        kept_ext = fileutils.splitext(outfile)[1]
        newname = self.genOutputFileName(
            1, filenum=filenum, extension=kept_ext)
        shutil.move(outfile, newname)
        self.output_files.append(newname)
    self.setOutput(1, pipeio.Structures(self.output_files))
def checkProducts(self):
    """
    Overrides the checkProducts() method of Stage class.

    Registers 'epik' as a required product when USE_EPIK is True; does
    nothing otherwise.

    Will get called after the instance of this stage is created.
    """
    # NOTE(review): no call to the base class checkProducts() is visible
    # here - confirm where the actual product check is performed.
    epik_wanted = self['USE_EPIK']
    if not epik_wanted:
        return
    self.requiredProduct('epik')
[docs] def operate(self):
"""
Run the stage on the input structure file set.

The work is divided into phases that are tracked in self.status:
"NOT STARTED" / "PREPARING INPUT FILES" -> "SETTING UP JOBS" ->
"RUNNING JOBS" -> "PROCESSING FILES" -> "CLEANING UP" (only if cleanup
was requested) -> "COMPLETE". Each phase runs only if self.status has the
matching value, and self.dump() is called after every status change.

If any subjob is found to have failed, the stage exits via self.exit()
after marking the affected jobs for restart.
"""
# NOTE(review): dump() presumably saves the stage state so that a restart
# resumes at the recorded phase - confirm against Stage.dump().
if self.status == "NOT STARTED" or \
self.status == "PREPARING INPUT FILES":
self.status = "PREPARING INPUT FILES"
self.recombineInputLigands()
self.status = "SETTING UP JOBS"
self.dump()
if self.status == "SETTING UP JOBS":
self.setupJobs()
self.status = "RUNNING JOBS"
self.dump()
if self.status == "RUNNING JOBS":
self.debug("DEBUG JobDJ Options: %s" % self.JobDJOptions())
# In case restarting, update JobDJ to new options:
self.setJobDJOptions(self.jobdj)
if self.px:
# Make sure that the "-px" argument of the jobs managed by
# jobdj is up to date (see LIGPREP-1732, PHASE-2241).
for job in self.jobdj.all_jobs:
if not job.isComplete():
replace_arg_value(job._command, '-px', self.px)
# Ev:64286 if restarting, do not restart jobs that have output
# files:
if self.jobdj.hasStarted():
# Jobs are matched to job names by position in the two lists:
for i, job in enumerate(self.jobdj.all_jobs):
if job.isComplete():
continue
subjobname = self.ligprep_jobnames[i]
outfile = self.expected_outputs_by_jobname[subjobname]
logfile = subjobname + '.log'
if os.path.isfile(outfile):
print(' Output file exists for the job:', outfile)
print(' Marking %s as completed' % subjobname)
job._markFinished('found-output')
elif os.path.isfile(logfile):
# No output file, but the log may show that LigPrep finished
# without finding any structures:
if self.missingOutput(logfile):
print(' Job finished; LigPrep produced no output')
print(' Marking %s as completed' % subjobname)
job._markFinished('found-output')
self.info("Running LigPrep jobs...")
self.runJobDJ(self.jobdj)
self.info("Checking subjob outputs...")
# All jobs completed
# Make sure that all LigPrep subjobs produced output:
self.ligprep_output_files = []
some_failed = False
for i, job in enumerate(self.jobdj.all_jobs):
subjobname = self.ligprep_jobnames[i]
outfile = self.expected_outputs_by_jobname[subjobname]
logfile = subjobname + '.log'
if os.path.isfile(outfile):
self.ligprep_output_files.append(outfile)
else:
# A missing output file is a failure unless the log file exists and
# reports that no structures were found:
if not os.path.isfile(logfile):
self.error("ERROR: LigPrep log file missing: %s" %
logfile)
self.jobdj.markForRestart(job, "nolog")
some_failed = True
elif not self.missingOutput(logfile):
# Output file is missing and log file shows that it
# should exist:
self.error("ERROR: LigPrep output file missing: %s" %
outfile)
self.jobdj.markForRestart(job, "noout")
some_failed = True
if some_failed:
# Dump before exiting, while the status is still "RUNNING JOBS":
self.dump()
self.exit("Some jobs failed. Try restarting. Exiting...")
self.status = "PROCESSING FILES"
self.dump()
if self.status == "PROCESSING FILES":
self.processJobOutputs()
if self.getCleanupRequested():
self.status = "CLEANING UP"
else:
self.status = "COMPLETE"
self.dump()
if self.status == "CLEANING UP":
# Clean up after myself by compressing/removing some files
self.info("Cleaning up...")
archive_files = []
# FIXME: Put this code into stage.py to make it universal:
for jobname in self.ligprep_jobnames:
# Remove these files:
for ext in ['-in.maegz', '-in.mae', '-in.smi']:
filename = jobname + ext
if os.path.isfile(filename):
fileutils.force_remove(filename)
# Archive these files:
for ext in [
'_bm.log', '_ds.log', '_ht.log', '_ion_detail.log',
'_ion.log', '_lb.log', '_nu.log', '_pr.log', '_rc.log',
'_st.log', '_ta.log'
]:
filename = jobname + ext
if os.path.isfile(filename):
archive_files.append(filename)
# Compress these files individually:
for ext in ['_ionout-bad.mae']:
filename = jobname + ext
if os.path.isfile(filename):
self.info('Compressing %s...' % filename)
compress(filename)
if archive_files: # There are files to archive:
# Create an archive for log files:
tar_file = self.genFileName(extension="_logs.tar.gz")
log_tar = tarfile.open(tar_file, 'w:gz')
for filename in archive_files:
self.info('Archiving %s...' % filename)
log_tar.add(filename)
# Close the archive file:
log_tar.close()
# Remove the files that were added to the archive:
for filename in archive_files:
fileutils.force_remove(filename)
self.status = "COMPLETE"
self.dump()
# COMPLETE
return
[docs]class PostLigPrepStage(stage.Stage):
"""
This stage limits the number of stereoisomers, removes states penalized by
Epik/Ionizer, and generates the variant codes.
PRESERVE_NJOBS
whether to write out as many output files as there are inputs.
Useful to create the same number of subjobs for QikProp as were
used for LigPrep. By default, a single output file is created.
"""
[docs] def __init__(self, *args, **kwargs):
# Declares the stage keywords and registers one "structures" input and one
# "structures" output. All positional and keyword arguments are passed on
# to stage.Stage.__init__().
#
# Keyword specification, one keyword per line in the form
# NAME = type(default=...) # description:
specs = """
LIMIT_STEREOISOMERS = boolean(default=False) # LigPrepStage does stereo limiting now.
MAXSTEREO = integer(default=4) # Ignored by default now
REMOVE_PENALIZED_STATES = boolean(default=True)
UNIQUEFIELD = string(default="s_m_title")
OUTVARIANTFIELD = string(default="s_vsw_variant")
OUTFORMAT = option("sd", "mae", default="mae")
PRESERVE_NJOBS = boolean(default=False)
"""
stage.Stage.__init__(self, specs=specs, *args, **kwargs)
# Input #1 and output #1 are both of type "structures":
self.addExpectedInput(1, "structures", True)
self.addExpectedOutput(1, "structures", True)
# Set from the MAXSTEREO keyword at the start of operate():
self.maxstereo = None
def writeBestStereos(self, stereoisomers):
    """
    Write the stereoisomers of one ion/taut state to the current output.

    Given a list of Structure objects from the same ion/taut state, decides
    which structures (stereoisomers) need to be kept. All of them are kept
    unless stereo limiting is enabled (``self.limit_stereos``) and there
    are more than ``self.maxstereo`` of them; in that case only the
    ``self.maxstereo`` lowest-energy ones (by "r_lp_Energy") are kept, in
    order of increasing energy. Structures of equal energy keep their input
    order.

    Each kept structure gets the variant code
    "<current_compound_id>-<current_variant>" in the
    ``self.outvariantfield`` property and is appended to
    ``self.current_output_writer``. The counters ``self.current_variant``,
    ``self.sts_in_file`` and ``self.out_variants`` are advanced per
    structure written.

    :param stereoisomers: Structures of one ion/taut state. The list itself
        is not modified.
    :type stereoisomers: list

    :raise RuntimeError: If limiting is needed and a structure has no
        "r_lp_Energy" property.
    """
    self.debug('writeBestStereos: %d' % len(stereoisomers))
    if not self.limit_stereos or (len(stereoisomers) <= self.maxstereo):
        # keep all stereoisomers:
        sts_to_keep = stereoisomers
    else:  # More than MAXSTEREO stereoisomers:
        # A single stable sort picks the same structures, in the same
        # order, as repeatedly extracting the first lowest-energy entry,
        # without rescanning the list for every pick and without removing
        # items from the caller's list.
        try:
            by_energy = sorted(stereoisomers,
                               key=lambda st: st.property["r_lp_Energy"])
        except KeyError as err:
            raise RuntimeError(
                "ERROR: The ligand has no potential energy information."
            ) from err
        # max() guards against a negative limit turning into an
        # end-relative slice:
        sts_to_keep = by_energy[:max(self.maxstereo, 0)]
    # Write structures (stereoisomers) to be kept from this iontaut:
    for st in sts_to_keep:
        # add ligand variant property based on current count of its
        # compound
        self.current_variant += 1
        st.property[self.outvariantfield] = "%s-%i" % (
            self.current_compound_id, self.current_variant)
        self.current_output_writer.append(st)
        self.sts_in_file += 1
        self.out_variants += 1
def nextOutFile(self):
    """
    Close the current output file, if any, and start the next one.

    Increments ``self.outfilenum``, resets ``self.sts_in_file`` to 0, opens
    a StructureWriter on a new ".maegz" output file and appends the file
    name to ``self.output_files``.

    :raise RuntimeError: If this is not the first output file and no
        structures were written to the previous one.
    """
    previous_writer = self.current_output_writer
    if previous_writer:
        previous_writer.close()
    had_previous_file = self.outfilenum != 0
    if had_previous_file and self.sts_in_file == 0:
        raise RuntimeError("No structures were written to previous file!")
    if had_previous_file:
        self.debug("\nWriting to new file after writing %d structures" %
                   self.sts_in_file)
    self.outfilenum += 1
    self.sts_in_file = 0
    next_path = self.genOutputFileName(
        1, filenum=self.outfilenum, extension=".maegz")
    self.current_output_file = next_path
    self.current_output_writer = structure.StructureWriter(next_path)
    self.output_files.append(next_path)
[docs] def operate(self):
"""
Optionally limits the number of stereoisomers in the input_files
to <maxstereo> keeping lowest energy isomers.
Optionally filters on ionization penalty

The input structures are read file by file. Consecutive structures that
share the same compound id (UNIQUEFIELD) and the same ion/taut state
(the part of "s_lp_label" before '_stereoizer') are collected as one group
of stereoisomers; each completed group is passed to writeBestStereos(),
which also assigns the variant codes. The output files and the number of
structures written are set as output #1.

The stage exits via self.exit() on an invalid MAXSTEREO or OUTFORMAT, or
when a structure lacks the unique field or the "s_lp_label" property.
"""
self.input_ligands = self.getInput(1).getFiles()
self.maxstereo = self['MAXSTEREO']
if self['LIMIT_STEREOISOMERS']:
self.limit_stereos = True
if self.maxstereo > 32 or self.maxstereo < 1:
self.exit("limit_stereo: MAXSTEREO must be between 1 and 32 !")
self.warning(
'WARNING: LIMIT_STEREOISOMERS option is deprecated. Use MAX_STEREOISOMERS option in LigPrepStage instead.'
)
else:
# This is the default now, as LigPrepStage now handles stereo
# expansion via the new -m option.
self.limit_stereos = False
# NOTE(review): the keyword spec in __init__ allows "sd" and "mae", while
# this check accepts "mae" and "sdf", and nextOutFile() always writes
# ".maegz" - confirm which output formats this stage should support.
if not self['OUTFORMAT'] in ["mae", "sdf"]:
self.exit("Output Format needs to be .mae or .sdf!")
self.uniquefield = self['UNIQUEFIELD']
if self.uniquefield.upper() == "NONE":
# VSW-807 So that badly-formatted VSW input files work
self.warning(
"WARNING: 'NONE' is not a valid value for the UNIQUEFIELD keyword. Using 's_m_title' instead."
)
self.uniquefield = "s_m_title"
self.outvariantfield = self['OUTVARIANTFIELD']
self.preserve_njobs = self['PRESERVE_NJOBS']
# Set when a structure has neither the Epik nor all of the Ionizer penalty
# properties; reported once at the end:
found_ligands_wo_penalty = False
if self['REMOVE_PENALIZED_STATES']:
remove_states = True
else:
remove_states = False
# Report what this run is going to do:
if self.limit_stereos and remove_states:
self.info(
"Removing penalized states and keeping %i stereoisomers per ion/taut of each compound."
% self.maxstereo)
elif self.limit_stereos:
self.info(
" Keeping %i stereoisomers per ion/taut of each compound." %
self.maxstereo)
elif remove_states:
self.info("Removing penalized states.")
else:
self.info(
"Generating variant codes and recombining structure files.")
# Output bookkeeping, shared with nextOutFile() and writeBestStereos():
self.output_files = []
self.outfilenum = 0
self.current_output_writer = None
self.sts_in_file = 0
self.create_new_out_file_on_next_write = True
# Counters for the summary printed at the end:
self.in_variants = 0
self.in_roots = 0
self.out_variants = 0
# Compound and ion/taut state of the group currently being collected:
self.current_compound_id = None
self.current_variant = 0
current_iontaut_state = None
stereoisomers = [] # list of Structure objects
for input_file in self.input_ligands:
self.info("\nWorking on file: %s" % input_file)
# With PRESERVE_NJOBS a new output file is requested for every input
# file; it is opened the next time a group is written:
if self.preserve_njobs:
self.create_new_out_file_on_next_write = True
# Go through all structures in a file:
st_num = 0
for st in structure.StructureReader(input_file):
st_num += 1
self.in_variants += 1
# Print progress period:
if st_num % 1000 == 0:
sys.stdout.write(".")
sys.stdout.flush()
self.debug('Input st: %d' % st_num)
# Filter the structure on state penalty:
if remove_states:
skip_state = False
if "r_epik_State_Penalty" in st.property:
# Ev:93901 Remove the state only if both penalties are
# greater than 2.5 (if both exist):
if st.property["r_epik_State_Penalty"] > 2.5:
if "r_epik_Metal_State_Penalty" in st.property:
if st.property[
"r_epik_Metal_State_Penalty"] > 2.5:
skip_state = True
else:
skip_state = True
else:
try:
# No r_epik_State_Penalty property:
# Fall back to the Ionizer penalties; exceeding any one of the
# three thresholds removes the state:
if st.property["r_ionizer_Ionization_penalty"] > 2.8:
skip_state = True
if st.property[
"r_ionizer_Ionization_penalty_charging"] > 1.4:
skip_state = True
if st.property[
"r_ionizer_Ionization_penalty_neutral"] > 2.1:
skip_state = True
except KeyError:
found_ligands_wo_penalty = True
if skip_state:
self.debug(' removed high penalty state')
continue # Go to the next state
# Get the compound code of this structure (by unique field):
try:
# Ev:96648 & Ev: 104439
compound_id = pipeutils.read_unique_field(
st, self.uniquefield)
except KeyError:
self.exit("No field %s ligand %s!" %
(self.uniquefield, st_num))
# Get the iontaut state of the structure:
try:
label = st.property["s_lp_label"]
except KeyError:
self.exit(
"ERROR: LigPrep was not run, or was run without the -lab option."
)
# Determine the ion/taut state number of this ligand by label:
iontaut_state = label.split('_stereoizer')[0]
self.debug(' compound_id: %s' % compound_id)
self.debug(' iontaut state: %s' % iontaut_state)
if (compound_id == self.current_compound_id) and (
iontaut_state == current_iontaut_state):
self.debug(' same iontaut state as previous')
# Same iontaut state as previous st (same compound):
stereoisomers.append(st)
# Reached new state (may be same compound or different
# compound):
else:
self.debug(' new state')
if stereoisomers: # Will not happen only for first st
self.debug(" writing old state's stereoisomers")
# Write out stereoisomers of last ion/taut state:
if self.create_new_out_file_on_next_write:
self.debug("creating new output file")
self.nextOutFile()
self.create_new_out_file_on_next_write = False
self.writeBestStereos(stereoisomers)
# Reset the state list:
current_iontaut_state = iontaut_state
stereoisomers = [st]
# Figure out if different compound than previous structure:
if compound_id != self.current_compound_id:
self.debug(' new compound')
self.in_roots += 1
self.current_compound_id = compound_id
# Variant numbering restarts for every compound:
self.current_variant = 0
# DONE: All structures were read
if stereoisomers:
if not self.current_output_writer:
# This will happen if the input set has only one iontaut state
self.nextOutFile()
# Write out stereoisomers of the very last ion/taut state:
self.writeBestStereos(stereoisomers)
# Close the last writer:
# NOTE(review): if no structure passed the filters, no writer was ever
# opened; confirm that this close() cannot be reached with
# current_output_writer still None.
self.current_output_writer.close()
if found_ligands_wo_penalty:
self.warning(
"Some ligands did not have state penalty information causing all states to be retained"
)
msg = "\nNumber of compounds: %i" % self.in_roots
msg += "\nNumber of input structures: %i" % self.in_variants
msg += "\nNumber of output structures: %i" % self.out_variants
self.info(msg)
self.setOutput(1, pipeio.Structures(self.output_files,
self.out_variants))
# EOF