"""
Support module for Ligfilter applications, including parsing functions,
filtering criteria, constants, and setting up of the default composite
SMARTS patterns.
The basic idea is to provide a set of criteria for filtering structures
based on properties, function evaluation, or collections of SMARTS patterns.
These criteria can be easily specified in an external file.
Examples of criteria definitions:
Molecular_weight < 300 A predefined criterion type
i_qp_#amide >= 1 A property-based criterion
Alcohols == 0 A SMARTS definition matching criterion
s_sd_Asinex A check for the existence of a property
General terminology used in the documentation of this module:
- SMARTS expression - a SMARTS string
- DEFINITION - a named definition, which can be simple (i.e., just a
SMARTS expression) or composite (including/excluding multiple
definitions, whether simple or composite).
- KEY - a definition name or predefined function (e.g., Num_atoms)
- CRITERION - a filtering condition
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Jeff A. Saunders, Matvey Adzhigirey
# ToDo:
#
# Pull out the actual filtering code into a support class, so this
# functionality can be accessed without running the utility.
#
# Rename the default definition file
import os
import schrodinger.job.util as jobutil # For hunt
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.structutils import analyze
#
# Global constants
#
# File name of the standard definitions file that is searched for and read
# when this module is imported.
DEFAULT_PATTERNS_FILE = "ligfilter_definitions.lff"
# File extension matching the definitions file above (presumably used for
# Ligfilter filter files in general -- confirm against callers).
FILTERFILE_EXT = "lff"
#NOTE: '<'&'>' must be last for mysplit() to work correctly:
# Comparison operators accepted in a criterion's comparison string.
OPERATORS = ['==', '!=', '<=', '>=', '<', '>']
# Gates that may join several "<operator> <value>" conditions in one
# comparison string; they are lower-cased before evaluation.
GATES = ['AND', 'OR']
# Values stored in the 'type' attribute of Criterion subclasses.
PROPERTY = 'property'
PREDEFINED = 'predefined'
SMARTS = 'smarts'
# NOTE(review): ASL is not referenced anywhere else in the visible source
# (AslCriterion sets its type to PREDEFINED) -- confirm whether it is used.
ASL = 'asl'
class _MySplit:
def __init__(self):
pass
def parse(self, thestr):
self.thestr = thestr
self.i = -1
SPACES = [' ', '\t', '\n']
outlist = []
char = self.nextChar()
while char is not None:
if char in SPACES:
char = self.nextChar()
continue
# Starting a double-quoted string
if char == '"':
currstr = char
while True:
char = self.nextChar()
if char is None:
break
currstr += char # Add quote or any other char
if char == '"':
break
# Append string including the quotes:
outlist.append(currstr)
if char is not None:
# Get the next char after the ending quote:
char = self.nextChar()
# Pass last char to next loop:
continue
# Starting a single-quoted string
if char == "'":
currstr = char
while True:
char = self.nextChar()
if char is None:
break
currstr += char # Add quote or any other char
if char == "'":
break
# Append string including the quotes:
outlist.append(currstr)
if char is not None:
# Get the next char after the ending quote:
char = self.nextChar()
# Pass last char to next loop:
continue
# Starting a value or property name:
currstr = ''
while True:
currstr += char
char = self.nextChar()
if char is None or char in SPACES:
break
outlist.append(currstr)
# Pass last char to next loop:
continue
return outlist
def nextChar(self):
self.i += 1
try:
return self.thestr[self.i]
except IndexError:
return None
def mysplit(thestr):
    """
    Special version of thestr.split() that also splits around operators.

    The following string: "criteria<value" will be split into:
    ["criteria", "<", "value"]

    Implemented so that spaces are no longer required in criteria.

    The split happens in two passes: first the string is cut around the
    comparison operators, then every non-operator piece is tokenized on
    whitespace (keeping quoted substrings together) by _MySplit.

    Note that the first pass does not look at quotes, so an operator
    character inside a quoted value is still treated as an operator.
    """
    operator_chars = ('=', '!', '<', '>', '-')

    # Pass 1: cut the string around operators.
    pieces = []
    pending = ''
    pos = 0
    length = len(thestr)
    while pos < length:
        char = thestr[pos]
        following = thestr[pos + 1] if pos + 1 < length else None
        pos += 1
        if char in operator_chars and following == '=':
            # Two-character token ending in '=' (e.g. '==', '<=').
            if pending:
                pieces.append(pending)
            pieces.append(char + following)
            pending = ''
            pos += 1  # The '=' has been consumed as well
        elif char in ('<', '>'):
            if pending:
                pieces.append(pending)
            pieces.append(char)
            pending = ''
        else:
            # Ordinary character, or an operator character on its own
            # that is not really a token.
            pending += char
    if pending:
        pieces.append(pending)

    # Pass 2: split the non-operator pieces on whitespace.
    tokens = []
    for piece in pieces:
        if piece in OPERATORS:
            tokens.append(piece)
        else:
            tokens.extend(_MySplit().parse(piece))
    return tokens
class Criterion:
    """
    A base class for a structure matching criterion. Each instance will test
    a structure for some property and indicate whether it passes or not.

    Attributes

    name - the name of the Criterion (meaning depends on the subclass).

    compstr - the comparison string as given by the user, or None.

    _compstr - the comparison string rewritten for evaluation: "value" is
        inserted before each operator and 'AND'/'OR' are lower-cased.

    type - The classification of the Criterion. Can be PREDEFINED,
        PROPERTY, or SMARTS. It is set by the subclasses, not by this
        base class.
    """

    def __init__(self, name=None, compstr=None):
        """
        Parameters

        name - the name of the Criterion. See subclasses for meaning, as
            it depends on the implementation.

        compstr - a comparison string for evaluating the value of the
            named property. Examples are '< 300' or '>= 1 AND <= 5'.

        If name or compstr are not specified, parseLine() method should be
        used.

        If a PROPERTY Criterion has no operator or value, the Criterion is
        just the existence of the property in the tested structure.

        The comparison is one string instead of two values (operator and
        number) in order to support criteria with multiple (boolean)
        conditions (Ev:50600).
        """
        self.name = name
        self.setCompStr(compstr)

    def setCompStr(self, compstr):
        """
        Validate 'compstr' and set the compstr/_compstr attributes from it.

        'compstr' must be a series of "<operator> <value>" pairs separated
        by gates (AND/OR). An empty value clears both attributes; the
        special value 'True' makes the criterion always match.

        Raises RuntimeError if the string is invalid.
        """
        if not compstr:  # Empty (PropertyCriterion or AslCriterion only)
            self.compstr = None
            self._compstr = None
            return

        # Unique case for -add_descriptors option. Causes this criteria to
        # always match, no matter what the value is.
        if compstr == 'True':
            self.compstr = 'True'
            self._compstr = 'True'
            return

        GATE, OPERATOR, VALUE = range(3)  # parser states
        name = self.name or ''
        pieces = []
        # First token MUST be an operator:
        expected_token = OPERATOR
        for token in mysplit(compstr):
            if expected_token == OPERATOR:
                if token not in OPERATORS:
                    raise RuntimeError('TOKEN "%s" not an operator' % token)
                if name.startswith('s_') and token not in ['==', '!=']:
                    raise RuntimeError(
                        'String properties only support operators "==" and "!="'
                    )
                expected_token = VALUE
                # "value" is important for match_compstr() to work right:
                pieces.append(" value %s" % token)
            elif expected_token == VALUE:
                token = self._checkValueToken(token)
                expected_token = GATE
                pieces.append(" %s" % token)
            else:
                # Expecting AND or OR
                if token not in GATES:
                    raise RuntimeError('TOKEN "%s" not a gate' % token)
                expected_token = OPERATOR
                # convert 'AND'/'OR' to 'and'/'or':
                pieces.append(" %s" % token.lower())

        # Last token MUST be a value:
        if expected_token != GATE:
            raise RuntimeError('last token is not a value')

        # compstr was verified to be OK at this point
        self.compstr = compstr
        self._compstr = ''.join(pieces)

    def _checkValueToken(self, token):
        """
        Check that 'token' is a valid comparison value for this criterion
        and return it, converted to int/float where the criterion requires
        a number.

        Raises RuntimeError if the token is not acceptable.
        """
        name = self.name or ''
        if self.type == PROPERTY:
            if name.startswith('b_'):
                if token not in ['True', 'False']:
                    raise RuntimeError(
                        'TOKEN "%s" must be a value of True or False' % token)
            elif name.startswith('i_'):
                try:
                    token = int(token)
                except ValueError:
                    raise RuntimeError('TOKEN "%s" not an int value' % token)
            elif name.startswith(('r_', 'f_')):
                # Real-valued properties use the 'r_' prefix elsewhere in
                # this module; 'f_' is still accepted as before.
                try:
                    token = float(token)
                except ValueError:
                    raise RuntimeError('TOKEN "%s" not a float value' % token)
            elif name.startswith('s_'):
                if not token.startswith('"') and not token.startswith("'"):
                    raise RuntimeError(
                        'TOKEN "%s": Value for string properties must be quoted'
                        % token)
        elif self.type == SMARTS:
            try:
                token = int(token)
            except ValueError:
                raise RuntimeError('TOKEN "%s" not an int value' % token)
        else:  # Predefined
            try:
                token = float(token)
            except ValueError:
                raise RuntimeError('TOKEN "%s" not a number value' % token)
        return token

    def parseLine(self, line):
        """
        Parse a line of the form:

            <name>
                (Property criterion only)
        or:
            <name> <value>
                (shorthand for "<name> == <value>")
        or:
            <name> <low>-<high>
                (shorthand for "<name> >= <low> AND <= <high>")
        or:
            <name> <oper> <value>
        or:
            <name> <oper> <value> AND/OR <oper> <value>

        Set the name and compstr attributes from the parsed line.

        Raises RuntimeError if the string is invalid or empty.
        """
        s = mysplit(line)
        # Check for an empty line before touching the first token.
        if len(s) < 1:
            raise RuntimeError("ERROR Empty line encountered.")
        self.name = s[0]

        if len(s) == 1 and self.type == PROPERTY:
            self.setCompStr('')
        elif len(s) == 2:
            bounds = s[1].split('-')
            if len(bounds) == 1:
                s.insert(1, '==')
                self.setCompStr(' '.join(s[1:]))
            elif len(bounds) == 2:
                # Support for dashes specifying value ranges (required by
                # LigPrep):
                s[1] = '>='
                s.extend([bounds[0], 'AND', '<=', bounds[1]])
                self.setCompStr(' '.join(s[1:]))
            else:
                raise RuntimeError("ERROR Failed to parse: %s" % line)
        else:
            self.setCompStr(' '.join(s[1:]))

    def __str__(self):
        """
        Return a standard string form of the Criterion suitable for filter
        files.
        """
        if self.compstr:
            return f"{self.name:<40} {self.compstr}"
        else:
            return "%-40s" % self.name

    def match_compstr(self, value):
        """
        Return True if the value matches self.compstr, False if not.

        The rewritten comparison string (self._compstr) refers to the
        local name 'value' and is evaluated as a Python expression.
        """
        # SECURITY NOTE: eval() runs the comparison string as Python code.
        # setCompStr() validates operators and gates, and most value
        # tokens, but value tokens of property criteria whose name has no
        # recognized type prefix are passed through unchecked. Do not feed
        # filter files from untrusted sources to this code.
        return eval(self._compstr)

    def matches(self, st, addprops=False):
        """
        Return True if the structure 'st' matches the criterion, False if not.

        OVERWRITE this method in the subclass

        st (Structure) - Structure object
        addprops (bool) - whether to add properties for each description
        """

    def getvalue(self, st):
        """
        Return the value of this criterion in the structure 'st'.

        OVERWRITE this method in the subclass
        """
##############################################################################
# Functions to generate pre-defined criteria:
# Each one returns a number (int or float) for predefined criteria
# for the specified structure object
##############################################################################
def Num_rings(st):
    """
    Count the rings of the structure 'st'.

    The result is the length of st.ring.
    """
    rings = st.ring
    return len(rings)
def Num_aromatic_rings(st):
    """
    Count the rings of the structure 'st' whose isAromatic() is true.
    """
    return sum(1 for ring in st.ring if ring.isAromatic())
def Num_aliphatic_rings(st):
    """
    Count the rings of the structure 'st' whose isAromatic() is false,
    i.e. the non-aromatic rings.
    """
    return sum(1 for ring in st.ring if not ring.isAromatic())
def Num_heteroaromatic_rings(st):
    """
    Count the heteroaromatic rings of the structure 'st', i.e. the rings
    whose isHeteroaromatic() is true.
    """
    return sum(1 for ring in st.ring if ring.isHeteroaromatic())
def Num_rotatable_bonds(st):
    """
    Count the rotatable bonds of the structure 'st'.

    The count is delegated to
    structutils.analyze.get_num_rotatable_bonds().
    """
    rotatable = analyze.get_num_rotatable_bonds(st)
    return rotatable
def Num_atoms(st):
    """
    Report how many atoms the structure 'st' has (its atom_total).
    """
    atom_count = st.atom_total
    return atom_count
def Num_heavy_atoms(st):
    """
    Count the non-hydrogen atoms of the structure 'st', i.e. the atoms
    whose atomic number is not 1.
    """
    return sum(1 for atom in st.atom if atom.atomic_number != 1)
def Num_molecules(st):
    """
    Report how many molecules the structure 'st' has (its mol_total).
    """
    molecule_count = st.mol_total
    return molecule_count
def Num_residues(st):
    """
    Count the residues of the structure 'st' (the length of st.residue).
    """
    residues = st.residue
    return len(residues)
def Molecular_weight(st):
    """
    Report the total molecular weight of the structure 'st' (its
    total_weight).
    """
    weight = st.total_weight
    return weight
def Num_chiral_centers(st):
    """
    Count the chiral centers of the structure 'st'.

    The result is the number of entries returned by
    structutils.analyze.get_chiral_atoms().
    """
    chiral_atoms = analyze.get_chiral_atoms(st)
    return len(chiral_atoms)
def Total_charge(st):
    """
    Report the total formal charge of the structure 'st' (its
    formal_charge).
    """
    charge = st.formal_charge
    return charge
def Num_positive_atoms(st):
    """
    Count the atoms of the structure 'st' that carry a positive formal
    charge.
    """
    return sum(1 for atom in st.atom if atom.formal_charge > 0)
def Num_negative_atoms(st):
    """
    Count the atoms of the structure 'st' that carry a negative formal
    charge.
    """
    return sum(1 for atom in st.atom if atom.formal_charge < 0)
def _get_percent_ss_type(st, ss_type):
counted = 0
matched = 0
for atom in st.atom:
ss = atom.property["i_m_secondary_structure"]
if atom.element == 'H':
assert ss == structure.SS_NONE
continue
counted += 1
if ss == ss_type:
matched += 1
try:
percent = float(matched) * 100.0 / counted
except ZeroDivisionError:
percent = 0.0
return round(percent, 6)
def Percent_helix(st):
    """
    Return the percentage of non-hydrogen atoms in the structure 'st'
    whose secondary structure is structure.SS_HELIX.
    """
    helix_type = structure.SS_HELIX
    return _get_percent_ss_type(st, helix_type)
def Percent_strand(st):
    """
    Return the percentage of non-hydrogen atoms in the structure 'st'
    whose secondary structure is structure.SS_STRAND.
    """
    strand_type = structure.SS_STRAND
    return _get_percent_ss_type(st, strand_type)
def Percent_loop(st):
    """
    Return the percentage of non-hydrogen atoms in the structure 'st'
    whose secondary structure is structure.SS_LOOP.
    """
    loop_type = structure.SS_LOOP
    return _get_percent_ss_type(st, loop_type)
#### Dictionary linking Predefined "name" to a module function:
# Each key is a name usable in a criterion line; the value is the function
# that PredefinedCriterion calls with the structure to obtain the value.
# Please document any additions in the PredefinedCriterion class docstring.
# NOTE(review): 'Molecular_formula' is referenced below but no definition
# of it appears in the visible part of this module -- confirm that it is
# defined before this point.
predefined_function_dict = {
    'Num_rings': Num_rings,
    'Num_aromatic_rings': Num_aromatic_rings,
    'Num_aliphatic_rings': Num_aliphatic_rings,
    'Num_heteroaromatic_rings': Num_heteroaromatic_rings,
    'Num_rotatable_bonds': Num_rotatable_bonds,
    'Num_atoms': Num_atoms,
    'Molecular_weight': Molecular_weight,
    'Num_chiral_centers': Num_chiral_centers,
    'Total_charge': Total_charge,
    'Num_positive_atoms': Num_positive_atoms,
    'Num_negative_atoms': Num_negative_atoms,
    'Num_heavy_atoms': Num_heavy_atoms,
    'Num_molecules': Num_molecules,
    'Num_residues': Num_residues,
    'Molecular_formula': Molecular_formula,
    'Percent_helix': Percent_helix,
    'Percent_strand': Percent_strand,
    'Percent_loop': Percent_loop,
}
# Names recognized as predefined criteria (the keys of the dictionary
# above); also used to reject DEFINE statements that reuse such a name.
PREDEFINED_KEYS = list(predefined_function_dict)
##############################################################################
##############################################################################
class PropertyCriterion(Criterion):
    """
    A structure matching criterion that acts on the presence or value of a
    specific structure property.

    Without a comparison string the criterion only checks that property
    'name' exists on the structure. With one, the property value is
    compared against the comparison string.
    """

    def __init__(self, name=None, compstr=None):
        """
        Parameters

        name - the name of the property being evaluated

        compstr - the property comparison string to be used if present,
            currently in format "<operator> <value>"

        If name or compstr are not specified, parseLine() method should be
        used.
        """
        # The type must be set first: the base initializer validates
        # 'compstr' according to it.
        self.type = PROPERTY
        Criterion.__init__(self, name, compstr)

    def matches(self, st, addprops=False):
        """
        Return True if structure 'st' matches this criterion, False if not.

        st (Structure) - Structure object
        addprops (bool) - ignored for property criterions
        """
        # This docstring should be kept in sync with other 'matches'
        # docstrings.
        # FIXME make <addprops> work
        try:
            current = st.property[self.name]
        except KeyError:
            # A missing property never matches.
            return False

        if not self.compstr:
            # Existence of the property is all that was asked for.
            return True
        # Otherwise the value has to satisfy the comparison string.
        return self.match_compstr(current)

    def getvalue(self, st):
        """
        Return the value of the property for this structure, or None if
        the structure does not have the property.
        """
        try:
            current = st.property[self.name]
        except KeyError:
            return None
        return current
class SmartsCriterion(Criterion):
    """
    A structure matching criterion that looks for a match to a Definition
    instance, which is comprised of a collection of SMARTS patterns.

    For example, for the Definition 'TwoCarbons' that matches against the
    SMARTS patterns [#6][#6], the comparison string

        TwoCarbons < 40

    will match if there are less than 40 carbon-carbon bonds in the
    structure.
    """

    def __init__(self, definition, compstr=None):
        """
        Parameters

        definition - a Definition instance, specifying the SMARTS
            pattern(s) to be included and excluded

        compstr - the comparison string to be used, currently in format
            "<operator> <value>". Also passed along by the expand() method.

        If compstr is not specified, parseLine() method should be used.
        """
        # The type must be set first: the base initializer validates
        # 'compstr' according to it.
        self.type = SMARTS
        Criterion.__init__(self, definition.name, compstr)
        self.definition = definition

    def _count_occurances(self, st):
        """
        Count the distinct 'includes' matches in the structure, leaving out
        every match whose atoms are all part of a single 'excludes' match.

        Matches made of the same set of atoms are counted once (Ev:84352).

        Raises RuntimeError if the SMARTS evaluation raises ValueError.
        """
        # Currently this works correctly only for single definitions, NOT
        # compound definitions (groups of simple definitions)
        include_patterns = self.definition.includes()
        exclude_patterns = self.definition.excludes()
        try:
            include_hits = analyze.evaluate_multiple_smarts(
                st, include_patterns)
            exclude_hits = analyze.evaluate_multiple_smarts(
                st, exclude_patterns)
        except ValueError as err:
            raise RuntimeError(err)

        # Collapse duplicate matches (Ev:84352): a match is identified by
        # its sorted atoms.
        unique_includes = {tuple(sorted(atoms)) for atoms in include_hits}
        unique_excludes = {tuple(sorted(atoms)) for atoms in exclude_hits}

        def is_excluded(inatoms):
            # Excluded when some exclude match contains every atom of the
            # include match.
            return any(
                all(atom in exatoms for atom in inatoms)
                for exatoms in unique_excludes)

        return sum(1 for inatoms in unique_includes
                   if not is_excluded(inatoms))

    def matches(self, st, addprops=False):
        """
        Return True if structure 'st' matches this criterion, False if not.

        The number of definition.includes() matches that are not covered
        by a definition.excludes() match is compared against the
        comparison string.

        st (Structure) - Structure object
        addprops (bool) - whether to store the count on the structure as
            property 'i_ligfilter_<name>'

        Raises RuntimeError if no comparison string is defined.
        """
        if not self.compstr:
            raise RuntimeError("matches(): compstr not defined")

        count = self._count_occurances(st)
        if count < 0:
            raise RuntimeError(
                "Structure had more exclude-definition matches than include-definition matches!"
            )

        if addprops:
            st.property['i_ligfilter_%s' % self.name] = count
        return self.match_compstr(count)

    def getvalue(self, st):
        """
        Return the number of definition.includes() matches in the structure
        that are not covered by a definition.excludes() match.
        """
        return self._count_occurances(st)

    def expand(self, definitions):
        """
        Generate a new SmartsCriterion from the current one in which the
        definition.includes() and definition.excludes() are expanded from
        'definitions'. The comparison string is carried over.
        """
        expanded = self.definition.expand(definitions)
        return SmartsCriterion(expanded, self.compstr)
class PredefinedCriterion(Criterion):
    """
    A structure matching criterion that acts on the value of a predefined
    function applied to the structure.

    The available functions are the keys of predefined_function_dict:

        Num_rings
        Num_aromatic_rings
        Num_aliphatic_rings
        Num_heteroaromatic_rings
        Num_rotatable_bonds
        Num_atoms
        Molecular_weight
        Num_chiral_centers
        Total_charge
        Num_positive_atoms
        Num_negative_atoms
        Num_heavy_atoms
        Num_molecules
        Num_residues
        Molecular_formula
        Percent_helix
        Percent_strand
        Percent_loop

    For example, one definition parseable from the external file is:

        Num_rings == 0
    """

    def __init__(self, name=None, compstr=None):
        """
        Parameters

        name - the name of the function to use. Allowed values are those
            in ligfilter.PREDEFINED_KEYS.

        compstr - the comparison string to evaluate the result of the
            predefined function against
        """
        # The type must be set first: the base initializer validates
        # 'compstr' according to it.
        self.type = PREDEFINED
        Criterion.__init__(self, name, compstr)
        # Looked up lazily, because the name may only be set later by
        # parseLine().
        self._function = None

    def _evaluate(self, st):
        """
        Apply the predefined function named self.name to 'st', looking the
        function up on first use.
        """
        if not self._function:
            self._function = predefined_function_dict[self.name]
        return self._function(st)

    def matches(self, st, addprops=False):
        """
        Return True if structure 'st' matches this criterion, False if not.

        st (Structure) - Structure object
        addprops (bool) - whether to store the computed value on the
            structure as property '<t>_ligfilter_<name>', where <t> is
            i, r or s depending on the type of the value

        Raises RuntimeError if no comparison string is defined, and
        ValueError if 'addprops' is set and the value is not an int, float
        or str.
        """
        # This docstring should be kept in sync with other 'matches'
        # docstrings.
        if not self.compstr:
            raise RuntimeError("matches(): compstr not defined")

        result = self._evaluate(st)

        if addprops:
            result_type = type(result)
            if result_type == int:
                propname = 'i_ligfilter_%s' % self.name
            elif result_type == float:
                propname = 'r_ligfilter_%s' % self.name
            elif isinstance(result, str):
                # e.g. Molecular_formula
                propname = 's_ligfilter_%s' % self.name
            else:
                raise ValueError("Invalid type: %s" % result)
            st.property[propname] = result

        return self.match_compstr(result)

    def getvalue(self, st):
        """
        Return the value of the predefined function applied to 'st'.

        For example, return the number of rings, or number of rotatable
        bonds.
        """
        return self._evaluate(st)
class AslCriterion(Criterion):
    """
    This criterion considers a Structure as matching if evaluating the
    stored ASL expression on it returns at least one atom.
    """

    def __init__(self, asl):
        """
        Parameters

        asl - the ASL expression string. It is stored as the criterion
            name; no comparison string is used.
        """
        Criterion.__init__(self, asl)
        self.type = PREDEFINED

    def matches(self, st, addprops=False):
        """
        Return True if structure 'st' matches this ASL criterion, False if
        not.

        st (Structure) - Structure object
        addprops (bool) - whether to store the result on the structure as
            property 'b_ligfilter_asl'
        """
        hit = self.getvalue(st)
        if addprops:
            st.property['b_ligfilter_asl'] = hit
        return hit

    def getvalue(self, st):
        """
        Return True if the ASL expression matches at least one atom of the
        structure 'st', False otherwise.
        """
        atoms = analyze.evaluate_asl(st, self.name)
        return len(atoms) > 0
class Definition:
    """
    A named collection of SMARTS patterns (or names of other definitions)
    to match against. includes() lists the patterns that should be
    matched, excludes() the ones that shouldn't.
    """

    def __init__(
            self,
            name,
            includes=[],  # noqa: M511
            excludes=[],  # noqa: M511
            group=None):
        """
        Parameters

        name - the name of the definition

        includes - a list of SMARTS patterns to count

        excludes - a list of SMARTS patterns that can be used to exclude
            matches in the includes list

        group - name of the group that this definition is part of
            (optional). See Ev:50599
        """
        self.name = name
        self.group = group
        # Copies are stored, so the caller's lists (and the shared default
        # lists) are never modified:
        self._includes = includes[:]
        self._excludes = excludes[:]

    def addKey(self, key, positive=True):
        """
        Append the SMARTS pattern 'key' to the wanted matches (includes)
        if 'positive' is True, otherwise to the unwanted matches
        (excludes).
        """
        target = self._includes if positive else self._excludes
        target.append(key)

    def removeKey(self, key):
        """
        Remove the SMARTS pattern 'key' from the wanted matches, or, if it
        is not there, from the unwanted matches.

        Raises RuntimeError if 'key' is in neither list.
        """
        for keys in (self._includes, self._excludes):
            if key in keys:
                keys.remove(key)
                return
        raise RuntimeError("Key '{}' not in definition '{}'".format(
            key, self.name))

    def includes(self):
        """
        Return the list of wanted matches.
        """
        return self._includes

    def excludes(self):
        """
        Return the list of unwanted matches.
        """
        return self._excludes

    def _expandIncludes(self, inclist, definitions, masterlist):
        """
        Append the entries of 'inclist' to 'masterlist', replacing every
        entry that is a key of 'definitions' by the (recursively expanded)
        includes of that definition.

        For example, if a Definition TwoCarbons in 'definitions' is made up
        of the "includes" [C][C] and [c][c], and 'inclist' includes
        TwoCarbons, then [C][C] and [c][c] will be added to 'masterlist'.

        The result is returned through the 'masterlist' parameter.
        """
        # TODO: Do we need a guards against circular or contradictory
        # definitions?
        for entry in inclist:
            if entry not in definitions:
                masterlist.append(entry)
                continue
            self._expandIncludes(definitions[entry]._includes, definitions,
                                 masterlist)

    def _expandExcludes(self, exclist, definitions, masterlist):
        """
        Append the entries of 'exclist' to 'masterlist', replacing every
        entry that is a key of 'definitions' by the (recursively expanded)
        "includes" of that definition.

        For example, if a Definition TwoCarbons in 'definitions' is made up
        of the "includes" [C][C] and [c][c], and 'exclist' includes
        TwoCarbons, then [C][C] and [c][c] will be added to 'masterlist'.

        Raises RuntimeError if an excluded definition has excludes itself.

        The result is returned through the 'masterlist' parameter.
        """
        # TODO: The behavior described above may not be correct. Should we
        # prohibit excluded definitions that themselves exclude other
        # definitions?
        # TODO: Do we need a guards against circular or contradictory
        # definitions?
        for entry in exclist:
            if entry not in definitions:
                masterlist.append(entry)
                continue
            referenced = definitions[entry]
            if referenced._excludes:
                raise RuntimeError(
                    "Error excluding '%s' -- a definition with excludes (-) cannot be included in another definition"
                    % entry)
            self._expandIncludes(referenced._includes, definitions,
                                 masterlist)

    def expand(self, definitions):
        """
        Generate a new Definition with the same name, in which the
        includes and excludes are expanded from the provided 'definitions'
        dictionary.
        """
        expanded_includes = []
        expanded_excludes = []
        self._expandIncludes(self._includes, definitions, expanded_includes)
        self._expandExcludes(self._excludes, definitions, expanded_excludes)
        return Definition(self.name, expanded_includes, expanded_excludes)

    def __str__(self):
        """
        Return a standard string form of the Definition suitable for
        filter/keys files: a single line for one include without excludes,
        otherwise a DEFINE line followed by one '+'/'-' line per entry.
        """
        header = "DEFINE %s" % self.name
        if len(self._includes) == 1 and not self._excludes:
            return " ".join([header, self._includes[0]])

        out = [header]
        out.extend(" + %s" % inc for inc in self._includes)
        out.extend(" - %s" % exc for exc in self._excludes)
        return '\n'.join(out)
class CriterionParser:
    """
    A class for parsing a general property or predefined matching criterion.
    """

    def __init__(self, definitions_dict=None):
        """
        Parameters

        definitions_dict - optional dictionary of known definitions, keyed
            by definition name. When given (and not empty), definition
            names in criteria are validated against it.
        """
        self.definitions_dict = definitions_dict
        # Line and line number of the most recent parse() call, used for
        # error messages:
        self.line = None
        self.line_num = None

    def error(self, err):
        """
        Raise a RuntimeError with message 'err', prefixed with the current
        line number if one is known.
        """
        if self.line_num:
            err = 'Line %i: %s' % (self.line_num, err)
        raise RuntimeError(err)

    def expression_error(self, msg):
        """
        Raise a RuntimeError about an invalid expression, including the
        reason 'msg', the offending line and a reminder of the expected
        format.
        """
        self.error("\n ".join([
            "ERROR: Cound not parse expression due to:", msg,
            "Expression: %s" % self.line,
            "Ligfilter expression needs to be in format: <definition> [<operator> <value>];",
            "where <definition> is a property name or SMARTS definition (no spaces allowed)",
            "Multiple [<operator> <value>] sets must be separated with 'AND' or 'OR'"
        ]))

    def parse(self, line, line_num=None):
        """
        Create a Criterion object from a string. The method expects an
        input line of the form

            <name>

        ...or...

            <name> <operator> <value>

        The first form is valid only for property criteria.

        The kind of criterion is chosen from the name: names starting with
        b_, s_, i_ or r_ are property criteria, names in PREDEFINED_KEYS
        are predefined criteria, anything else is taken to be a SMARTS
        definition name.

        If the instance has a 'definitions_dict', definition criteria will
        be checked against it for validity.

        Returns a Criterion. Raises RuntimeError if the line cannot be
        parsed.
        """
        self.line = line
        self.line_num = line_num
        try:
            name = mysplit(line)[0]
        except IndexError:
            msg = 'parse(): Could not parse line: "%s"' % line
            raise RuntimeError(msg)

        criterion = None
        # Classify on the parsed name rather than on the raw line, so that
        # leading whitespace does not hide a property prefix.
        if name[:2] in ['b_', 's_', 'i_', 'r_']:  # Property
            criterion = PropertyCriterion()
        elif name in PREDEFINED_KEYS:
            criterion = PredefinedCriterion()
        elif self.definitions_dict and name not in self.definitions_dict:
            self.error("Unknown definition or property: %s" % name)
        elif self.definitions_dict:
            criterion = SmartsCriterion(self.definitions_dict[name])
        else:
            # Unvalidated criterion. Should this be allowed at all?
            criterion = SmartsCriterion(Definition(name))

        try:
            criterion.parseLine(line)
        except RuntimeError as err:
            self.expression_error(str(err))
        return criterion
class DefinitionParser:
    """
    A class for parsing a (possibly multi-line) specification of a
    Definition.
    """

    def __init__(self):
        # Lines and current line number of the most recent parse() call;
        # the line number is used to prefix error messages.
        self.lines = None
        self.line_num = None

    def error(self, err):
        """
        Print the message 'err', prefixed with the current line number if
        one is known, and raise a RuntimeError with it.
        """
        if self.line_num:
            err = 'Line %i: %s' % (self.line_num, err)
        print(err)
        raise RuntimeError(err)

    def parse(self, lines, line_num=None, group=None):
        """
        Return a Definition from a list of lines. No expansion of
        definitions is done.

        General pattern of the specification is

            DEFINE <name> <SMARTS pattern>

        or

            DEFINE <name>
            (+ include_definition)*
            (- exclude_definition)*

        Where the asterisk indicates zero or more of each of the include and
        exclude definitions.

        Options:

        lines - the lines making up the definition; the first one must be
            the DEFINE line
        line_num - line number of the first line in the file being parsed
            (for error messages), or None if unknown
        group - name of the definition group (or None, if there is no group)

        Raises RuntimeError if the specification is invalid.
        """
        self.lines = lines
        self.line_num = line_num

        firstline = lines[0] if lines else ''
        s = mysplit(firstline)
        if not s or s[0] != 'DEFINE':
            self.error("Definition expression must start with 'DEFINE'")
        if len(s) < 2 or len(s) > 3:
            self.error("Invalid DEFINE statement: %s" % firstline)

        name = s[1]  # The second "word" of the line
        if name in PREDEFINED_KEYS:
            self.error("Cannot redefine built-in definition: %s" % name)
        if name[0:2] in ['s_', 'i_', 'r_', 'b_']:
            self.error("Invalid definition name: %s" % name)

        if len(s) == 3:  # 3 "words" on this line
            # Single-line definition. Value should be a SMARTS definition.
            return Definition(name, includes=[s[2]], group=group)

        # Multi-line definition
        definition = Definition(name, group=group)
        for line in lines[1:]:
            # Only count lines when a starting line number was supplied.
            if self.line_num is not None:
                self.line_num += 1
            s = mysplit(line)
            if not s:  # Blank line
                continue
            if len(s) != 2:  # Have to have 2 "words" on each line
                self.error("Invalid definition line: %s" % line)
            inc_exc, value = s
            if inc_exc == '+':
                definition.addKey(value)
            elif inc_exc == '-':
                definition.addKey(value, False)
            else:
                self.error("Invalid definition line: %s" % line)
        if not definition.includes():
            self.error("Definition '%s' has no includes" % definition.name)
        return definition
def read_keys(fh, validate=False, validdefinitions=None):
    """
    Generate lists of Definitions and Criteria from an iterator 'fh'
    that returns a line at a time of the Definition and Criteria
    specification. For example, this iterator can be an open file or a list
    of strings.

    Recognized lines are:

        GROUP <name>       start a definition group
        GROUPEND           end the current definition group
        DEFINE ...         start a definition
        + ... / - ...      continue the current definition
        anything else      a criterion

    Blank lines, "#" lines and lines beginning with "# " are skipped, and
    everything from a "# " onwards is dropped as a trailing comment.

    If 'validate' is True, definition names in criteria will be checked
    against known Definitions, including those previously read from 'fh' and
    passed in via 'validdefinitions'. No expansion of Definitions is done.

    Each definition is assigned the group that was active on its DEFINE
    line.

    Return a tuple of (Definition list, Criterion list).
    """
    # Make a copy to avoid editing original:
    known_definitions = validdefinitions[:] if validdefinitions else []
    criteria = []
    definitions = []
    dp = DefinitionParser()

    # The definition currently being collected: its lines, the line number
    # of its DEFINE line and the group that was active on that line.
    pending_lines = []
    pending_line_num = 0
    pending_group = None
    currgroup = None

    def store_definition(def_lines, first_line_num, def_group):
        # Parse a completed definition and record it in both lists.
        newdefinition = dp.parse(def_lines, first_line_num, group=def_group)
        definitions.append(newdefinition)
        known_definitions.append(newdefinition)

    for line_num, line in enumerate(fh, 1):
        line = line.strip()  # Get rid of leading and trailing spaces/tabs
        # Skip "#" lines and lines beginning with "# " (comments):
        # Note: Comments MUST have a space after #. This is implemented
        # to support definitions that start with a pound.
        if not line or line == "#" or line.startswith('# '):
            continue
        line = line.split('# ')[0]  # Delete trailing comment

        keyword = mysplit(line)[0]
        if keyword == "GROUP":
            # The group is everything after the space following the keyword:
            # (spaces are supported)
            currgroup = line[6:]
        elif keyword == "GROUPEND":
            currgroup = None
        elif keyword == 'DEFINE':
            if pending_lines:
                # The definition we were reading is done
                store_definition(pending_lines, pending_line_num,
                                 pending_group)
            # Start the new definition, remembering its group now: the
            # group may change before the definition is completed.
            pending_lines = [line]
            pending_line_num = line_num
            pending_group = currgroup
        elif keyword in ('+', '-'):
            pending_lines.append(line)
        else:
            # Criterion
            if pending_lines:
                # The definition we were reading is done.
                store_definition(pending_lines, pending_line_num,
                                 pending_group)
                pending_lines = []
                pending_line_num = 0
            if validate:
                definitions_dict = {d.name: d for d in known_definitions}
                cp = CriterionParser(definitions_dict)
            else:
                cp = CriterionParser()
            criteria.append(cp.parse(line, line_num))

    if pending_lines:
        # The definition we were reading is done.
        store_definition(pending_lines, pending_line_num, pending_group)

    return (definitions, criteria)
#
# Find and read the default definitions from the Schrodinger installation.
#
# The Schrodinger installation root must be defined for this module to load.
try:
    SCHRODINGER = os.environ['SCHRODINGER']
except KeyError:
    raise Exception("SCHRODINGER is not defined.")

# Prefer an explicit MMSHARE_EXEC from the environment; otherwise hunt for it.
if 'MMSHARE_EXEC' in os.environ:
    MMSHARE_EXEC = os.environ['MMSHARE_EXEC']
else:
    try:
        MMSHARE_EXEC = jobutil.hunt("mmshare")
    except Exception:
        # A failed hunt is reported by the check below. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        MMSHARE_EXEC = None
if not MMSHARE_EXEC:
    raise Exception("Could not determine MMSHARE_EXEC.")

# Candidate locations of the standard LigFilter definitions file.
MMSHARE_datadir = os.path.join(MMSHARE_EXEC, "..", "..", "data")
mmshare_data_file = os.path.join(MMSHARE_datadir, DEFAULT_PATTERNS_FILE)
try:
    appdata_dir = mm.mmfile_schrodinger_appdata_dir()
except Exception:
    raise RuntimeError(
        "Could not determine the Schrodinger application data directory.")
appdata_file = os.path.join(appdata_dir, DEFAULT_PATTERNS_FILE)

# Search order: 1) CWD, 2) application data directory, 3) MMSHARE/data.
# The MMSHARE copy is the fallback and is not checked for existence here;
# a missing file is reported when it is opened below.
if os.path.isfile(DEFAULT_PATTERNS_FILE):
    _definition_file_name = DEFAULT_PATTERNS_FILE
    print('Using local copy of %s' % DEFAULT_PATTERNS_FILE)
elif os.path.isfile(appdata_file):
    _definition_file_name = appdata_file
else:
    _definition_file_name = mmshare_data_file

# Open the definition file:
try:
    _definition_fh = open(_definition_file_name)
except Exception:
    raise Exception("Failed to open %s file." % _definition_file_name)

# Read the definitions (and criteria, if present) from the definitions file.
# The context manager closes the handle even if read_keys() raises.
with _definition_fh:
    (default_definitions, _criteria) = read_keys(_definition_fh, validate=True)

# Criteria found in the default definitions file are not used; warn about it.
if _criteria:
    print("")
    print("WARNING: There are criteria in the default definitions file. They")
    print(" will be ignored.")
    print("")
def get_default_criterion_parser():
    """
    Return a CriterionParser built from the default definitions.

    The module-level default definitions are indexed by name, every one of
    them is expanded against that index, and the expanded definitions are
    passed to a new CriterionParser.
    """
    # Name -> unexpanded default definition.
    by_name = {entry.name: entry for entry in default_definitions}

    # Expand all definitions up front, before any matching is done. It may
    # be enough to expand only the definitions used by Criterion objects,
    # since the unexpanded index is not used after this point.
    expanded = {
        name: definition.expand(by_name)
        for name, definition in by_name.items()
    }
    return CriterionParser(expanded)
def generate_criterion(condition, cp=None):
    """
    Ev:55805
    Parse a condition string into a Criterion object.

    A condition string looks like, for example:
        "Num_atoms < 100"

    The returned criterion can then be used as follows:
        if criterion.matches(st):
            <do>

    condition - the condition string to parse.
    cp - optional CriterionParser to parse with; when None, a parser
         built from the default definitions is used.
    """
    parser = get_default_criterion_parser() if cp is None else cp
    return parser.parse(condition)
def st_matches_criteria(st, criteria_list, match_any=False, addprops=False):
    """
    Check a structure against a list of criteria.

    Returns None if the structure matches; otherwise returns a string
    explaining why it did not match.

    st - the structure to test; it is passed to each criterion's matches().
    criteria_list - the criteria to evaluate. Only criteria whose type is
        PROPERTY, SMARTS, PREDEFINED or ASL are evaluated.
    match_any - if True, st is considered to match if it matches at least
        one criterion; otherwise all criteria must be matched.
    addprops - passed through to each criterion's matches(); if True,
        properties for each evaluated descriptor are added to st.

    Evaluation stops at the first decisive criterion (the first match when
    match_any is True, the first non-match otherwise), so later criteria are
    not evaluated.
    """
    # Order of execution (fast execution first). Within one type the
    # criteria keep the order they have in criteria_list.
    execution_order = (PROPERTY, SMARTS, PREDEFINED, ASL)
    sorted_criteria = []
    for ctype in execution_order:
        sorted_criteria.extend(c for c in criteria_list if c.type == ctype)

    for criterion in sorted_criteria:
        matched = criterion.matches(st, addprops=addprops)
        if match_any:
            if matched:
                # One match is enough.
                return None
        elif not matched:
            # One failure is enough.
            return "did not match criteria: %s" % criterion.name

    if match_any:
        # No matches found
        return "did not match any criteria"
    # No non-matches found
    return None
#EOF