Source code for schrodinger.application.ligprep.ionizer

# -*- coding: utf-8 -*-
"""

Ionizer module.
Ported from ionizer.pl on February 2014.

Copyright (c) Schrodinger, LLC. All rights reserved.
"""
# Contributors: Matvey Adzhigirey

import math
import os
from past.utils import old_div

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import mmbitset
from schrodinger.infra import mmlist
from schrodinger.utils import log

ION_FRAGLIB_DEF = "ionized"
MYERRHAND = mm.MMERR_DEFAULT_HANDLER

# Indent levels for log text
IND = "  "
IND2 = IND * 2
IND3 = IND * 3
IND4 = IND * 4

# Control spec file
SPEC_FILE_DEF = "ionizer.ini"  # via mmfile

COMMENT_CHAR = "#"

# Generation restriction parameters & defaults

MAX_GROUPS_DEF = 15  # lower, on behalf of mass screeners
MAX_GROUPS_MAX = 31  # hard limit (due to signed 4-byte int)
# Assuming 31 bits to represent a positive integer value, then enforcing
# a limit of 31 possible ion centers allows a great simplification to indexing
# & iterating of possible ionization states.

MAX_IONS_DEF = 4
MAX_ABSTOTQ_DEF = 2

# Default maximum output CTs per input structure:
MAX_OUTCTS_DEF = 512

PH_DEF = 7.0
PH_THRESH_DEF = 2.0
PK_THRESH_DEF = 1000.0  # effectively unlimited

# Miscellaneous
MIN_FRAG_ATOMS = 2
PH_PRECISION = 0.01  # Jasna says 0.1 would suffice

# Derived
PH_DIFF_TOL = old_div(PH_PRECISION, 10.)

# Constants for ionization state energy cost calculations

RT = 0.001987207 * 298.15  # units: kcal/mol
RT_LN_10 = RT * math.log(10.0)

# For CT-level property annotation, to get sortable column of
# numbers in Maestro project table; this m2io dataname spelling
# leads to project table column title "Tot Q", which matches
# the string affixed to each ion state's description string
# (see state_desc, below)
M2IO_DATA_TOTAL_CHARGE = "i_ionizer_Tot_Q"

# CT-level property for ionization state number
M2IO_DATA_ION_STATE_NUM = "i_ionizer_Ion_state_n"

# CT-level property names for ionization center atom info
M2IO_DATA_CENTER_ATOMS = "s_ionizer_Ion_centers"
M2IO_DATA_CENTER_TYPES = "s_ionizer_Ion_types"
M2IO_DATA_CENTERS_ORIG = "s_ionizer_Ion_ctrs_in"

# CT-level property names for ionization state energy costs
M2IO_DATA_COST_TOTAL = "r_ionizer_Ionization_penalty"
M2IO_DATA_COST_CHARGE = "r_ionizer_Ionization_penalty_charging"
M2IO_DATA_COST_NEUT = "r_ionizer_Ionization_penalty_neutral"
M2IO_DATA_RETAIN = "i_lp_orig_ion_str"

#
# The following will be used as an extension to mmfrag's use of
# MMFILE_PATH_MMSHARE_DATA (defined in mmfile.h)

# For this use, the env vbl MMSHARE_EXEC does not need to be
# expanded in advance; mmfile_path_set() will expand it --
# assuming it's set, which will be the case when running via
# the utility wrapper

# If attempting to run outside of the utility wrapper, and
# MMSHARE_EXEC has not been set, then mmfile_path_set() will
# replace $MMSHARE_EXEC with blank, prepend that bogus dir
# on the mmfile search path, and then the mmfrag re-init will
# revert to the customary mmfrag.ini in mmshare/data/res, and
# subsequent attempts to find 'mmshare' custom mmfrag lib(s)
# will fail; thus, I will verify this setting explicitly in
# other code below
MMSHARE_DATA_DIR = r"\$MMSHARE_EXEC/../../data"

# Use of ':' as delimiter between dirs in the mmfile search
# path is hard-coded in mmfile.c (in more than one place)
MMFILE_PATH_DELIM = ":"


[docs]def format_charge(charge): """ Return a string representation of the charge. "-" will be prepended for negative numbers, "+" for positive, and nothing for zero. """ if charge == 0: return "0" else: return "%+i" % charge
[docs]def get_bonded_hydrogens(atom): """ Return a list of atom numbers of all Hydrogens bonded to given atom. """ return [a.index for a in atom.bonded_atoms if a.atomic_number == 1]
[docs]def prepend_mmfile_path(pdir): """ Add the given path to the mmfile search path. This makes it possible to search for the mmfrag.ini file in custom locations. """ path = mm.mmfile_path_get() path = pdir + MMFILE_PATH_DELIM + path mm.mmfile_path_set(path)
[docs]def free_energy_costs(x): """ For a given pKa - pH difference X, this function calculates and returns RT.ln(10^X + 1) and RT.ln(10^(-X) + 1) """ if x <= -20.0: # Approximation for low X cost = 0.0 elif x >= +20.0: # Approximation for high X; avoids 10.**x overflow cost = RT_LN_10 * x else: # Use exact formula when X is in normal range cost = RT * math.log(10.0**x + 1.0) # Now calculate RT.ln(10^(-X) + 1), which equals to: # RT.ln(10^X + 1) - RT.ln(10).X other_cost = cost - RT_LN_10 * x return (cost, other_cost)
# # Class definitions: #
[docs]class TolerableError(Exception): """ Raised when the structure could not be processed due to a non-fatal error. """
[docs]class TooManyGroupsError(Exception): """ Raised when the structure could not be processed because it has too many ionizable groups. """
[docs]class Fragment(object): """ Class representing a fragment from the ionizer library. """
[docs] def __init__(self, st, connect_to, chargef): self.st = st self.connect_to = connect_to self.chargef = chargef
[docs]class Spec(object): """ This class represents a specification line in the ionizer data file. """
[docs] def __init__(self, cmd, linenum, mmsubs_pattern, mmsubs_handle): self.cmd = cmd self.linenum = linenum self.mmsubs_pattern = mmsubs_pattern self.mmsubs_handle = mmsubs_handle
def __del__(self): mm.mmsubs_delete(self.mmsubs_handle)
[docs] def getMatches(self, st): """ Run a substracture search for this pattern in the specified structure. :rtype: list of int lists. :return: A list of matches, where each list is a list of atom indices. """ try: raw_matches = mm.mmsubs_match_comp(self.mmsubs_handle, st.handle, True) except mm.MmException: raise RuntimeError( "Substructure pattern matching failed" + "Used mmsubs handle mmsubs_handle for pattern --" + self.mmsubs_pattern) matches = [] for match_mmlist in raw_matches: matches.append(mmlist._mmlist_to_pylist(match_mmlist)) mm.mmlist_delete(match_mmlist) return matches
[docs]class Ionizer(object): """ Class for ionizing structures (part of LigPrep). To ionize a structure, do: i = Ionizer() out_sts = i.ionizeStructure(st) These options can be adjusted after instantiating the class: :ivar max_ions: Maximum number of ionizations. :ivar max_abstotq: Maximum absolute value of total charge. :ivar max_groups: Maximum ionizational groups to handle (up to MAX_GROUPS_MAX). :ivar max_outcts: Maximum output CTs per input CT. :ivar retain: Whether to retain the input structure in the output. :ivar retain_lab: Retain the input structure that has "i_lp_orig_ion_str" property set. :ivar show_match_atoms: Whether to report substructure pattern matches to the logger (log file). :ivar show_skips: Whether to show skipped state generations. Applies only to log levels 1 and up. :ivar keep_props: Whether to keep any structure-based properties from the input CT in the output CTs. :ivar retitle_with: Prefix for adding ion state numbers to output CT titles. :ivar logger: Logger instance. When processing CTs, any info, warning, and error messages will be redirected here. :ivar show_final_list: Report final ionization candidate list to logger. """
[docs] def __init__( self, loglevel=0, specfile=None, ph=None, ph_thresh=None, pk_thresh=None, ): """ :type loglevel: int :param logleve: Reporting log level. Use 0 for quietest (default); use 1 to log state generations; use 2 to log ion frag fusions also. :type specfile: str or None :param specfile: Path to the non-standard patterns spec file to use. :type ph: float or None :param ph: Effective pH of active site. Does not apply to pK-only mode. :type ph_thresh: float or None :param ph_thresh: De-protonated `|pKa-pH|` difference limit. Does not apply to pK-only mode. :type pk_thresh: float or None :param pk_thresh: Strong/weak pK threshold (no default). Overrides pH-based rejection mode and activates pK-only mode. Reject on pKa values only (no pH). """ self.loglevel = loglevel self.specfile = specfile if pk_thresh is not None: if ph is not None: raise ValueError("ph option is not supported when pk_thresh " "is set") if ph_thresh is not None: raise ValueError("ph_thresh option is not supported when " "pk_thresh is set") if ph is not None: self.ph = ph else: self.ph = PH_DEF if ph_thresh is not None: self.ph_thresh = ph_thresh else: self.ph_thresh = PH_THRESH_DEF if pk_thresh is not None: self.pk_thresh = pk_thresh self.ph = None # trigger non-default, pK-only mode else: self.pk_thresh = PK_THRESH_DEF # Other customizable options: self.max_ions = MAX_IONS_DEF self.max_abstotq = MAX_ABSTOTQ_DEF self.max_groups = MAX_GROUPS_DEF self.max_outcts = MAX_OUTCTS_DEF self.retain = False self.retain_lab = False self.str_to_retain = False # default: flag to mark the orig_str self.show_match_atoms = False self.show_skips = False self.keep_props = False self.retitle_with = None # Undocumented option to exclude from consideration an ion center atom # if it has a growname on it. This is provided for use in the VCS # product. There, the Ionizer may be applied to frag libs with # grownames in place, identifying mmfrag-style growbonds, to be used in # subsequent combinatorial growing. We don't want any ionizing fusions # disrupting the grow info. self.avoid_grow = False # Undocumented option to disable post-fuse revision of certain atom # properties (e.g., PDB residue name) in new ionized center atom and # any new H atoms. self.revise_ion_groups = True # Undocumented option to turn off adjustemnt of stereo-chemical # properties; would be useful if a serious bug were discovered in the # adjustment operation (no evidence, so far). self.stereo_adjust = True # Can be replaced with a custom logger later: self.logger = log.get_output_logger("ionizer.py") # Since log level >= 1 leads to showing ionizable atom numbers, # have any such setting imply -sf option, to show details on # all ionizable atoms self.show_final_list = (self.loglevel >= 1) # End of customizable options. # Ionized frag data (dicts on frag names). Used in ionizeCommandParser(). self.frags = {} # Ionization control specs & support self.control_specs = [] # Might as well do here: Add some slack to pK & pH thresholds # to ignore f.p. imprecisions in diff's to be calculated self.pk_thresh_adj = self.pk_thresh + PH_DIFF_TOL self.ph_thresh_adj = self.ph_thresh + PH_DIFF_TOL # Init relevant mmlibs self.initializeMmlibs() # Do all one-time preparations involving the mmfrag lib of # ionized fragments (for fusions in place of neutrals) # collect & store relevant data on all ionized frags self.readFragmentLibrary(ION_FRAGLIB_DEF) # Open ionizer.ini (or whatever), read lines, parse out # the control specs, and store them for later use self.readSpecFile()
def __del__(self): # Relase the specs' mmsubs handles: self.control_specs = [] # On general principles, trying to keep sequences of initialize() & # terminate() calls consistent with dependencies implicit in # compiled programs' link command library flag order mm.mmbuild_terminate() mm.mmsubs_terminate() mm.mmfrag_terminate()
[docs] def initializeMmlibs(self): """ Initialize the mmlibs libraries. """ mm.mmfrag_initialize(MYERRHAND) mm.mmsubs_initialize(MYERRHAND) mm.mmbuild_initialize(MYERRHAND)
[docs] def readFragmentLibrary(self, flib_name): """ Read the ionized frag library and populate self.frags dictionary. The ionized frag lib contains a fairly small number of fragments, all or most of which will be fused in a typical run; therefore we just collect information on all of them now. """ ion_fraglib_handle = mm.mmfrag_new(flib_name) # Prior to any building with fragments in a frag lib, # you have to set a specific grow direction, so here, # just set to first available grow dir as a reasonable # default; it's usually named "forward" (or similar) dir_name = mm.mmfrag_get_direction(ion_fraglib_handle, mm.MM_FIRST) mm.mmfrag_set_direction(ion_fraglib_handle, dir_name) self.frags = {} # Read the first fragment from the library: try: frag_name, frag_ct_handle = mm.mmfrag_duplicate_fragment( ion_fraglib_handle, mm.MM_FIRST) except mm.MmException as e: raise RuntimeError('Failed to get first fragment of "%s" lib' % flib_name) frag_ct = structure.Structure(frag_ct_handle) while True: # Collect additional info about this fragment.... # Get atom number for this frag's 1st connection's # connect-to atom; this is related to the earlier # selection of frag lib grow direction (I think) try: conn_to, conn_from, new_from, new_to = mm.mmfrag_get_connections( ion_fraglib_handle, frag_ct.handle, mm.MM_FIRST) except mm.MmException: raise RuntimeError('Cannot get "%s" frag connection atoms' % frag_name) # Get total of frag atoms' formal charges # Note: Frag's total charge is used later to decide which # ionization states (out of all possible ones) to actually # generate; it better be true that the ultimate "fuse" of # this frag's CT onto the main CT doesn't drop any charged # atoms! chargef_tot = 0 for atom in frag_ct.atom: chargef_tot += atom.formal_charge # Program currently demands that each ionized frag # has a total charge of either -1 or +1 if chargef_tot not in (-1, +1): msg = '"%s" frag total charge ' % frag_name msg += "%s not allowed" % format_charge(chargef_tot) raise RuntimeError(msg) # Store some ion frag data; use hashes on frag names self.frags[frag_name] = Fragment(frag_ct, conn_to, chargef_tot) # Okay, try to get next frag in lib try: frag_name, frag_ct_handle = mm.mmfrag_duplicate_fragment( ion_fraglib_handle, mm.MM_NEXT) except mm.MmException as e: if e.rc == mm.MMFRAG_DONE: # end of lib! break raise frag_ct = structure.Structure(frag_ct_handle)
[docs] def readSpecFile(self): """ Put cwd and $HOME/.schrodinger/mmshare at the front of the mmfile search path, to allow user "ionizer.ini" in either of those dirs to override installed data file in the mmshare data dir. Note: We wait as late as possible to do this, to prevent any similar override of the mmfrag data files, which we currently consider to be inviolate, that is, the user must not be allowed to override them, even accidentally. """ try: prepend_mmfile_path( os.getcwd() + MMFILE_PATH_DELIM + os.path.join(mm.mmfile_schrodinger_userdata_dir(), "mmshare")) except mm.MmException: raise RuntimeError("Cannot prepend dirs onto mmfile search path") if self.specfile is None: # Use mmfile to get abs path to std control spec file, # which should be installed in mmshare/data try: specfile_mod = mm.mmfile_find(SPEC_FILE_DEF, mm.MMFILE_FILE) except mm.MmException: raise RuntimeError('Cannot find "%s" ' % self.specfile + "(std control specs)") else: # User provides non-std control spec file specfile_mod = self.specfile try: with open(specfile_mod) as fh: spec_lines = fh.read().splitlines() except EnvironmentError: raise RuntimeError('Cannot open control spec file "%s"' % specfile_mod) for linenum, line in enumerate(spec_lines, start=1): line_orig = line # save original line (for error messages) def _raise_error(msg): msg += "\nControl spec file, line %i --" % linenum msg += "\n%s" % line_orig self.logger.error(msg) raise RuntimeError(msg) # Remove any comments: line = line.split(COMMENT_CHAR)[0].rstrip() # Skip blank lines: if not line.strip(): continue tokens = line.split() cmd = tokens.pop(0).lower() # hash on lower-case if cmd == "exclude": require_params = 1 elif cmd in ("acid", "base"): require_params = 3 else: _raise_error('Unrecognized spec command "%s"' % cmd) # Verify the number of command parameters: nparams = len(tokens) if nparams < require_params: _raise_error('Too few parameters in "%s" command' % cmd) elif nparams > require_params: _raise_error('Too many parameters in "%s" command' % cmd) if cmd == "exclude": pattern = tokens[0] spec = self.excludeCommandParser(linenum, pattern) else: # The same parsing function is used on both Acid and Base spec # commands, but the command name itself is stored with the ion # spec data, so the acid/base distinction will be known later. pattern, frag_name, pka = tokens try: spec = self.ionizeCommandParser(linenum, cmd, pattern, frag_name, pka) except RuntimeError as err: msg = 'Error in handling "%s" command' % cmd msg += "\n%s" % err _raise_error(msg) self.control_specs.append(spec)
[docs] def ionizeCommandParser(self, linenum, cmd, subs_patt, frag_name, pka): """ Validate mmsubs pattern spec; if all goes well, will store compiled pattern matcher below Parses both Acid and Base specs. :type linenum: int :param linenum: Line number that is being parsed :type cmd: str :param cmd: Spec command that is being parsed. Either "acid" or "base". :type subs_patt: str :param subs_patt: Substructure pattern for this specification. :type frag_name: str :param frag_name: Name of the fragment to use (from the "ionized" fragment library). :type pka: str :param pka: String representation of the pKa for this ionizable group. :rtype: `Spec` :return: The Spec instance representing this ionizable group. """ # Validate frag name spec if frag_name not in self.frags: raise RuntimeError("Invalid fragment name \"%s\"" % frag_name) frag = self.frags[frag_name] # Validate pKa spec # The following test for a numeric spec should be adequate # for numbers supplied as pKa specs, given the range of # reasonable pKa numbers try: pka = float(pka) except TypeError: raise RuntimeError("Non-numeric pKa spec \"%s\"" % pka) # Check that pKa number is within reasonable limits # (0-14 is too tight) if pka < -5.0 or pka > +15.0: msg = "Is pKa value out of range in following spec?" msg += "\n" + cmd.capitalize() msg += " %s %s %f" % (subs_patt, frag_name, pka) self.logger.warning(msg) # Check that total charge of fragment makes sense for specified group # type frag_chargef_tot = frag.chargef if (cmd == 'acid' and frag_chargef_tot >= 0) or \ (cmd == 'base' and frag_chargef_tot <= 0): if frag_chargef_tot > 0: sign = "positive" elif frag_chargef_tot < 0: sign = "negative" else: sign = "zero" msg = "\nFrag charge is %s in %s" % (sign, cmd.capitalize()) msg += "group spec --" msg += "\n%s %s %s" % (subs_patt, frag_name, pka) self.logger.warning(msg) try: mmsubs_handle = mm.mmsubs_new(subs_patt) except mm.MmException: raise RuntimeError("Invalid substructure pattern; see below") # Add spec to internal list; replicate associated frag parameters too: spec = Spec(cmd, linenum, subs_patt, mmsubs_handle) spec.frag_name = frag_name spec.pka = pka spec.frag_st = frag.st spec.frag_connect_to = frag.connect_to spec.frag_chargef_tot = frag.chargef # FIXME consider subclassing Spec and making these required attributes. # For pH-mode operation, pre-compute this site's ionization state # free energy costs, for possible later use; see EV case 30258 # for more information # if self.ph is not None: if cmd == 'base': spec.zcost, spec.ycost = free_energy_costs(pka - self.ph) elif cmd == 'acid': spec.zcost, spec.ycost = free_energy_costs(self.ph - pka) return spec
[docs] def excludeCommandParser(self, linenum, subs_patt): """ Validate an "exclude" mmsubs pattern spec. :type linenum: int :param linenum: Line number that is being parsed :type subs_patt: str :param subs_patt: Substructure pattern for this specification. """ try: mmsubs_handle = mm.mmsubs_new(subs_patt) except mm.MmException: raise RuntimeError("Invalid substructure pattern; see below") # Add spec to internal list spec = Spec("exclude", linenum, subs_patt, mmsubs_handle) return spec
def _treatStructure(self, st): """ This method generates the ionization states for the specified structure, and appends them to the self.output_structures list. :type st: `structure.Structure` :param st: Structure to process. :raise TooManyGroupsError: if the number of ionizable groups exceeds the limit. :raise TolerableError: on a non-fatal error. """ if self.retain_lab: self.str_to_retain = M2IO_DATA_RETAIN in st.property # Check on input CT's neutrality: if any atoms are charged, # merely warn (for now): chargef_tot = 0 for atom in st.atom: chargef = atom.formal_charge if chargef != 0 and self.loglevel > 0: msg = "WARNING: CT atom %i formal charge = %s" % ( atom.index, format_charge(chargef)) self.logger.warning(msg) chargef_tot += chargef if chargef_tot != 0 and self.loglevel > 0: msg = "WARNING: Input CT's total charge = %s" % format_charge( chargef_tot) self.logger.warning(msg) ion_candidates, ion_cand_atoms = self.collectIonCandidates(st) # If any candidate ionization center atom is charged, it will # foul up the logic for ion state expansion and charge-based # state restriction, both of which assume ion center neutrality # in the input CT; skip over "bad" input CT if running in # fault-tolerant mode, but fail hard in -strict mode # Note: Some pattern specs in ionizer.ini require the leading # atom to be uncharged; for matches to those patterns, this # test is superfluous for iatom in ion_cand_atoms: chargef = st.atom[iatom].formal_charge if chargef != 0: # FIXME ideally these should be part of the exception: self.why_tolerate = "Ion center atom is not properly neutralized" self.why_tol_brief = "Charged ion center" raise TolerableError("Ion center atom %i is not neutral" % iatom) # Summarize ionization candidates ncands = len(ion_cand_atoms) self.logger.info(IND + "Total candidate ionization centers: %i" % ncands) assert (self.max_groups <= MAX_GROUPS_MAX) # just in case # Make sure the number of ionizable grops does not exceed the limit: if ncands > self.max_groups: # FIXME ideally this should be part of the exception: self.why_tol_brief = "%s ion group" % ncands if ncands > 1: self.why_tol_brief += "s" raise TooManyGroupsError( "Exceeds maximum ion groups to handle (%s)" % self.max_groups) # Proceed with ion state analyses & expansion.... nstates = nstates = 2**ncands self.logger.info( IND + "Maximum possible ionization states: %s (unrestricted)" % nstates) # Expand input CT into multiple output CTs comprising all # allowed ionization states self._expandCT(st, chargef_tot, ion_cand_atoms, ion_candidates)
[docs] def collectIonCandidates(self, st): """ Check CT for matches to Acid/Base/Exclude patterns, in same order as supplied in control spec file; for non-Excluded, final Acid/Base matches, store specs in hash indexed on ion center atom number; also get ordered list of candidate atom numbers. """ # Per-CT hash of ion center atom numbers to ion specs ion_candidates = {} # Per-CT list of ion center atom numbers (ordered) ion_cand_atoms = [] if self.show_match_atoms: self.logger.info(IND + "Pattern specs match sequence --") # Test for Acid/Base/Exclude pattern matches in same order # as they were supplied in control spec file for spec in self.control_specs: cmd = spec.cmd cmd_rep = cmd.lower().capitalize() # for output linenum = spec.linenum # where cmd is in spec file matches = spec.getMatches(st) if len(matches) < 1: continue # Treat any pattern match(es) for match_atoms in matches: iatom = match_atoms[0] # 1st atom # Is match atom already on ionization candidate list? prior_candidate = iatom in ion_candidates # Modify ion candidate list as per spec if cmd == "exclude": if self.show_match_atoms: # Report only if actual exclusion if prior_candidate: self.logger.info(IND2 + "[%s] %s %s match atom %i; removing" % \ (linenum, cmd_rep, spec.mmsubs_pattern, iatom)) # Note: To permit use of simple blanket exclusions, # it is not required that an Exclude pattern match # must qualify a prior Acid/Base pattern match.... # Remove prior candidate spec (if any) if prior_candidate: del ion_candidates[iatom] else: # Acid or Base command if self.show_match_atoms: msg = IND2 + "[%s] %s %s match atom %i; " % \ (linenum, cmd_rep, spec.mmsubs_pattern, iatom) if prior_candidate: msg += "replacing" else: msg += "add to list" self.logger.info(msg) # Store new candidate spec, or supersede prior ion_candidates[iatom] = spec # Enumeration to come will be based on normal numeric ordering # of ion center atom numbers, so sort candidate atom numbers # and store in ordered list for iatom in sorted(list(ion_candidates)): # sort atom number keys numerically! ion_cand_atoms.append(iatom) # For some VCS uses of Ionizer, do optional avoidance of ion # center atoms which have grownames on them if self.avoid_grow: tmp_cand_atom_list = [] # Inspect current candidate ion center atoms; retain only # those without grownames on them for iatom in ion_cand_atoms: growname = st.atom[iatom].growname if not growname: # No growname at all, so keep atom on list tmp_cand_atom_list.append(iatom) continue growname = growname.strip() if growname == "": # Growname was empty or blank, so keep atom on list tmp_cand_atom_list.append(iatom) continue # If we got to here, then this candidate atom has a non- # trivial growname on it, and we must remove the atom # from further consideration; simply _don't_ push it # onto the tmp list being accumulated in this loop del ion_candidates[iatom] # trash this too if self.show_match_atoms: self.logger.info(IND2 + 'Match atom %s has growname "%s", ' % \ (iatom, growname) + "so remove from list") # Now copy back the tmp list, which may be reduced from # the original ion_cand_atoms = tmp_cand_atom_list if self.show_final_list: self.logger.info(IND + "Final ionization candidate list --") if self.ph is not None: # Temporarily, while developing, do these pH-related # printouts only if both -sf -d are specified # leave this undocumented self.logger.debug(IND2 + "Effective pH range is " + "%s +/- %s" % (self.ph, self.ph_thresh)) ph_max_adj = self.ph + self.ph_thresh_adj ph_min_adj = self.ph - self.ph_thresh_adj for iatom in ion_cand_atoms: spec = ion_candidates[iatom] group_type = spec.cmd pka = spec.pka self.logger.info(IND2 + "Atom %s is %s, " % (iatom, group_type) + \ "fuses %s frag, pKa = %s" % (spec.frag_name, pka)) # Work in progress: Preparing to implement scatter/ # gather iteration over possible states to generate, # for efficiency reasons; pH mode only.... # If in pH mode, compare pKa to pH range max & min # print -- at some verbosity -- whether combinations # to come can have prot only, deprot only, or prot and # deprot; relate Acid/Base to ionized or not (to fuse, # or not to fuse) # Eventually, when storing this info, have to make # these calculations independent of display options # like -sf; and test defined self.ph once, not # repeatedly if self.ph is not None: if pka > ph_max_adj: # Temporarily, while developing, do these # pH-related printouts only if both -sf & -d # are specified; leave this undocumented msg = IND3 + "pKa > max pH --> protonated (" if group_type == "acid": msg += "non-" msg += "ionized) only" self.logger.debug(msg) elif pka < ph_min_adj: msg = IND3 + "pKa < min pH --> deprotonated (" if group_type == 'base': msg += "non-" msg += "ionized) only" self.logger.debug(msg) else: self.logger.debug( IND3 + "pKa in range --> either form may exist") return ion_candidates, ion_cand_atoms
def _expandCT(self, st, input_ct_chargef_tot, ion_cand_atoms, ion_candidates): """ Basically a continuation of _treatStructure(). """ # FIXME would be good to re-factor this method to make it smalle by # pulling some code into another method. self.logger.info(IND + "Proceeding with expansion....") # Iterate over all possible combinations of individual # candidate ionizations; generation of some states may # be precluded by charge- and pKa-based restrictions # applied below # Terminology: I use the term "state" as a synonym for a # particular combination of individual group ionizations # and non-ionizations; elsewhere, I use the term "variant" # in the same sense, with the connotation that there are # multiple ionized variations of the non-ionized (neutral) # input molecule ngen = 0 # init output CT count for this input CT # main scope for tolerate_fault() use ncands = len(ion_cand_atoms) nstates = 2**ncands # unrestricted count for istate in range(nstates): state_fuse_atoms = [] # clear n_ionized_groups = 0 # init # This init assumes that no ionization (via frag fusion) # can affect a non-zero formal charge on an input CT atom # any such atom is prohibited from being a candidate ion # center, so there should be no problem, I think # chargef_grand_tot = input_ct_chargef_tot # init n_protonated = 0 # init n_deprotonated = 0 # init prot_pka_min = +100.0 # init min way too high deprot_pka_max = -100.0 # init max way too low prot_pka_min_atom = None # init deprot_pka_max_atom = None # init ycost_total = 0 # init (pH mode only) zcost_total = 0 # init (pH mode only) for jcand, fuse_atom in enumerate(ion_cand_atoms): # more like "fuseable" atom at this point cand = ion_candidates[fuse_atom] pka = cand.pka group_type = cand.cmd # Because each possible ionization center either will or # will not be ionized, the expansion index is a direct # bit-wise encoding of the particular combination of # individual candidate ionizations if (istate >> jcand) & 1: # Ionize this group via fuse at center atom state_fuse_atoms.append(fuse_atom) n_ionized_groups += 1 # increment list count chargef_grand_tot += cand.frag_chargef_tot if group_type == "acid": group_is_protonated = False elif group_type == "base": group_is_protonated = True else: raise ValueError if self.ph is not None: # Add site's ionization cost ycost_total += cand.ycost else: # Don't ionize this group if group_type == "acid": group_is_protonated = True elif group_type == "base": group_is_protonated = False else: raise ValueError if self.ph is not None: # Add site's keep-neutral cost zcost_total += cand.zcost # Tally info for pKa and pH decisions to come if group_is_protonated: n_protonated += 1 if pka < prot_pka_min: prot_pka_min = pka prot_pka_min_atom = fuse_atom else: n_deprotonated += 1 if pka > deprot_pka_max: deprot_pka_max = pka deprot_pka_max_atom = fuse_atom # Save up all reasons for skipping generation in a # string, initialized to empty # skip_reasons = [] # init # The two charge-limit tests occur _only_ when running in # non-default, pK-difference mode; it was decided that # the limits should not be applied in the default, # pH-oriented mode # # Decide whether this ion state should be generated, # based on charge limits.... # # Note that n_ionized_groups is the same as the total # absolute formal charge of this state's ion frags # assuming that every ion frag's total charge is either # +1 or -1; this is true, so far # ignore in pH mode if self.ph is None and n_ionized_groups > self.max_ions: skip_reasons.append("ion count %s > " % n_ionized_groups + "max ion count %s" % self.max_ions) abs_tot_ion_chg = abs(chargef_grand_tot) # ignore in pH mode if self.ph is None and abs_tot_ion_chg > self.max_abstotq: if chargef_grand_tot < 0: charge_str = "|-%s|" % abs_tot_ion_chg else: charge_str = str(abs_tot_ion_chg) skip_reasons.append("abs total charge %s" % charge_str + " > abs total charge max %s" % self.max_abstotq) # Decide whether this ion state should be generated, # based on pH and/or pKa considerations.... # # Note use below of self.p*_thresh_adj, which are # slightly higher than specified self.p*_thresh # this leads to ignoring slight f.p. imprecisions in # computed p* differences if self.ph is not None: # Definition of pH value selects default, # pH-based mode of ion state rejection # Consider protonated & deprotonated separately if n_protonated > 0: ph_pka_diff = self.ph - prot_pka_min if ph_pka_diff > self.ph_thresh_adj: skip_reasons.append("pH %s - " % self.ph + "prot@%s pKa %s > " % (prot_pka_min_atom, prot_pka_min) + "pH thresh %s" % self.ph_thresh) if n_deprotonated > 0: pka_ph_diff = deprot_pka_max - self.ph if pka_ph_diff > self.ph_thresh_adj: skip_reasons.append("deprot@%s" % deprot_pka_max_atom + " pKa %s" % deprot_pka_max + " - pH %s > pH thresh %s" % (self.ph, self.ph_thresh)) elif n_protonated > 0 and n_deprotonated > 0: # Undefined pH value selects the non-default, # purely pK-based rejection mode # Consider protonated & deprotonated together pk_diff = deprot_pka_max - prot_pka_min if pk_diff > self.pk_thresh_adj: skip_reasons.append("deprot@%s pKa %s - " % (deprot_pka_max_atom, deprot_pka_max) + "prot@%s pKa %s > " % (prot_pka_min_atom, prot_pka_min) + "pK thresh %s" % self.pk_thresh) # Label the ion state under consideration state_desc = "state %s: " % istate if istate == 0: state_desc += "unchanged" else: state_desc += "ionize @ %s" % ' '.join( map(str, state_fuse_atoms)) # Note: The "Tot Q" string below is consistent with the # effect in Maestro of the string currently assigned to # M2IO_DATA_TOTAL_CHARGE; it might be nice to # keep it that way! state_desc += ", Tot Q %s" % format_charge(chargef_grand_tot) if istate != 0: self.str_to_retain = False if istate == 0 and self.retain: self.str_to_retain = True retain_state = not skip_reasons if istate == 0 and (self.retain or self.str_to_retain): # If we need to retain the input structure if self.loglevel >= 1: self.logger.info( IND2 + "Retaining the input structure as requested by user") retain_state = True # Any regular reasons not to generate this state? if not retain_state: if self.show_skips and self.loglevel >= 1: self.logger.info(IND2 + "Skipping %s" % state_desc) if self.loglevel >= 2: self.logger.info('\n'.join( [IND3 + r for r in skip_reasons])) continue # skip to next possible state # This should apply to unusual cases only: Are we about # to pass the max allowed output CTs per input CT? if ngen + 1 > self.max_outcts: if self.loglevel >= 1: self.logger.info(IND2 + "Cease at %s" % state_desc) self.logger.info(IND + "Ceased generation at %s output CT limit" % self.max_outcts) break # ignore all other possible states # Okay to attempt to generate this ion state...+ if self.loglevel >= 1: self.logger.info(IND2 + "Generate %s" % state_desc) self.generateStateCT(st, chargef_grand_tot, istate, state_fuse_atoms, ion_candidates, ycost_total, zcost_total) ngen += 1 if self.adjust: self.output_structures.append(self.best_out_st) return # no error
[docs] def generateStateCT(self, st, total_charge, state_num, state_fuse_atoms, ion_candidates, ycost_total, zcost_total): """ Generate the CT for the specificed ionization state. :type st: `structure.Structure` :param st: Input structure. :type total_charge: int :param total_charge: Total charge of this ionization state. :type state_num: int :param state_num: The number of this state (0-indexed) :type state_fuse_atoms: list of ints :param state_fuse_atoms: List of atoms to which the new fragment will be attached to. :type ion_candidates: dict; key: atom index; value: `Spec` :param ion_candidates: Specifications for this state change. :type ycost_total: float :param ycost_total: Charging ionization penalty for this state :type zcost_total: float :param zcost_total: Neutral ionization penalty for this state """ # Duplicate input CT for output after ion frag fusions output_st = st.copy() # If the user specified a retitling prefix, then add the prefix and ion # state number at end of input CT title: if self.retitle_with: output_st.title = st.title + self.retitle_with + str(state_num) # If the user has not disabled this (via command option), # clear any structure-based properties from the CT for # output, which is about to change its structure.... # # Strictly speaking, this is _wrong_ in the single case of # the non-ionized variant, which may pass through; but to # leave dependent properties on a single variant in the # output stream seems bound to cause confusion for users # # It would be more efficient to do this clearing just once # on the input CT, rather than repetitively on each output # CT; but just in case this program ever needs to consult # some structure-dependent property on the input CT, in a # per output variant manner, I choose the inefficient way, # to retain access to all input CT properties if not self.keep_props: mm.mmct_ct_clear_dependent_properties( output_st.handle, mm.MMCT_DEPEND_ON_CONNECTIVITY) # Make local copy of this state's atom number list, for # renumbering after each fusion, that is, as a side # effect of each call to fuseIonFragment() renumbered_fuse_atoms = state_fuse_atoms[:] # For each ion center atom selected in this state, # fuse associated ion frag natoms = len(renumbered_fuse_atoms) for i in range(natoms): fuse_atom_orig = state_fuse_atoms[i] fuse_atom_renum = renumbered_fuse_atoms[i] ion_cand = ion_candidates[fuse_atom_orig] try: # NOTE: Will modify the renumbered_fuse_atoms list self.fuseIonFragment(output_st, fuse_atom_orig, fuse_atom_renum, renumbered_fuse_atoms, ion_cand) except RuntimeError as err: msg = "Problem fusing frag at atom " + \ "%s (orig. %s)" % (fuse_atom_renum, fuse_atom_orig) msg += "\n%s" % err raise RuntimeError(msg) # Prepare to add CT-level properties giving ion state details atom_numbers = [] atom_types = [] atom_origs = [] for i in range(natoms): anum = renumbered_fuse_atoms[i] atom = output_st.atom[anum] atom_numbers.append(anum) atom_types.append(atom.atom_type_name) atom_origs.append(state_fuse_atoms[i]) # Note the use of underscore "_" delimiter; that delimiter is # relied upon a little further down center_atoms_string = "_".join(map(str, atom_numbers)) center_types_string = "_".join(atom_types) center_atoms_orig_string = "_".join(map(str, atom_origs)) # Add CT-level properties giving details of ionization state try: self.addIonProperties(output_st, state_num, total_charge, center_atoms_string, center_types_string, center_atoms_orig_string) except RuntimeError: raise RuntimeError("Problem adding ion properties") # High-verbosity log of final ion center atom numbers; don't # bother logging the charged atom types if self.loglevel >= 2 and natoms > 0: # Note the reliance on underscore "_" delimiter, which # must be used above s = "s" if len(atom_numbers) > 1 else "" tmp_str = center_atoms_string.replace("_", " ") self.logger.info(IND3 + "ionized center%s @ %s (final)" % (s, tmp_str)) # In pH mode only, add CT-level properties giving free energy # costs for the particular ionization state, for use by other # programs in ranking the states if self.ph is not None: try: self.addCostProperties(output_st, ycost_total, zcost_total) except RuntimeError: raise RuntimeError("Problem adding energy cost properties") # Write output CT to output file if not self.adjust: self.output_structures.append(output_st) else: # -adjust used (retain only "best" ionization state) tot_cost = ycost_total + zcost_total if tot_cost <= self.min_cost and not self.retain: self.min_cost = tot_cost self.best_out_st = output_st elif self.retain and state_num == 0: self.best_out_st = output_st self.logger.info("\nInfo %s %s %s" % (state_num, tot_cost, self.min_cost)) return # no error
[docs] def fuseIonFragment(self, output_st, fuse_atom_orig, pre_fuse_center, renumbered_fuse_atoms, cand): """ This function coordinates the ionization of one ionizable group, via mmbuild_fuse() of the appropriate ionizing fragment onto the group's ionization center atom (same method as Maestro builder uses for Place onto non-terminal atom); function also takes care of various property adjustments in the modified CT. """ frag_name = cand.frag_name frag_st = cand.frag_st frag_connect_to = cand.frag_connect_to if self.loglevel >= 2: self.logger.info(IND3 + "fuse %s frag @ " % frag_name + "atom %s (renum.)" % pre_fuse_center) # I do not replicate code for Maestro Place's alternate use # of mmbuild_attach (mmbuild_grow) in cases where the ion # center is a terminal atom; it's precluded for the acids, # since each one must _lose_ an H; it might be possible for # a base; in any event, no current Acid/Base pattern is # anchored on a terminal atom # # Just in case, detect & reject any attempt to fuse on a # terminal atom nbonds = output_st.atom[pre_fuse_center].bond_total if nbonds < 1: raise RuntimeError("No bond found for ionizable atom " + "%i (orig. %i" % (pre_fuse_center, fuse_atom_orig)) if nbonds == 1: raise RuntimeError("Ionization of terminal atom is not supported!") # Replicate Maestro builder Place action by adapting # maestro-src/src/build/mm_mcsplace.cxx code's use of # mmbuild_fuse() to attach frag at non-terminal atom num_main_atoms = output_st.atom_total connect_to = num_main_atoms + frag_connect_to fuse1_mmlist = mmlist.mmlist([pre_fuse_center]) fuse2_mmlist = mmlist.mmlist([connect_to]) output_st.extend(frag_st) # In preparation for post-fuse atom property adjustments, # save certain atom properties of original ion center if self.revise_ion_groups: self.saveIonCenterProps(output_st, pre_fuse_center) # Now do the ionizing fuse.... # Backup the log level to avoid "mmlist" errors from going into the log # files. These errors also show up differently on different platform. # See LIGPREP-1611 prev_level = mm.mmerr_get_level(MYERRHAND) mm.mmerr_level(MYERRHAND, mm.MMERR_OFF) try: renumber_map = mm.mmbuild_fuse(output_st, fuse1_mmlist, fuse2_mmlist) except mm.MmException as e: if e.rc == mm.MMBUILD_VALENCE_ERROR: # most common failure # FIXME ideally these should be part of the exception: self.why_tolerate = \ "Unusual build error, due to valence violation" self.why_tol_brief = "Valence violation" raise TolerableError( 'Valence violation fusing "%s" at atom %i' % (frag_name, pre_fuse_center)) else: # unusual failure # FIXME ideally these should be part of the exception: self.why_tolerate = \ "Unusual build error, probably superposition problem" self.why_tol_brief = "Superposition issue" raise TolerableError('Unable to fuse "%s" fragment at atom %i' % (frag_name, pre_fuse_center)) finally: mm.mmerr_level(MYERRHAND, prev_level) # Need for a few uses below: Get atom number of new ionized # center; it's the renumbering of the pre-fuse connect_to # (was number of atom fused from appended frag CT) post_fuse_center = renumber_map[connect_to] # Revise ion center atom numbers in this state's list, # in light of fuse operation's renumbering of atoms old_nums = [] new_nums = [] for i, old in enumerate(renumbered_fuse_atoms): if old == pre_fuse_center: # Original ion center atom has been replaced! new = post_fuse_center else: new = renumber_map[old] renumbered_fuse_atoms[i] = new # Save lists of old & new fuse atom numbers, if needed # for debug-variety, high-verbosity logging (-ll 2 -d) if self.loglevel >= 2: old_nums.append(old) new_nums.append(new) # If debug-variety, high-verbosity logging (-ll 2 -d), # report fuse atom renumbering, post-fuse.... # Won't document this extra logging (outside of here) # since it's really only useful to the developer if self.loglevel >= 2: self.logger.debug(IND4 + "renum: %s -> %s" % (old_nums, new_nums)) # Revise certain atom properties to match input ion center if self.revise_ion_groups: try: self.reviseIonizedGroup(output_st, post_fuse_center) except RuntimeError: raise RuntimeError("Cannot revise ion group atom property " + "(atom %s)" % post_fuse_center) # As with Maestro builder Place, assign new default atom # names to atoms whose atom numbers just changed, and to # new atoms just added; first have to find the highest # numbered atom which was not renumbered # # Note: This action isn't really needed, as atom names # aren't used for anything, and other current problems # confound evaluation of its correct effect anyway! new_num_atoms = output_st.atom_total last_unchanged = 0 old = new_num_atoms while old > 0: new = renumber_map[old] if new == old: last_unchanged = old break old -= 1 # Defer finishing revision of default atom name fields # until after any stereochemical property adjustment. At # the time I'm adding this comment, frankly, I don't see # why, or even if, this deferral is necessary! But I'm # leaving the code arrangement as is, to avoid breaking # any of the operation. # Adjust stereochemistry properties (default action now) if self.stereo_adjust: renumber_map[connect_to] = None renumber_map[pre_fuse_center] = post_fuse_center try: mm.mmstereo_adjust_atoms(output_st, 7, renumber_map, num_main_atoms) except mm.MmException: raise RuntimeError("Cannot adjust stereochemistry properties") # Note that the renumber map is now changed from what # mmbuild_fuse() passed back! # Now finish revising the default atom name fields.... added_atoms_bitset = mmbitset.Bitset(size=new_num_atoms) added_atoms_bitset.range(last_unchanged + 1, new_num_atoms) try: mm.mmct_ct_set_default_atomname(output_st, added_atoms_bitset) except mm.MmException: raise RuntimeError( "Error setting atom name field for new additions") return # no error
[docs] def saveIonCenterProps(self, st, center_anum): """ Save certain properties of an ion center atom before the ionizing fuse. """ # Pass in pre-fuse ion group center atom number center_atom = st.atom[center_anum] # Save settings for easy access by reset function self.center_pdbres = center_atom.pdbres self.center_pdbname = center_atom.pdbname # FIXME need to get this to work or remove it: # self.center_reschar = mm.mmct_atom_get_res(st.handle, center_anum) self.center_chain_char = center_atom.chain self.center_res_num = center_atom.resnum self.center_ins_code = center_atom.inscode
[docs] def restoreIonCenterProps(self, st, iatom): """ Restores the atom properties saved by the method above. Also sets the default display color property based on the atom's new (i.e., post-fuse) atom type. Pass in post-fuse atom number (new ionized group center or bonded H). """ atom = st.atom[iatom] atom.pdbres = self.center_pdbres if atom.atomic_number != 1: atom.pdbname = self.center_pdbname # FIXME need to get this to work or remove it: # mm.mmct_atom_set_res(st.handle, iatom, self.center_reschar) atom.chain = self.center_chain_char # It's possible that the input had no resnum set: atom.resnum = self.center_res_num atom.inscode = self.center_ins_code # Set atom color based on new (post-fuse) atom type; no reference to # original setting, since ionization has likely changed the atom type: itype = atom.atom_type icolor = mm.mmat_get_color(itype) atom.color = icolor
[docs] def reviseIonizedGroup(self, st, center_atom): """ Function to apply above reset function to all atoms of newly ionized group. Pass in post-fuse ionized group center atom number Collect list of newly ionized group's atoms (new ionized center + all Hydrogens bonded to it); revise certain atom properties to match original ion center """ atom_obj = st.atom[center_atom] new_atoms = get_bonded_hydrogens(atom_obj) new_atoms.append(center_atom) for iatom in new_atoms: self.restoreIonCenterProps(st, iatom) return # no error
[docs] def addIonProperties(self, st, state_num, total_charge, center_atoms_string, center_types_string, center_atoms_orig_string): """ This function writes CT-level properties recording several details of a particular ionization state; it's intended to be used on CTs generated for output; the values expressed in the properties must be calculated beforehand; all of these properties get marked as dependent on CONNECTIVITY. """ # Now add the properties and set their dependencies st.property[M2IO_DATA_ION_STATE_NUM] = state_num mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_ION_STATE_NUM, mm.MMCT_DEPEND_ON_CONNECTIVITY) st.property[M2IO_DATA_TOTAL_CHARGE] = total_charge mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_TOTAL_CHARGE, mm.MMCT_DEPEND_ON_CONNECTIVITY) st.property[M2IO_DATA_CENTER_ATOMS] = center_atoms_string mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_CENTER_ATOMS, mm.MMCT_DEPEND_ON_CONNECTIVITY) st.property[M2IO_DATA_CENTER_TYPES] = center_types_string mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_CENTER_TYPES, mm.MMCT_DEPEND_ON_CONNECTIVITY) st.property[M2IO_DATA_CENTERS_ORIG] = center_atoms_orig_string mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_CENTERS_ORIG, mm.MMCT_DEPEND_ON_CONNECTIVITY) if self.str_to_retain: st.property[M2IO_DATA_RETAIN] = 1 mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_RETAIN, mm.MMCT_DEPEND_ON_CONNECTIVITY)
[docs] def addCostProperties(self, st, ycost_total, zcost_total): """ For use in pH mode only, this function adds CT-level properties recording the free energy costs associated with this particular ionization state. """ if self.ph is None: return 0 # Now add the properties and set their dependencies pcost_total = ycost_total + zcost_total st.property[M2IO_DATA_COST_TOTAL] = pcost_total mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_COST_TOTAL, mm.MMCT_DEPEND_ON_CONNECTIVITY) st.property[M2IO_DATA_COST_CHARGE] = ycost_total mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_COST_CHARGE, mm.MMCT_DEPEND_ON_CONNECTIVITY) st.property[M2IO_DATA_COST_NEUT] = zcost_total mm.mmct_ct_add_property_dependency(st.handle, M2IO_DATA_COST_NEUT, mm.MMCT_DEPEND_ON_CONNECTIVITY)
[docs] def ionizeStructure(self, st): """ This is the public interface method. The specified CT will be ionized, and the output structures will be returned as a list. :type st: `structure.Structure` object :param st: Structure to ionize. :rtype: List of structures. :return: Returns a list of ionized output structures. :raise TooManyGroupsError: if the CT had too many ionizable groups. :raise TolerableError: if there was a non-fatal error :raise RuntimeError: on unexpected error """ self.output_structures = [] # Initialize the "best" structure criteria if -adjust is used: if self.adjust: self.min_cost = 100000.0 self.best_out_st = None # These variables are read by the ionizer_backend.py: # FIXME ideally these should be part of the exception: self.why_tolerate = None # clear for fault-tolerant mode self.why_tol_brief = "Problem in input CT" # default self._treatStructure(st) return self.output_structures