Source code for schrodinger.application.desmond.replica_exchange_review

import copy
from collections import defaultdict

import numpy


[docs]def line_match_final_log(line): if line.find("::: finished :::") > -1: return True else: return False
[docs]def accept_reject_parse(file_input, n_replicas): ''' Parse log file for REMDrelated lines :param file_input: Iterator of replica exchange log lines :type n_replicas: `int` :param n_replicas: number of replicas :returns: (acc_count, rej_count, reject_dict, total_dict, replica_states): - acc_count - Number of accepted exchanges - rej_count - Number of rejected exchanges - rej_dict - Number of rejected exchanges per replica pair - total_dict - Number of exchange attempts per replica pair - replica_states - List of (time, replica_indices) for each time where replicas differ from previous time :rtype: `tuple(int, int, dict, dict, list)` ''' # Accumulate changes of indices from the ACCEPTances switches = [] rej_count, acc_count = 0, 0 time_now = 0.0 reject_dict, total_dict = defaultdict(int), defaultdict(int) replica_idx = list(range(n_replicas)) replica_states = [(time_now, replica_idx[:])] for line in file_input: if not len(line) or line[0] == '#' or line[0:8] == 'Chemical': continue if line_match_final_log(line): break fields = line.split() if len(fields) < 2: continue # Expecting a line with the following format # 0:time 1:exchange 2:rep_1 3:rep_2 4:T_1 5:T_2 6:U_1(x_1) 7:U_2(x_2) 8:U_1(x_2) 9:U_2(x_1) 10:P_1 11:P_2 12:V_1 13:V_2 14:arg # Make sure this is a valid line if fields[1] in ('REJECT', 'ACCEPT'): # If the time has changed, commit the completed time time_read = float(fields[0]) if time_read != time_now: if len(switches) > 0: for s0, s1 in switches: replica_idx[s0], replica_idx[s1] = replica_idx[ s1], replica_idx[s0] replica_states.append((time_now, replica_idx[:])) switches.clear() time_now = time_read if fields[1] == 'REJECT': # Index by temperature or Hamiltonian state index idx = f"{fields[4]} - {fields[5]}" if fields[4] != fields[ 5] else f"{fields[2]} - {fields[3]}" rej_count += 1 reject_dict[idx] += 1 total_dict[idx] += 1 elif fields[1] == 'ACCEPT': # Index by temperature or Hamiltonian state index idx = f"{fields[4]} - {fields[5]}" if fields[4] != fields[ 5] else f"{fields[2]} - {fields[3]}" acc_count += 1 switches.append((int(fields[2][1:]), int(fields[3][1:]))) total_dict[idx] += 1 # Finally run through the last time point's accumulated switches (assumes successful completion) if len(switches) > 0: for s0, s1 in switches: replica_idx[s0], replica_idx[s1] = replica_idx[s1], replica_idx[s0] replica_states.append((time_now, replica_idx[:])) return acc_count, rej_count, reject_dict, total_dict, replica_states
[docs]def read_data_from_log_file(logfile, cfg_map): # Contributors: Yujie Wu, John Shelley, Matvey Adzhigirey # New parser is based on the actual simulation configuration as recorded in # the log file and thus should work for whatever types of REMD. -YW input_log = open(logfile, "r") # Also determine if CPU or GPU codebase was ran isGPU = False if 'graph' in list(cfg_map.remd): isGPU = True # Gets the number of replicas first. Then finds out the temperatures for all # replicas from the configuration. use_temps = [] simulation_time = None if not isGPU: for replica in cfg_map.remd.cfg: cfg_map_tmp = copy.deepcopy(cfg_map) cfg_map_tmp.update(replica.val) try: rep = cfg_map_tmp.integrator.temperature.T_ref.val except: rep = cfg_map_tmp.integrator.temperature[0].T_ref.val use_temps.append(rep) # now check if you actually got different replica temeratures # if it's a REST job, all temperatures will be the same if len(set(use_temps)) == 1: use_temps = [] for replica in cfg_map.remd.cfg: replica_name = replica.force.term.gibbs.window.val use_temps.append(replica_name) else: for replica_name, replica_setting in cfg_map.remd.graph.key_value(): cfg_map_tmp = copy.deepcopy(cfg_map) cfg_map_tmp.update(replica_setting.val) if replica_name == 'edges': continue rep = cfg_map_tmp.integrator.temperature.T_ref.val use_temps.append(rep) # now check if you actually got different replica temperatures, if # it's a rest job, all temepatures will be the same if len(set(use_temps)) == 1: replica_name = list(cfg_map.remd.graph) replica_name.remove('edges') use_temps = [name.strip('T') for name in replica_name] use_temps = sorted(use_temps) #get the total time of the simulation, so the plot reflects it. try: simulation_time = cfg_map.remd.last_time.val except: pass num_replicas = len(use_temps) acc_count, rej_count, _, _, replica_states = accept_reject_parse( input_log, num_replicas) # Sort the information so that it is in the form [(Time, [Temperatures])] # where Temperatures are the temperatures for each replica (in their # original order) orig_replica_temps = [] for sys_state in replica_states: temps = [] for ind in range(0, len(use_temps)): a_temp = use_temps[sys_state[1].index(ind)] temps.append(a_temp) time_temp = (sys_state[0], temps) orig_replica_temps.append(time_temp) time_list = [] temp_lists_list = [] # list of lists for time_temp in orig_replica_temps: #print "%10.3f " % time_temp[0], time_list.append(time_temp[0]) for i, a_temp in enumerate(time_temp[1]): if len(temp_lists_list) == i: # This is the first time value temp_lists_list.append([a_temp]) else: temp_lists_list[i].append(a_temp) # Post-process the lists: # Add 2 points for each exchange so that the transitions between # replicas are drawn as vertical lines instead of diagonal lines: doubled_time_list = [] for i, time in enumerate(time_list): if i == 0: doubled_time_list.append(time) else: doubled_time_list.append(time) doubled_time_list.append(time) doubled_temp_lists_list = [] for temps in temp_lists_list: doubled_temps = [] for i, temp in enumerate(temps): if i == len(temps) - 1: doubled_temps.append(temp) else: doubled_temps.append(temp) doubled_temps.append(temp) doubled_temp_lists_list.append(doubled_temps) line_list = [] #add last time point if simulation_time is not None: doubled_time_list.append(simulation_time) for temps in doubled_temp_lists_list: if simulation_time is not None: #add replica temperature for at the very end of the simulation temps.append(temps[-1]) line_list.append([numpy.array(doubled_time_list), numpy.array(temps)]) return use_temps, line_list