"""
Parse pytest log files to help track down parallel test failures.  All log files
must have been generated using subprocess tests (i.e. "-n <NUMBER>" or
"-n auto") and the "-v" flag.  Running this script from the command line will
output statistics about the failures as well as a recipe to reproduce a worker
process failure.

Usage: log_parser.py <log file 1> [<log file 2>...]

Note: To automatically run the Python unit tests 10 times in a row and generate
logs that are appropriate for input into this script, you can run ::

    mkdir ../test_logs
    for i in {1..10}; do
        make unittest TEST_ARGS="-n=auto -v" 2>&1 | tee ../test_logs/${i}.txt;
    done

from $SCHRODINGER/mmshare-v*/python/test.  This will put the test logs into a
new $SCHRODINGER/mmshare-v*/python/test_logs directory.
"""

import glob
import itertools
import re
import sys


def parse_log(logfile):
    """
    Parse the specified pytest log file. The log file must have been
    generated using subprocess tests (i.e. "-n <NUMBER>" or "-n auto") and
    the "-v" flag. This function reports which tests were run on each worker
    process, the index of the first failed test on each worker process, and
    the number of worker processes with failed tests.

    :param logfile: The path of the log file to parse
    :type logfile: str

    :rtype: tuple(list(list(str)), list(int or None), int)
    :return: A tuple of:

        - A list of the tests run on each worker process, formatted as [list
          of tests run on worker process 0, list of tests run on worker
          process 1, ...]
        - A list of the first failed test index on each worker process, or
          None if no tests failed on that worker.
        - The number of worker processes with failed tests.
    """
    per_process = [[]]
    first_fail = [None]
    num_failed_procs = 0
    with open(logfile) as log:
        for curline in log:
            match = re.match(
                r"""
                \[gw(\d+)\]\s*            # process number
                \[\s*\d*%\]\s             # percent complete
                (\w*)\s*                  # passed/failed/error
                ([\w/\\:. -]*)\s*         # test name
                (?:<-\s*[\w/\\. -]*\s*)?$ # arrow for inherited tests (optional)
                """, curline, re.VERBOSE)
            if match:
                proc_num, status, test_name = match.groups()
                test_name = test_name.strip()
                proc_num = int(proc_num)
                if proc_num >= len(per_process):
                    # First line seen from this worker; grow the per-worker
                    # bookkeeping lists to fit it
                    procs_to_add = proc_num - len(per_process) + 1
                    per_process.extend([] for _ in range(procs_to_add))
                    first_fail.extend([None] * procs_to_add)
                if (status in ("FAILED", "ERROR") and
                        first_fail[proc_num] is None):
                    first_fail[proc_num] = len(per_process[proc_num])
                    num_failed_procs += 1
                per_process[proc_num].append(test_name)
    return per_process, first_fail, num_failed_procs
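

# A minimal usage sketch for parse_log() (the log file path here is
# hypothetical):
#
#     per_process, first_fail, num_failed_procs = parse_log("../test_logs/1.txt")
#     for worker, fail_index in enumerate(first_fail):
#         if fail_index is not None:
#             first_failure = per_process[worker][fail_index]
#             print(f"gw{worker} failed first at {first_failure}")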


def parse_logs(logfiles):
    """
    Parse multiple pytest log files. All log files must have been generated
    using subprocess tests (i.e. "-n <NUMBER>" or "-n auto") and the "-v"
    flag.

    :param logfiles: A list of log file paths
    :type logfiles: list(str)

    :rtype: tuple(int, int, int, int, int, list(str) or None)
    :return: A tuple of:

        - The total number of pytest runs processed
        - The number of failed runs
        - The total number of worker processes over all runs
        - The total number of worker processes with a test failure over all
          runs
        - The maximum number of worker processes with a test failure in a
          single test run
        - All tests run on a single worker process up to and including the
          first failed test. If multiple worker processes contained a failed
          test, then the shortest recipe is provided. If no worker processes
          contained a failed test, this will be None.
    """
    total_runs = len(logfiles)
    num_failed_runs = 0
    total_num_procs = 0
    total_num_failed_procs = 0
    max_num_failed_procs = 0
    shortest_recipe = None
    recipe_length = float("inf")
    for cur_file in logfiles:
        per_process, first_fail, num_failed_procs = parse_log(cur_file)
        total_num_procs += len(first_fail)
        total_num_failed_procs += num_failed_procs
        max_num_failed_procs = max(max_num_failed_procs, num_failed_procs)
        if num_failed_procs:
            num_failed_runs += 1
            # Find the worker process whose first failure happened earliest,
            # since that gives the shortest reproduction recipe for this run
            fastest_fail_index = min(
                range(len(first_fail)),
                key=lambda i: (first_fail[i]
                               if first_fail[i] is not None else float("inf")))
            fastest_fail_length = first_fail[fastest_fail_index]
            if fastest_fail_length < recipe_length:
                recipe_length = fastest_fail_length
                shortest_recipe = \
                    per_process[fastest_fail_index][:fastest_fail_length + 1]
    return (total_runs, num_failed_runs, total_num_procs,
            total_num_failed_procs, max_num_failed_procs, shortest_recipe)


def main():
    """
    Parse all log files provided on the command line. Print out statistics
    about the failures and a recipe to reproduce the shortest failure.
    """
    # bash will glob the paths for us but cmd on Windows won't, so we glob
    # them here just in case
    files = list(
        itertools.chain.from_iterable(
            glob.glob(path) for path in sys.argv[1:]))
    if not files:
        print("No logs found", file=sys.stderr)
        sys.exit(1)
    (total_runs, num_failed_runs, total_num_procs, total_num_failed_procs,
     max_num_failed_procs, shortest_recipe) = parse_logs(files)
    print(f"{num_failed_runs} of {total_runs} runs failed")
    if num_failed_runs == 0:
        return
    print(f"{total_num_failed_procs} of {total_num_procs} worker processes "
          "contained a failure")
    print(f"At most, {max_num_failed_procs} worker processes failed per run")
    print("")
    print(f"Fastest failure required {len(shortest_recipe)} tests:")
    print("\n".join(shortest_recipe))
    print("")
    print("To run these tests, run:")
    print(f'make unittest TEST_ARGS="-v {" ".join(shortest_recipe)}"')


if __name__ == "__main__":
    main()