Source code for schrodinger.test.stu.outcomes.correlate_to_reference_file

import numpy as np
import scipy.stats.stats

from schrodinger import structure


def read_prop_by_title(fn, prop_name):
    """
    Collect one ct-level property from the structures in a file, keyed by
    structure title.

    Structures that do not carry the requested property are skipped.

    :param fn: Path of the structure file to read
    :type fn: str

    :param prop_name: Name of the ct-level property to read
    :type prop_name: str

    :return: Mapping of structure title to the value of `prop_name`
    :rtype: dict

    :raise RuntimeError: If two structures that carry the property share the
        same title
    """
    values_by_title = {}
    for st in structure.StructureReader(fn):
        # Only structures that actually have the property take part, so the
        # uniqueness requirement applies to those structures alone.
        if prop_name not in st.property:
            continue
        if st.title in values_by_title:
            raise RuntimeError('Titles must be unique in %s (%s)' %
                               (fn, st.title))
        values_by_title[st.title] = st.property[prop_name]
    return values_by_title
def correlate_to_reference_file_workup(file_name, file_property, ref_file_name,
                                       ref_file_property, min_correl):
    r"""
    Determine the correlation between ct-level real properties from one file
    to a reference. CT's are matched by their titles.

    Every title that has the property in the reference file must also have
    the property in `file_name`; titles that appear only in `file_name` are
    ignored.

    :param file_name: Path to the file to check, readable by structurereader
    :type file_name: str

    :param file_property: Name of the ct-level property in the file_name to
        use. This should start with r\_ or i\_
    :type file_property: str

    :param ref_file_name: Path to the reference file, readable by
        structurereader
    :type ref_file_name: str

    :param ref_file_property: Name of the ct-level property in ref_file_name
        to use. This should start with a r\_ or i\_
    :type ref_file_property: str

    :param min_correl: Correlation (R, not R-squared) should be less than this
        value if this is a negative value (more negative correlation) and more
        than this value if this is a positive value (more positive
        correlation). A value of exactly 0 performs no check.
    :type min_correl: float

    :return: True when the correlation satisfies the target
    :rtype: bool

    :raise RuntimeError: If a title from the reference file has no data in
        `file_name` (or if titles are not unique within either file)
    :raise AssertionError: If the correlation does not satisfy `min_correl`,
        including when the correlation is NaN and a non-zero target is set
    """
    # Match by title
    file_dict = read_prop_by_title(file_name, file_property)
    ref_dict = read_prop_by_title(ref_file_name, ref_file_property)

    # Build the two data series in the same (reference) title order
    ref_data = []
    file_data = []
    for title, ref_value in ref_dict.items():
        if title not in file_dict:
            raise RuntimeError("Cannot find data for %s(%s) in %s" %
                               (title, file_property, file_name))
        ref_data.append(ref_value)
        file_data.append(file_dict[title])

    # Get the correlation. linregress is called through the public
    # scipy.stats namespace; scipy.stats.stats is a deprecated alias.
    results = scipy.stats.linregress(np.array(file_data), np.array(ref_data))
    correl = results.rvalue

    report = f"Correlation is {correl:5.3f} -- Target is {min_correl:5.3f}"
    # The comparisons are written as "not (passes)" rather than "(fails)" so
    # that a NaN correlation, which compares False against everything, is
    # reported as a failure instead of slipping through as a pass.
    if min_correl < 0 and not correl <= min_correl:
        raise AssertionError(report)
    if min_correl > 0 and not correl >= min_correl:
        raise AssertionError(report)
    return True