Source code for schrodinger.project.pandasutils

"""
A module which contains functions to convert between Schrodinger project data
and a Pandas data frame.
"""
import collections
import enum
import re
import typing

import pandas as pd
from rdkit.Chem import PandasTools

from schrodinger import project
from schrodinger.structutils.smiles import SmilesGenerator

# Selects which project rows are exported: only the selected rows, or every row.
class WhichRows(enum.Enum):
    SELECTED = 1
    ALL = 2


# Selects which project properties become columns: only the visible ones, or
# every property.
class WhichColumns(enum.Enum):
    VISIBLE = 1
    ALL = 2


def get_data_frame_from_project(
        pt: project.Project,
        which_rows: WhichRows = WhichRows.ALL,
        which_columns: WhichColumns = WhichColumns.ALL,
        prop_filter: typing.Optional[str] = None,
        with_rdkit: bool = False,
        with_smiles: bool = False) -> pd.DataFrame:
    """
    Return a Pandas data frame given a Schrodinger project object (as might
    be returned from maestro.get_project_table()).

    Each exported project row becomes one data frame row and each exported
    property becomes one column, named by the property's dataname.

    :param pt: Project (already open via Maestro or standalone) to convert
    :param which_rows: Which rows from the project are to be converted
        (all or selected)
    :param which_columns: Which columns from the project are to be converted
        (all or visible)
    :param prop_filter: A regular expression which, if defined, will restrict
        the properties to datanames which match this expression. Matching
        uses `re.match`, so the expression is anchored at the start of the
        dataname.
    :param with_rdkit: A flag which indicates if RDKit MOL objects should be
        added (as an "RDKit Mol" column). This also adds the "smiles" column
        the molecules are built from.
    :param with_smiles: A flag which indicates if a "smiles" column holding
        the SMILES string generated for each row's structure should be added
    :return: A Pandas dataframe populated with data from the project
    """
    if which_columns == WhichColumns.ALL:
        prop_names = pt.getPropertyNames()
    else:
        prop_names = pt.getVisiblePropertyNames()
    if prop_filter:
        # Compile once instead of looking the pattern up per property name.
        pattern = re.compile(prop_filter)
        prop_names = [p for p in prop_names if pattern.match(p)]
    else:
        # Materialize so the names can be traversed once per row below.
        prop_names = list(prop_names)

    # The RDKit molecule column is derived from the SMILES column, so either
    # flag requires SMILES to be generated.
    need_smiles = with_rdkit or with_smiles
    sg = SmilesGenerator() if need_smiles else None

    # Seed every column up front so that the frame keeps its columns even
    # when there are no rows to export. Without this an empty selection
    # would produce a column-less frame and the "smiles" lookup done for
    # with_rdkit would fail.
    data_dict: typing.Dict[str, list] = {p: [] for p in prop_names}
    if need_smiles:
        data_dict.setdefault("smiles", [])

    rows = pt.all_rows if (which_rows == WhichRows.ALL) else pt.selected_rows
    for row in rows:
        for p in prop_names:
            data_dict[p].append(row.property[p])
        if need_smiles:
            data_dict["smiles"].append(sg.getSmiles(row.getStructure()))

    df = pd.DataFrame(data_dict)
    if with_rdkit:
        PandasTools.AddMoleculeColumnToFrame(df, "smiles", "RDKit Mol")
    return df