diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py index 00cf5b12..654328aa 100644 --- a/protocol/dataframe_protocol.py +++ b/protocol/dataframe_protocol.py @@ -285,6 +285,13 @@ def null_count(self) -> Optional[int]: """ pass + @property + def metadata(self) -> Dict[str, Any]: + """ + The metadata for the column. See `DataFrame.metadata` for more details. + """ + pass + def num_chunks(self) -> int: """ Return the number of chunks the column consists of. @@ -350,6 +357,19 @@ def __dataframe__(self, nan_as_null : bool = False) -> dict: "version": 0 # Version number of the protocol } + @property + def metadata(self) -> Dict[str, Any]: + """ + The metadata for the data frame, as a dictionary with string keys. The + contents of `metadata` may be anything; they are meant for a library + to store information that it needs to, e.g., roundtrip losslessly or + for two implementations to share data that is not (yet) part of the + interchange protocol specification. To avoid collisions with other + entries, please prefix the keys with the name of the library + followed by a period and the desired name, e.g., ``pandas.indexcol``. + """ + pass + def num_columns(self) -> int: """ Return the number of columns in the DataFrame @@ -417,4 +437,3 @@ def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable[DataFrame]: before yielding it. """ pass - diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py index e3e3e62e..4c6e0e1e 100644 --- a/protocol/pandas_implementation.py +++ b/protocol/pandas_implementation.py @@ -426,6 +426,13 @@ def null_count(self) -> int: """ return self._col.isna().sum() + @property + def metadata(self) -> Dict[str, Any]: + """ + Store specific metadata of the column. + """ + return {} + def num_chunks(self) -> int: """ Return the number of chunks the column consists of. 
@@ -495,6 +502,10 @@ def __init__(self, df : pd.DataFrame, nan_as_null : bool = False) -> None: # dtypes is added, this value should be propagated to columns. self._nan_as_null = nan_as_null + @property + def metadata(self): + return {"pandas.index": self._df.index} + def num_columns(self) -> int: return len(self._df.columns) @@ -578,9 +589,28 @@ def test_categorical_dtype(): tm.assert_frame_equal(df, df2) +def test_metadata(): + df = pd.DataFrame({'A': [1, 2, 3, 4],'B': [1, 2, 3, 4]}) + + # Check the metadata from the dataframe + df_metadata = df.__dataframe__().metadata + expected = {"pandas.index": df.index} + for key in df_metadata: + assert all(df_metadata[key] == expected[key]) + + # Check the metadata from the column + col_metadata = df.__dataframe__().get_column(0).metadata + expected = {} + for key in col_metadata: + assert col_metadata[key] == expected[key] + + df2 = from_dataframe(df) + tm.assert_frame_equal(df, df2) + + if __name__ == '__main__': test_categorical_dtype() test_float_only() test_mixed_intfloat() test_noncontiguous_columns() - + test_metadata()