diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py index 786bfbd2..344cb107 100644 --- a/protocol/pandas_implementation.py +++ b/protocol/pandas_implementation.py @@ -765,9 +765,43 @@ def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable['_PandasDataFr # Roundtrip testing # ----------------- +def assert_buffer_equal(buffer_dtype: Tuple[_PandasBuffer, Any], pdcol:pd.Series): + buf, dtype = buffer_dtype + pytest.raises(NotImplementedError, buf.__dlpack__) + assert buf.__dlpack_device__() == (1, None) + # It seems that `bitwidth` is handled differently for `int` and `category` + # assert dtype[1] == pdcol.dtype.itemsize * 8, f"{dtype[1]} is not {pdcol.dtype.itemsize}" + # print(pdcol) + # if isinstance(pdcol, pd.CategoricalDtype): + # col = pdcol.values.codes + # else: + # col = pdcol + + # assert dtype[1] == col.dtype.itemsize * 8, f"{dtype[1]} is not {col.dtype.itemsize * 8}" + # assert dtype[2] == col.dtype.str, f"{dtype[2]} is not {col.dtype.str}" + + +def assert_column_equal(col: _PandasColumn, pdcol:pd.Series): + assert col.size == pdcol.size + assert col.offset == 0 + assert col.null_count == pdcol.isnull().sum() + assert col.num_chunks() == 1 + if col.dtype[0] != _DtypeKind.STRING: + pytest.raises(RuntimeError, col._get_validity_buffer) + assert_buffer_equal(col._get_data_buffer(), pdcol) + +def assert_dataframe_equal(dfo: DataFrameObject, df:pd.DataFrame): + assert dfo.num_columns() == len(df.columns) + assert dfo.num_rows() == len(df) + assert dfo.num_chunks() == 1 + assert dfo.column_names() == list(df.columns) + for col in df.columns: + assert_column_equal(dfo.get_column_by_name(col), df[col]) + def test_float_only(): df = pd.DataFrame(data=dict(a=[1.5, 2.5, 3.5], b=[9.2, 10.5, 11.8])) df2 = from_dataframe(df) + assert_dataframe_equal(df.__dataframe__(), df) tm.assert_frame_equal(df, df2) @@ -775,6 +809,7 @@ def test_mixed_intfloat(): df = pd.DataFrame(data=dict(a=[1, 2, 3], b=[3, 4, 5], c=[1.5, 2.5, 3.5], d=[9, 10, 11])) df2 = from_dataframe(df) + assert_dataframe_equal(df.__dataframe__(), df) tm.assert_frame_equal(df, df2) @@ -783,6 +818,7 @@ def test_noncontiguous_columns(): df = pd.DataFrame(arr, columns=['a', 'b', 'c']) assert df['a'].to_numpy().strides == (24,) df2 = from_dataframe(df) # uses default of allow_copy=True + assert_dataframe_equal(df.__dataframe__(), df) tm.assert_frame_equal(df, df2) with pytest.raises(RuntimeError): @@ -803,6 +839,7 @@ def test_categorical_dtype(): assert col.describe_categorical == (False, True, {0: 1, 1: 2, 2: 5}) df2 = from_dataframe(df) + assert_dataframe_equal(df.__dataframe__(), df) tm.assert_frame_equal(df, df2) @@ -818,6 +855,8 @@ def test_string_dtype(): assert col.describe_null == (4, 0) assert col.num_chunks() == 1 + assert_dataframe_equal(df.__dataframe__(), df) + def test_metadata(): df = pd.DataFrame({'A': [1, 2, 3, 4],'B': [1, 2, 3, 4]}) @@ -834,6 +873,7 @@ def test_metadata(): assert col_metadata[key] == expected[key] df2 = from_dataframe(df) + assert_dataframe_equal(df.__dataframe__(), df) tm.assert_frame_equal(df, df2)