|
| 1 | +""" |
| 2 | +Test metadata propagation in groupby |
| 3 | +
|
| 4 | +The PandasTable class below is implemented according to the [guidelines], and as such would |
| 5 | +expect `__finalize__` to always be called so that the `_pandastable_metadata` is always populated. |
| 6 | +
|
| 7 | +[guidelines]: https://pandas.pydata.org/pandas-docs/stable/development/extending.html#override-constructor-properties |
| 8 | +""" |
| 9 | + |
| 10 | +import pytest |
| 11 | +import pandas as pd |
| 12 | +from warnings import warn |
| 13 | +from typing import List |
| 14 | + |
| 15 | + |
# Name of the instance attribute under which PandasTable stores its table
# metadata; the same name is registered in PandasTable._metadata.
_TABLE_METADATA_FIELD_NAME = '_pandastable_metadata'
| 17 | + |
| 18 | + |
def _combine_metadata(data: List[str]) -> str:
    """
    Mock metadata combiner for testing.

    Concatenates the given metadata strings into a single string, placing a
    ``'+'`` between consecutive entries. An empty list yields ``''``.
    """
    separator = '+'
    combined = separator.join(data)
    return combined
| 24 | + |
| 25 | + |
class PandasTable(pd.DataFrame):
    """
    A pandas dataframe subclass with associated table metadata.

    The metadata is stored on the instance under the attribute named by
    ``_TABLE_METADATA_FIELD_NAME``; that name is registered in ``_metadata``.
    """

    _metadata = [_TABLE_METADATA_FIELD_NAME]  # Register metadata fieldnames here

    @property
    def _constructor(self):
        """Constructor property (see the pandas extending guidelines): ``PandasTable``."""
        return PandasTable

    def __finalize__(self, other, method=None, **kwargs):
        """
        Populate the metadata of a newly created table from ``other``.

        The method argument is subject to change, and a robust handling of
        this is implemented.

        Parameters
        ----------
        other
            The object the new table was derived from. Only ``PandasTable``
            instances contribute metadata.
        method
            Name of the operation that triggered the call; used only in the
            warning message.
        **kwargs
            Accepted for interface compatibility and ignored.

        Returns
        -------
        ``self`` with the combined metadata attached, or a plain
        ``pd.DataFrame`` copy of ``self`` (after a warning) when no metadata
        could be combined.
        """
        src = [other]  # more logic here in actual implementation
        collected = [d.get_metadata() for d in src if isinstance(d, PandasTable)]
        # get_metadata() returns None for a table that was never initialised;
        # drop those so str.join does not raise TypeError and the DataFrame
        # fallback below is actually reachable.
        metadata = _combine_metadata([item for item in collected if item is not None])

        if not metadata:
            message = (
                '__finalize__ unable to combine metadata for method "{method}", '
                'falling back to DataFrame'
            ).format(method=method)
            warn(message)
            return pd.DataFrame(self)
        # Bypass DataFrame.__setattr__ so the value is stored as a plain attribute.
        object.__setattr__(self, _TABLE_METADATA_FIELD_NAME, metadata)
        return self

    def get_metadata(self):
        """
        Return the table metadata.

        Emits a warning and returns ``None`` when the metadata attribute has
        not been set on this instance.
        """
        metadata = getattr(self, _TABLE_METADATA_FIELD_NAME, None)
        if metadata is None:
            warn('PandasTable object not correctly initialized: no metadata')
        return metadata

    @staticmethod
    def from_table_data(df: pd.DataFrame, metadata) -> 'PandasTable':
        """
        Build a ``PandasTable`` from ``df`` and attach ``metadata`` to it.

        Parameters
        ----------
        df
            Data used to construct the table.
        metadata
            Value stored as the table's metadata.
        """
        table = PandasTable(df)
        object.__setattr__(table, _TABLE_METADATA_FIELD_NAME, metadata)
        return table
| 64 | + |
| 65 | + |
@pytest.fixture
def dft():
    """
    Provide a three-row ``PandasTable`` with columns ``a``, ``b``, ``g`` and
    the metadata ``'My metadata'``; ``g`` holds the group labels ``0, 0, 1``.
    """
    # Column labels must be an ordered sequence: a set has no defined order,
    # so 'g' could end up labelling any of the three data columns.
    df = pd.DataFrame(
        [[11, 12, 0], [21, 22, 0], [31, 32, 1]],
        columns=['a', 'b', 'g'],
    )
    return PandasTable.from_table_data(df, 'My metadata')
| 70 | + |
| 71 | + |
def test_initial_metadata(dft):
    """The fixture table reports the metadata it was created with."""
    actual = dft.get_metadata()
    assert actual == 'My metadata'
| 74 | + |
| 75 | + |
def test_basic_propagation(dft):
    """Row selection through ``.loc`` keeps the table metadata."""
    in_group_zero = dft.g == 0
    selected = dft.loc[in_group_zero, :]
    assert selected.get_metadata() == 'My metadata'
| 79 | + |
| 80 | + |
def test_groupby(dft):
    """The first group produced by ``groupby('g')`` carries metadata."""
    frames = []
    for _key, frame in dft.groupby('g'):
        frames.append(frame)
    first_frame = frames[0]
    assert first_frame.get_metadata() is not None
0 commit comments