|
1 |
| -""" |
2 |
| -Test metadata propagation in groupby |
| 1 | +from pandas import DataFrame |
3 | 2 |
|
4 |
| -The PandasTable class below is implemented according to the [guidelines], |
5 |
| -and as such would expect `__finalize__` to always be called so that the |
6 |
| -`_pandastable_metadata` is always populated. |
7 | 3 |
|
8 |
| -[guidelines]: https://pandas.pydata.org/pandas-docs/stable/development/extending.html#override-constructor-properties # noqa |
9 |
| -""" |
10 |
| - |
11 |
| -from typing import List |
12 |
| -from warnings import warn |
13 |
| - |
14 |
| -import pytest |
15 |
| - |
16 |
| -import pandas as pd |
17 |
| - |
18 |
| -_TABLE_METADATA_FIELD_NAME = "_pandastable_metadata" |
19 |
| - |
20 |
| - |
21 |
| -def _combine_metadata(data: List[str]) -> str: |
22 |
| - """ |
23 |
| - A mock implementation for testing |
| 4 | +class CustomDataFrame(DataFrame): |
24 | 5 | """
|
25 |
| - return "+".join(data) |
26 |
| - |
| 6 | + Extension of DataFrame as described in [guidelines] |
27 | 7 |
|
28 |
| -class PandasTable(pd.DataFrame): |
29 |
| - """ |
30 |
| - A pandas dataframe subclass with associated table metadata. |
| 8 | + [guidelines]: https://pandas.pydata.org/pandas-docs/stable/development/extending.html#override-constructor-properties # noqa |
31 | 9 | """
|
32 | 10 |
|
33 |
| - _metadata = [_TABLE_METADATA_FIELD_NAME] # Register metadata fieldnames here |
| 11 | + _metadata = ["_custom_metadata"] |
34 | 12 |
|
35 | 13 | @property
|
36 | 14 | def _constructor(self):
|
37 |
| - return PandasTable |
38 |
| - |
39 |
| - def __finalize__(self, other, method=None, **kwargs): |
40 |
| - """ |
41 |
| - This method will be called after constructor to populate metadata |
42 |
| -
|
43 |
| - The "method" argument is subject to change and must be handled robustly. |
44 |
| - """ |
45 |
| - src = [other] # more logic here in actual implementation |
46 |
| - metadata = _combine_metadata( |
47 |
| - [d.get_metadata() for d in src if isinstance(d, PandasTable)] |
48 |
| - ) |
49 |
| - |
50 |
| - if not metadata: |
51 |
| - warn( |
52 |
| - '__finalize__ unable to combine metadata for method "{method}", ' |
53 |
| - "falling back to DataFrame" |
54 |
| - ) |
55 |
| - return pd.DataFrame(self) |
56 |
| - object.__setattr__(self, _TABLE_METADATA_FIELD_NAME, metadata) |
57 |
| - return self |
58 |
| - |
59 |
| - def get_metadata(self): |
60 |
| - metadata = getattr(self, _TABLE_METADATA_FIELD_NAME, None) |
61 |
| - if metadata is None: |
62 |
| - warn("PandasTable object not correctly initialized: no metadata") |
63 |
| - return metadata |
64 |
| - |
65 |
| - @staticmethod |
66 |
| - def from_table_data(df: pd.DataFrame, metadata) -> "PandasTable": |
67 |
| - df = PandasTable(df) |
68 |
| - object.__setattr__(df, _TABLE_METADATA_FIELD_NAME, metadata) |
69 |
| - return df |
70 |
| - |
71 |
| - |
72 |
| -@pytest.fixture |
73 |
| -def dft(): |
74 |
| - df = pd.DataFrame([[11, 12, 0], [21, 22, 0], [31, 32, 1]], columns={"a", "b", "g"}) |
75 |
| - return PandasTable.from_table_data(df, "My metadata") |
76 |
| - |
77 |
| - |
78 |
| -def test_initial_metadata(dft): |
79 |
| - assert dft.get_metadata() == "My metadata" |
80 |
| - |
| 15 | + return CustomDataFrame |
81 | 16 |
|
82 |
| -def test_basic_propagation(dft): |
83 |
| - gg = dft.loc[dft.g == 0, :] |
84 |
| - assert gg.get_metadata() == "My metadata" |
85 | 17 |
|
| 18 | +def test_groupby_with_custom_metadata(): |
| 19 | + custom_df = CustomDataFrame( |
| 20 | + [[11, 12, 0], [21, 22, 0], [31, 32, 1]], columns=["a", "b", "g"] |
| 21 | + ) |
| 22 | + custom_df._custom_metadata = "Custom metadata" |
86 | 23 |
|
87 |
| -def test_groupby(dft): |
88 |
| - gg = [ab for g, ab in dft.groupby("g")] |
89 |
| - assert gg[0].get_metadata() is not None |
| 24 | + for _, group_df in custom_df.groupby("g"): |
| 25 | + assert group_df._custom_metadata == "Custom metadata" |
0 commit comments