forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_custom_metadata.py
89 lines (64 loc) · 2.5 KB
/
test_custom_metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
Test metadata propagation in groupby
The PandasTable class below is implemented according to the [guidelines],
and as such would expect `__finalize__` to always be called so that the
`_pandastable_metadata` is always populated.
[guidelines]: https://pandas.pydata.org/pandas-docs/stable/development/extending.html#override-constructor-properties # noqa
"""
from typing import List
from warnings import warn
import pytest
import pandas as pd
# Name of the instance attribute under which PandasTable stores its metadata.
# The same name is listed in PandasTable._metadata and is used by every
# getattr / object.__setattr__ call in this module, so it is defined once here.
_TABLE_METADATA_FIELD_NAME = "_pandastable_metadata"
def _combine_metadata(data: List[str]) -> str:
"""
A mock implementation for testing
"""
return "+".join(data)
class PandasTable(pd.DataFrame):
    """
    A pandas dataframe subclass with associated table metadata.

    The metadata lives on the instance under the attribute named by
    ``_TABLE_METADATA_FIELD_NAME``; that name is also registered in
    ``_metadata``.
    """

    _metadata = [_TABLE_METADATA_FIELD_NAME]  # Register metadata fieldnames here

    @property
    def _constructor(self):
        """Return ``PandasTable`` as the constructor hook for this subclass."""
        return PandasTable

    def __finalize__(self, other, method=None, **kwargs):
        """
        This method will be called after constructor to populate metadata

        The "method" argument is subject to change and must be handled
        robustly; here it is only used in the warning text.

        Returns ``self`` with the combined metadata attached, or a plain
        ``pd.DataFrame`` built from ``self`` (after emitting a warning) when
        no metadata could be obtained from ``other``.
        """
        sources = [other]  # more logic here in actual implementation
        collected = [
            src.get_metadata() for src in sources if isinstance(src, PandasTable)
        ]
        # get_metadata() returns None for an uninitialised table; drop those
        # entries so _combine_metadata only receives actual values instead of
        # failing inside str.join.
        metadata = _combine_metadata([m for m in collected if m is not None])

        if not metadata:
            # f-string so the offending method name really appears in the text.
            warn(
                f'__finalize__ unable to combine metadata for method "{method}", '
                "falling back to DataFrame"
            )
            return pd.DataFrame(self)

        # Set the attribute through object.__setattr__, i.e. without going
        # through any __setattr__ defined on the DataFrame class hierarchy.
        object.__setattr__(self, _TABLE_METADATA_FIELD_NAME, metadata)
        return self

    def get_metadata(self):
        """
        Return the stored metadata.

        If the metadata attribute is missing or ``None``, a warning is
        emitted and ``None`` is returned.
        """
        metadata = getattr(self, _TABLE_METADATA_FIELD_NAME, None)
        if metadata is None:
            warn("PandasTable object not correctly initialized: no metadata")
        return metadata

    @staticmethod
    def from_table_data(df: pd.DataFrame, metadata) -> "PandasTable":
        """Build a ``PandasTable`` from ``df`` and attach ``metadata`` to it."""
        table = PandasTable(df)
        # Same direct attribute write as in __finalize__.
        object.__setattr__(table, _TABLE_METADATA_FIELD_NAME, metadata)
        return table
@pytest.fixture
def dft():
    """
    Provide a 3x3 ``PandasTable`` with columns a, b, g and metadata
    ``"My metadata"``.

    The column labels are passed as a list: a set has no defined iteration
    order, so with a set the label "g" (used by the tests for filtering and
    grouping) could end up on any of the three data columns.
    """
    df = pd.DataFrame(
        [[11, 12, 0], [21, 22, 0], [31, 32, 1]],
        columns=["a", "b", "g"],
    )
    return PandasTable.from_table_data(df, "My metadata")
def test_initial_metadata(dft):
    """The fixture table reports the metadata it was created with."""
    observed = dft.get_metadata()
    assert observed == "My metadata"
def test_basic_propagation(dft):
    """Row selection through ``.loc`` keeps the table metadata."""
    rows_in_group_zero = dft.g == 0
    subset = dft.loc[rows_in_group_zero, :]
    assert subset.get_metadata() == "My metadata"
def test_groupby(dft):
    """The first sub-frame yielded by ``groupby("g")`` still has metadata."""
    frames = []
    for _key, frame in dft.groupby("g"):
        frames.append(frame)
    first_frame = frames[0]
    assert first_frame.get_metadata() is not None