Skip to content

Commit 42bc2d9

Browse files
fix: avoid TypeError when using sorted search (#84)
* fix: address failing compliance tests in DateArray and TimeArray

  test: add a test session with prerelease versions of dependencies
* fix min/max/median for 2D arrays
* fixes except for null contains
* actually use NaT as 'advertised'
* fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes

  This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`.

  BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values
  Release-As: 0.4.0
* more progress towards compliance
* address errors in TestMethods
* move tests
* add prerelease deps
* fix: address failing tests with pandas 1.5.0

  test: add a test session with prerelease versions of dependencies
* fix owlbot config
* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* document why microsecond precision is used
* use correct units
* add box_func tests
* typo
* fix: avoid TypeError when using sorted search
* add unit tests
* fix: dbdate and dbtime support set item
* add TestMethods
* add unit test for search sorted

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent b771e05 commit 42bc2d9

File tree

5 files changed

+209
-0
lines changed

5 files changed

+209
-0
lines changed

db_dtypes/core.py

+8
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@ def _validate_scalar(self, value):
113113
"""
114114
return self._datetime(value)
115115

116+
def _validate_searchsorted_value(self, value):
    """
    Prepare ``value`` for a sorted search against the backing numpy array.

    The search value goes through the same conversion that is applied when
    setting an item, so this delegates to ``_validate_setitem_value`` and
    returns its result unchanged.

    TODO: With pandas 2.0, this may be unnecessary. https://github.com/pandas-dev/pandas/pull/45544#issuecomment-1052809232
    """
    converted = self._validate_setitem_value(value)
    return converted
123+
116124
def _validate_setitem_value(self, value):
117125
"""
118126
Convert a value for use in setting a value in the backing numpy array.

tests/compliance/conftest.py

+60
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,28 @@
1616
import pytest
1717

1818

19+
@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Parametrized boolean flag (``True``, ``False``).

    Used by tests that compare a Series against ``Series.to_frame()``.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    return request.param
28+
29+
30+
@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Parametrized boolean flag (``True``, ``False``).

    Used by tests that compare an array ``arr`` against ``Series(arr)``.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    return request.param
39+
40+
1941
@pytest.fixture(params=["ffill", "bfill"])
2042
def fillna_method(request):
2143
"""
@@ -28,6 +50,21 @@ def fillna_method(request):
2850
return request.param
2951

3052

53+
@pytest.fixture
def invalid_scalar(data):
    """
    Return a scalar that the ExtensionArray under test *cannot* hold.

    A bare ``object`` instance is used. That default should suit most
    subclasses, but it is not guaranteed to.

    If the array can hold any item (i.e. object dtype), then use pytest.skip.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    # A fresh, featureless object: not a date, time, or missing-value marker.
    return object()
66+
67+
3168
@pytest.fixture
3269
def na_value():
3370
return pandas.NaT
@@ -51,3 +88,26 @@ def cmp(a, b):
5188
return a is pandas.NaT and a is b
5289

5390
return cmp
91+
92+
93+
@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """
    Parametrized ``key`` argument for tests of sorting methods.

    Covers two cases: ``None`` (no key) and the identity function.

    See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py
    """
    return request.param
102+
103+
104+
@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Parametrized boolean flag (``True``, ``False``).

    Used by tests that compare an ExtensionDtype array against a numpy
    array.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    return request.param

tests/compliance/date/conftest.py

+85
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@
2020
from db_dtypes import DateArray, DateDtype
2121

2222

23+
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture that resolves to ``data`` or ``data_missing``."""
    fixtures_by_name = {"data": data, "data_missing": data_missing}
    # ``.get`` keeps the implicit ``None`` result for an unrecognised param.
    return fixtures_by_name.get(request.param)
30+
31+
2332
@pytest.fixture
2433
def data():
2534
return DateArray(
@@ -32,6 +41,52 @@ def data():
3241
)
3342

3443

44+
@pytest.fixture
def data_for_grouping():
    """
    Build the array used by factorization, grouping, and unique tests.

    The layout is [B, B, NA, NA, A, A, B, C], where A < B < C and NA is a
    missing value.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    a = datetime.date(1969, 12, 30)
    b = datetime.date(1980, 1, 27)
    c = datetime.date(2022, 3, 18)
    na = None
    return DateArray([b, b, na, na, a, a, b, c])
68+
69+
70+
@pytest.fixture
def data_for_sorting():
    """
    Build a length-3 array whose sort order is known.

    The layout is [B, C, A], where A < B < C.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    a = datetime.date(1969, 12, 30)
    b = datetime.date(1980, 1, 27)
    c = datetime.date(2022, 3, 18)
    return DateArray([b, c, a])
88+
89+
3590
@pytest.fixture
3691
def data_missing():
3792
"""Length-2 array with [NA, Valid]
@@ -42,6 +97,36 @@ def data_missing():
4297
return DateArray([None, datetime.date(2022, 1, 27)])
4398

4499

100+
@pytest.fixture
def data_missing_for_sorting():
    """
    Build a length-3 array with a known sort order and one missing value.

    The layout is [B, NA, A], where A < B and NA is missing.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    a = datetime.date(1969, 12, 30)
    b = datetime.date(1980, 1, 27)
    na = None
    return DateArray([b, na, a])
112+
113+
114+
@pytest.fixture
def data_repeated(data):
    """
    Return a generator function that yields the ``data`` fixture repeatedly.

    Calling the returned function with ``count`` produces a generator that
    yields the same ``data`` object ``count`` times.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """

    def gen(count):
        yield from (data for _ in range(count))

    return gen
128+
129+
45130
@pytest.fixture
46131
def dtype():
47132
return DateDtype()

tests/compliance/date/test_date_compliance.py

+29
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py
2121
"""
2222

23+
import pandas
2324
from pandas.tests.extension import base
25+
import pytest
26+
27+
import db_dtypes
2428

2529

2630
class TestDtype(base.BaseDtypeTests):
@@ -45,3 +49,28 @@ class TestGetitem(base.BaseGetitemTests):
4549

4650
class TestMissing(base.BaseMissingTests):
4751
pass
52+
53+
54+
# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add
55+
# compliance tests for reduction operations.
56+
57+
58+
class TestMethods(base.BaseMethodsTests):
    """Method compliance tests for the date extension array."""

    def test_combine_add(self):
        pytest.skip("Cannot add dates.")

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        sample = all_data[:10]
        # Overridden from
        # https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/methods.py
        # to avoid difference in dtypes: when dropping NA, the non-missing
        # values are wrapped in a DateArray again before counting.
        reference = (
            db_dtypes.DateArray(sample[~sample.isna()]) if dropna else sample
        )

        result = pandas.Series(sample).value_counts(dropna=dropna).sort_index()
        expected = pandas.Series(reference).value_counts(dropna=dropna).sort_index()

        self.assert_series_equal(result, expected)

tests/unit/test_date.py

+27
Original file line numberDiff line numberDiff line change
@@ -328,3 +328,30 @@ def test_date_median_2d():
328328
)
329329
),
330330
)
331+
332+
333+
@pytest.mark.parametrize(
    ("search_term", "expected_index"),
    (
        # Earlier than every element.
        (datetime.date(1899, 12, 31), 0),
        # Equal to the first element.
        (datetime.date(1900, 1, 1), 0),
        # Between the first and second elements.
        (datetime.date(1920, 2, 2), 1),
        # Equal to the second element.
        (datetime.date(1930, 3, 3), 1),
        # Between the second and third elements.
        (datetime.date(1950, 5, 5), 2),
        # Between the third and fourth elements.
        (datetime.date(1990, 9, 9), 3),
        # Equal to the last element.
        (datetime.date(2012, 12, 12), 3),
        # Later than every element.
        (datetime.date(2022, 3, 24), 4),
    ),
)
def test_date_searchsorted(search_term, expected_index):
    """Check ``Series.searchsorted`` on a sorted ``dbdate`` Series."""
    sorted_dates = [
        datetime.date(1900, 1, 1),
        datetime.date(1930, 3, 3),
        datetime.date(1980, 8, 8),
        datetime.date(2012, 12, 12),
    ]
    haystack = pandas.Series(sorted_dates, dtype="dbdate")
    assert haystack.searchsorted(search_term) == expected_index

0 commit comments

Comments
 (0)