Skip to content

Commit cc5b178

Browse files
committed
fix!: use pandas.NaT for missing values in dbdate and dbtime dtypes
This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`.

BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values

Release-As: 0.4.0
1 parent e9d41d1 commit cc5b178

File tree

5 files changed

+119
-53
lines changed

5 files changed

+119
-53
lines changed

db_dtypes/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ def _datetime(
145145
raise TypeError("Invalid value type", scalar)
146146

147147
def _box_func(self, x):
148-
if pandas.isnull(x):
149-
return None
148+
if pandas.isna(x):
149+
return pandas.NaT
150150

151151
try:
152152
return x.astype("<M8[us]").astype(datetime.datetime).time()
@@ -251,7 +251,7 @@ def _datetime(
251251

252252
def _box_func(self, x):
253253
if pandas.isnull(x):
254-
return None
254+
return pandas.NaT
255255
try:
256256
return x.astype("<M8[us]").astype(datetime.datetime).date()
257257
except AttributeError:

db_dtypes/core.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
import numpy
1818
import pandas
19-
from pandas import NaT
2019
import pandas.api.extensions
2120
from pandas.api.types import is_dtype_equal, is_list_like, pandas_dtype
2221

@@ -27,8 +26,8 @@
2726

2827

2928
class BaseDatetimeDtype(pandas.api.extensions.ExtensionDtype):
30-
na_value = NaT
31-
kind = "o"
29+
na_value = pandas.NaT
30+
kind = "O"
3231
names = None
3332

3433
@classmethod

tests/unit/test_date.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
# To register the types.
2121
import db_dtypes # noqa
22+
from db_dtypes import pandas_backports
2223

2324

2425
@pytest.mark.parametrize(
@@ -65,3 +66,29 @@ def test_date_parsing(value, expected):
6566
def test_date_parsing_errors(value, error):
6667
with pytest.raises(ValueError, match=error):
6768
pandas.Series([value], dtype="dbdate")
69+
70+
71+
@pytest.mark.skipif(
72+
not hasattr(pandas_backports, "numpy_validate_median"),
73+
reason="median not available with this version of pandas",
74+
)
75+
@pytest.mark.parametrize(
76+
"values, expected",
77+
[
78+
(["1970-01-01", "1900-01-01", "2000-01-01"], datetime.date(1970, 1, 1)),
79+
(
80+
[
81+
None,
82+
"1900-01-01",
83+
pandas.NA if hasattr(pandas, "NA") else None,
84+
pandas.NaT,
85+
float("nan"),
86+
],
87+
datetime.date(1900, 1, 1),
88+
),
89+
(["2222-02-01", "2222-02-03"], datetime.date(2222, 2, 2)),
90+
],
91+
)
92+
def test_date_median(values, expected):
93+
series = pandas.Series(values, dtype="dbdate")
94+
assert series.median() == expected

tests/unit/test_dtypes.py

Lines changed: 57 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
pandas_release = packaging.version.parse(pd.__version__).release
2424

2525
SAMPLE_RAW_VALUES = dict(
26-
dbdate=(datetime.date(2021, 2, 2), "2021-2-3", None),
27-
dbtime=(datetime.time(1, 2, 2), "1:2:3.5", None),
26+
dbdate=(datetime.date(2021, 2, 2), "2021-2-3", pd.NaT),
27+
dbtime=(datetime.time(1, 2, 2), "1:2:3.5", pd.NaT),
2828
)
2929
SAMPLE_VALUES = dict(
3030
dbdate=(
@@ -90,19 +90,19 @@ def test_array_construction(dtype, factory_method):
9090
factory = getattr(factory, factory_method)
9191
if factory_method == "_from_sequence_of_strings":
9292
sample_raw_values = [
93-
str(v) if v is not None else v for v in sample_raw_values
93+
str(v) if not pd.isna(v) else v for v in sample_raw_values
9494
]
9595
a = factory(sample_raw_values)
9696
assert len(a) == 3
9797
assert a.size == 3
9898
assert a.shape == (3,)
9999
sample_values = SAMPLE_VALUES[dtype]
100100
assert a[0], a[1] == sample_values[:2]
101-
assert a[2] is None
101+
assert pd.isna(a[2]) and a[2] is pd.NaT
102102

103103
# implementation details:
104104
assert a.nbytes == 24
105-
assert np.array_equal(
105+
np.testing.assert_array_equal(
106106
a._ndarray
107107
== np.array(SAMPLE_DT_VALUES[dtype][:2] + ("NaT",), dtype="datetime64[us]"),
108108
[True, True, False],
@@ -121,7 +121,7 @@ def test_time_series_construction(dtype):
121121
s = pd.Series(SAMPLE_RAW_VALUES[dtype], dtype=dtype)
122122
assert len(s) == 3
123123
assert s[0], s[1] == sample_values[:2]
124-
assert s[2] is None
124+
assert s[2] is pd.NaT
125125
assert s.nbytes == 24
126126
assert isinstance(s.array, _cls(dtype))
127127

@@ -166,8 +166,8 @@ def test_timearray_comparisons(
166166
# Note that the right_obs comparisons work because
167167
# they're called on right_obs rather than left, because
168168
# TimeArrays only support comparisons with TimeArrays.
169-
assert np.array_equal(comparisons[op](left, r), expected)
170-
assert np.array_equal(complements[op](left, r), ~expected)
169+
np.testing.assert_array_equal(comparisons[op](left, r), expected)
170+
np.testing.assert_array_equal(complements[op](left, r), ~expected)
171171

172172
# Bad shape
173173
for bad_shape in ([], [1, 2, 3]):
@@ -186,10 +186,10 @@ def test_timearray_comparisons(
186186
[1], # a single-element array gets broadcast
187187
):
188188
if op == "==":
189-
assert np.array_equal(
189+
np.testing.assert_array_equal(
190190
comparisons[op](left, np.array(bad_items)), np.array([False, False])
191191
)
192-
assert np.array_equal(
192+
np.testing.assert_array_equal(
193193
complements[op](left, np.array(bad_items)), np.array([True, True])
194194
)
195195
else:
@@ -204,7 +204,7 @@ def test_timearray_comparisons(
204204
def test___getitem___arrayindex(dtype):
205205
cls = _cls(dtype)
206206
sample_values = SAMPLE_VALUES[dtype]
207-
assert np.array_equal(
207+
np.testing.assert_array_equal(
208208
cls(sample_values)[[1, 3]], cls([sample_values[1], sample_values[3]]),
209209
)
210210

@@ -215,21 +215,23 @@ def test_timearray_slicing(dtype):
215215
b = a[:]
216216
assert b is not a
217217
assert b.__class__ == a.__class__
218-
assert np.array_equal(b, a)
218+
np.testing.assert_array_equal(b._ndarray, a._ndarray)
219219

220220
sample_values = SAMPLE_VALUES[dtype]
221221
cls = _cls(dtype)
222-
assert np.array_equal(a[:1], cls._from_sequence(sample_values[:1]))
222+
np.testing.assert_array_equal(
223+
a[:1]._ndarray, cls._from_sequence(sample_values[:1])._ndarray
224+
)
223225

224226
# Assignment works:
225227
a[:1] = cls._from_sequence([sample_values[2]])
226-
assert np.array_equal(
228+
np.testing.assert_array_equal(
227229
a[:2], cls._from_sequence([sample_values[2], sample_values[1]])
228230
)
229231

230232
# Series also work:
231233
s = pd.Series(SAMPLE_RAW_VALUES[dtype], dtype=dtype)
232-
assert np.array_equal(s[:1].array, cls._from_sequence([sample_values[0]]))
234+
np.testing.assert_array_equal(s[:1].array, cls._from_sequence([sample_values[0]]))
233235

234236

235237
@for_date_and_time
@@ -238,9 +240,13 @@ def test_item_assignment(dtype):
238240
sample_values = SAMPLE_VALUES[dtype]
239241
cls = _cls(dtype)
240242
a[0] = sample_values[2]
241-
assert np.array_equal(a, cls._from_sequence([sample_values[2], sample_values[1]]))
243+
np.testing.assert_array_equal(
244+
a, cls._from_sequence([sample_values[2], sample_values[1]])
245+
)
242246
a[1] = None
243-
assert np.array_equal(a, cls._from_sequence([sample_values[2], None]))
247+
np.testing.assert_array_equal(
248+
a._ndarray, cls._from_sequence([sample_values[2], None])._ndarray
249+
)
244250

245251

246252
@for_date_and_time
@@ -249,9 +255,9 @@ def test_array_assignment(dtype):
249255
cls = _cls(dtype)
250256
sample_values = SAMPLE_VALUES[dtype]
251257
a[a.isna()] = sample_values[3]
252-
assert np.array_equal(a, cls([sample_values[i] for i in (0, 1, 3)]))
258+
np.testing.assert_array_equal(a, cls([sample_values[i] for i in (0, 1, 3)]))
253259
a[[0, 2]] = sample_values[2]
254-
assert np.array_equal(a, cls([sample_values[i] for i in (2, 1, 2)]))
260+
np.testing.assert_array_equal(a, cls([sample_values[i] for i in (2, 1, 2)]))
255261

256262

257263
@for_date_and_time
@@ -270,7 +276,7 @@ def test_copy(dtype):
270276
b = a.copy()
271277
assert b is not a
272278
assert b._ndarray is not a._ndarray
273-
assert np.array_equal(b, a)
279+
np.testing.assert_array_equal(b, a)
274280

275281

276282
@for_date_and_time
@@ -280,7 +286,7 @@ def test_from_ndarray_copy(dtype):
280286
a = cls._from_sequence(sample_values)
281287
b = cls(a._ndarray, copy=True)
282288
assert b._ndarray is not a._ndarray
283-
assert np.array_equal(b, a)
289+
np.testing.assert_array_equal(b, a)
284290

285291

286292
@for_date_and_time
@@ -310,7 +316,7 @@ def test__validate_scalar_invalid(dtype):
310316
[
311317
(False, None),
312318
(True, None),
313-
(True, pd._libs.NaT if pd else None),
319+
(True, pd.NaT if pd else None),
314320
(True, np.NaN if pd else None),
315321
(True, 42),
316322
],
@@ -326,7 +332,7 @@ def test_take(dtype, allow_fill, fill_value):
326332
else datetime.time(0, 42, 42, 424242)
327333
)
328334
else:
329-
expected_fill = None
335+
expected_fill = pd.NaT
330336
b = a.take([1, -1, 3], allow_fill=True, fill_value=fill_value)
331337
expect = [sample_values[1], expected_fill, sample_values[3]]
332338
else:
@@ -370,7 +376,7 @@ def test__concat_same_type_not_same_type(dtype):
370376

371377
@for_date_and_time
372378
def test_dropna(dtype):
373-
assert np.array_equal(_make_one(dtype).dropna(), _make_one(dtype)[:2])
379+
np.testing.assert_array_equal(_make_one(dtype).dropna(), _make_one(dtype)[:2])
374380

375381

376382
@pytest.mark.parametrize(
@@ -398,14 +404,18 @@ def test_fillna(dtype, value, meth, limit, expect):
398404
elif value is not None:
399405
value = sample_values[value]
400406
expect = cls([None if i is None else sample_values[i] for i in expect])
401-
assert np.array_equal(a.fillna(value, meth, limit), expect)
407+
np.testing.assert_array_equal(
408+
a.fillna(value, meth, limit)._ndarray, expect._ndarray
409+
)
402410

403411

404412
@for_date_and_time
405413
def test_unique(dtype):
406414
cls = _cls(dtype)
407415
sample_values = SAMPLE_VALUES[dtype]
408-
assert np.array_equal(cls(sample_values * 3).unique(), cls(sample_values),)
416+
np.testing.assert_array_equal(
417+
cls(sample_values * 3).unique(), cls(sample_values),
418+
)
409419

410420

411421
@for_date_and_time
@@ -421,7 +431,7 @@ def test_astype_copy(dtype):
421431
b = a.astype(a.dtype, copy=True)
422432
assert b is not a
423433
assert b.__class__ is a.__class__
424-
assert np.array_equal(b, a)
434+
np.testing.assert_array_equal(b._ndarray, a._ndarray)
425435

426436

427437
@pytest.mark.parametrize(
@@ -452,7 +462,7 @@ def test_asdatetime(dtype, same):
452462

453463
b = a.astype(dt, copy=copy)
454464
assert b is not a._ndarray
455-
assert np.array_equal(b[:2], a._ndarray[:2])
465+
np.testing.assert_array_equal(b[:2], a._ndarray[:2])
456466
assert pd.isna(b[2]) and str(b[2]) == "NaT"
457467

458468

@@ -482,7 +492,7 @@ def test_astimedelta(dtype):
482492

483493
a = _cls("dbtime")([t, None])
484494
b = a.astype(dtype)
485-
np.array_equal(b[:1], expect)
495+
np.testing.assert_array_equal(b[:1], expect)
486496
assert pd.isna(b[1]) and str(b[1]) == "NaT"
487497

488498

@@ -531,21 +541,21 @@ def test_min_max_median(dtype):
531541
)
532542

533543
empty = cls([])
534-
assert empty.min() is None
535-
assert empty.max() is None
544+
assert empty.min() is pd.NaT
545+
assert empty.max() is pd.NaT
536546
if pandas_release >= (1, 2):
537-
assert empty.median() is None
547+
assert empty.median() is pd.NaT
538548
empty = cls([None])
539-
assert empty.min() is None
540-
assert empty.max() is None
541-
assert empty.min(skipna=False) is None
542-
assert empty.max(skipna=False) is None
549+
assert empty.min() is pd.NaT
550+
assert empty.max() is pd.NaT
551+
assert empty.min(skipna=False) is pd.NaT
552+
assert empty.max(skipna=False) is pd.NaT
543553
if pandas_release >= (1, 2):
544554
with pytest.warns(RuntimeWarning, match="empty slice"):
545555
# It's weird that we get the warning here, and not
546556
# below. :/
547-
assert empty.median() is None
548-
assert empty.median(skipna=False) is None
557+
assert empty.median() is pd.NaT
558+
assert empty.median(skipna=False) is pd.NaT
549559

550560
a = _make_one(dtype)
551561
assert a.min() == sample_values[0]
@@ -563,14 +573,14 @@ def test_date_add():
563573
times = _cls("dbtime")(SAMPLE_VALUES["dbtime"])
564574
expect = dates.astype("datetime64") + times.astype("timedelta64")
565575

566-
assert np.array_equal(dates + times, expect)
567-
assert np.array_equal(times + dates, expect)
576+
np.testing.assert_array_equal(dates + times, expect)
577+
np.testing.assert_array_equal(times + dates, expect)
568578

569579
do = pd.DateOffset(days=1)
570580
expect = dates.astype("object") + do
571-
assert np.array_equal(dates + do, expect)
581+
np.testing.assert_array_equal(dates + do, expect)
572582
if pandas_release >= (1, 1):
573-
assert np.array_equal(do + dates, expect)
583+
np.testing.assert_array_equal(do + dates, expect)
574584

575585
with pytest.raises(TypeError):
576586
dates + times.astype("timedelta64")
@@ -587,8 +597,8 @@ def test_date_add():
587597

588598
do = pd.Series([pd.DateOffset(days=i) for i in range(4)])
589599
expect = dates.astype("object") + do
590-
assert np.array_equal(dates + do, expect)
591-
assert np.array_equal(do + dates, expect)
600+
np.testing.assert_array_equal(dates + do, expect)
601+
np.testing.assert_array_equal(do + dates, expect)
592602

593603

594604
def test_date_sub():
@@ -602,11 +612,11 @@ def test_date_sub():
602612
)
603613
)
604614
expect = dates.astype("datetime64") - dates2.astype("datetime64")
605-
assert np.array_equal(dates - dates2, expect)
615+
np.testing.assert_array_equal(dates - dates2, expect)
606616

607617
do = pd.DateOffset(days=1)
608618
expect = dates.astype("object") - do
609-
assert np.array_equal(dates - do, expect)
619+
np.testing.assert_array_equal(dates - do, expect)
610620

611621
with pytest.raises(TypeError):
612622
dates - 42
@@ -620,4 +630,4 @@ def test_date_sub():
620630

621631
do = pd.Series([pd.DateOffset(days=i) for i in range(4)])
622632
expect = dates.astype("object") - do
623-
assert np.array_equal(dates - do, expect)
633+
np.testing.assert_array_equal(dates - do, expect)

0 commit comments

Comments
 (0)