Skip to content

Commit c65ff18

Browse files
author
Jim Fulton
authored
test: No warnings (#9)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-db-dtypes-pandas/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [x] Ensure the tests and linter pass.
- [x] Code coverage does not decrease (if any source code was changed).
- [x] Appropriate docs were updated (if necessary).

Fixes #2 🦕

This builds on (and includes) #8, which in turn builds on (and includes) #7.
1 parent e996883 commit c65ff18

File tree

7 files changed

+136
-23
lines changed

7 files changed

+136
-23
lines changed

db_dtypes/__init__.py

+42-7
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
"""
1717

1818
import datetime
19+
import re
1920

2021
import numpy
22+
import packaging.version
2123
import pandas
2224
import pandas.compat.numpy.function
2325
import pandas.core.algorithms
@@ -35,6 +37,8 @@
3537
date_dtype_name = "date"
3638
time_dtype_name = "time"
3739

40+
pandas_release = packaging.version.parse(pandas.__version__).release
41+
3842

3943
@pandas.core.dtypes.dtypes.register_extension_dtype
4044
class TimeDtype(core.BaseDatetimeDtype):
@@ -61,15 +65,33 @@ class TimeArray(core.BaseDatetimeArray):
6165
_npepoch = numpy.datetime64(_epoch)
6266

6367
@classmethod
64-
def _datetime(cls, scalar):
68+
def _datetime(
69+
cls,
70+
scalar,
71+
match_fn=re.compile(
72+
r"\s*(?P<hour>\d+)(?::(?P<minute>\d+)(?::(?P<second>\d+(?:[.]\d+)?)?)?)?\s*$"
73+
).match,
74+
):
6575
if isinstance(scalar, datetime.time):
6676
return datetime.datetime.combine(cls._epoch, scalar)
6777
elif isinstance(scalar, str):
6878
# iso string
69-
h, m, s = map(float, scalar.split(":"))
70-
s, us = divmod(s, 1)
79+
match = match_fn(scalar)
80+
if not match:
81+
raise ValueError(f"Bad time string: {repr(scalar)}")
82+
83+
hour = match.group("hour")
84+
minute = match.group("minute")
85+
second = match.group("second")
86+
second, microsecond = divmod(float(second if second else 0), 1)
7187
return datetime.datetime(
72-
1970, 1, 1, int(h), int(m), int(s), int(us * 1000000)
88+
1970,
89+
1,
90+
1,
91+
int(hour),
92+
int(minute if minute else 0),
93+
int(second),
94+
int(microsecond * 1_000_000),
7395
)
7496
else:
7597
raise TypeError("Invalid value type", scalar)
@@ -96,6 +118,11 @@ def astype(self, dtype, copy=True):
96118
else:
97119
return super().astype(dtype, copy=copy)
98120

121+
if pandas_release < (1,):
122+
123+
def to_numpy(self, dtype="object"):
124+
return self.astype(dtype)
125+
99126
def __arrow_array__(self, type=None):
100127
return pyarrow.array(
101128
self.to_numpy(), type=type if type is not None else pyarrow.time64("ns"),
@@ -125,12 +152,20 @@ class DateArray(core.BaseDatetimeArray):
125152
dtype = DateDtype()
126153

127154
@staticmethod
128-
def _datetime(scalar):
155+
def _datetime(
156+
scalar,
157+
match_fn=re.compile(r"\s*(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)\s*$").match,
158+
):
129159
if isinstance(scalar, datetime.date):
130160
return datetime.datetime(scalar.year, scalar.month, scalar.day)
131161
elif isinstance(scalar, str):
132-
# iso string
133-
return datetime.datetime(*map(int, scalar.split("-")))
162+
match = match_fn(scalar)
163+
if not match:
164+
raise ValueError(f"Bad date string: {repr(scalar)}")
165+
year = int(match.group("year"))
166+
month = int(match.group("month"))
167+
day = int(match.group("day"))
168+
return datetime.datetime(year, month, day)
134169
else:
135170
raise TypeError("Invalid value type", scalar)
136171

db_dtypes/core.py

+6
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ def astype(self, dtype, copy=True):
8484
return super().astype(dtype, copy=copy)
8585

8686
def _cmp_method(self, other, op):
87+
oshape = getattr(other, "shape", None)
88+
if oshape != self.shape and oshape != (1,) and self.shape != (1,):
89+
raise TypeError(
90+
"Can't compare arrays with different shapes", self.shape, oshape
91+
)
92+
8793
if type(other) != type(self):
8894
return NotImplemented
8995
return op(self._ndarray, other._ndarray)

db_dtypes/pandas_backports.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,17 @@
3131

3232

3333
def import_default(module_name, force=False, default=None):
34+
"""
35+
Provide an implementation for a class or function when it can't be imported
36+
37+
or when force is True.
38+
39+
This is used to replicate Pandas APIs that are missing or insufficient
40+
(thus the force option) in early pandas versions.
41+
"""
42+
3443
if default is None:
35-
return lambda func: import_default(module_name, force, func)
44+
return lambda func_or_class: import_default(module_name, force, func_or_class)
3645

3746
if force:
3847
return default

testing/constraints-3.6.txt

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#
66
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
77
packaging==17.0
8+
# Make sure we test with pandas 0.24.2. The Python version isn't that relevant.
89
pandas==0.24.2
910
pyarrow==3.0.0
1011
numpy==1.16.6

testing/constraints-3.7.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
# Make sure we test with pandas 1.1.0. The Python version isn't that relevant.
12
pandas==1.1.0

testing/constraints-3.8.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
# Make sure we test with pandas 1.2.0. The Python version isn't that relevant.
12
pandas==1.2.0

tests/unit/test_dtypes.py

+75-15
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import datetime
1616

1717
import packaging.version
18+
import pyarrow.lib
1819
import pytest
1920

2021
pd = pytest.importorskip("pandas")
@@ -171,18 +172,14 @@ def test_timearray_comparisons(
171172

172173
# Bad shape
173174
for bad_shape in ([], [1, 2, 3]):
174-
if op == "==":
175-
assert not comparisons[op](left, np.array(bad_shape))
176-
assert complements[op](left, np.array(bad_shape))
177-
else:
178-
with pytest.raises(
179-
ValueError, match="operands could not be broadcast together",
180-
):
181-
comparisons[op](left, np.array(bad_shape))
182-
with pytest.raises(
183-
ValueError, match="operands could not be broadcast together",
184-
):
185-
complements[op](left, np.array(bad_shape))
175+
with pytest.raises(
176+
TypeError, match="Can't compare arrays with different shapes"
177+
):
178+
comparisons[op](left, np.array(bad_shape))
179+
with pytest.raises(
180+
TypeError, match="Can't compare arrays with different shapes"
181+
):
182+
complements[op](left, np.array(bad_shape))
186183

187184
# Bad items
188185
for bad_items in (
@@ -478,8 +475,10 @@ def test_asdatetime(dtype, same):
478475
)
479476
def test_astimedelta(dtype):
480477
t = "01:02:03.123456"
481-
expect = pd.to_timedelta([t]).array.astype(
482-
"timedelta64[ns]" if dtype == "timedelta" else dtype
478+
expect = (
479+
pd.to_timedelta([t])
480+
.to_numpy()
481+
.astype("timedelta64[ns]" if dtype == "timedelta" else dtype)
483482
)
484483

485484
a = _cls("time")([t, None])
@@ -543,7 +542,10 @@ def test_min_max_median(dtype):
543542
assert empty.min(skipna=False) is None
544543
assert empty.max(skipna=False) is None
545544
if pandas_release >= (1, 2):
546-
assert empty.median() is None
545+
with pytest.warns(RuntimeWarning, match="empty slice"):
546+
# It's weird that we get the warning here, and not
547+
# below. :/
548+
assert empty.median() is None
547549
assert empty.median(skipna=False) is None
548550

549551
a = _make_one(dtype)
@@ -620,3 +622,61 @@ def test_date_sub():
620622
do = pd.Series([pd.DateOffset(days=i) for i in range(4)])
621623
expect = dates.astype("object") - do
622624
assert np.array_equal(dates - do, expect)
625+
626+
627+
@pytest.mark.parametrize(
628+
"value, expected", [("1", datetime.time(1)), ("1:2", datetime.time(1, 2))],
629+
)
630+
def test_short_time_parsing(value, expected):
631+
assert _cls("time")([value])[0] == expected
632+
633+
634+
@pytest.mark.parametrize(
635+
"value, error",
636+
[
637+
("thursday", "Bad time string: 'thursday'"),
638+
("1:2:3thursday", "Bad time string: '1:2:3thursday'"),
639+
("1:2:3:4", "Bad time string: '1:2:3:4'"),
640+
("1:2:3.f", "Bad time string: '1:2:3.f'"),
641+
("1:d:3", "Bad time string: '1:d:3'"),
642+
("1:2.3", "Bad time string: '1:2.3'"),
643+
("", "Bad time string: ''"),
644+
("1:2:99", "second must be in 0[.][.]59"),
645+
("1:99", "minute must be in 0[.][.]59"),
646+
("99", "hour must be in 0[.][.]23"),
647+
],
648+
)
649+
def test_bad_time_parsing(value, error):
650+
with pytest.raises(ValueError, match=error):
651+
_cls("time")([value])
652+
653+
654+
@pytest.mark.parametrize(
655+
"value, error",
656+
[
657+
("thursday", "Bad date string: 'thursday'"),
658+
("1-2-thursday", "Bad date string: '1-2-thursday'"),
659+
("1-2-3-4", "Bad date string: '1-2-3-4'"),
660+
("1-2-3.f", "Bad date string: '1-2-3.f'"),
661+
("1-d-3", "Bad date string: '1-d-3'"),
662+
("1-3", "Bad date string: '1-3'"),
663+
("1", "Bad date string: '1'"),
664+
("", "Bad date string: ''"),
665+
("2021-2-99", "day is out of range for month"),
666+
("2021-99-1", "month must be in 1[.][.]12"),
667+
("10000-1-1", "year 10000 is out of range"),
668+
],
669+
)
670+
def test_bad_date_parsing(value, error):
671+
with pytest.raises(ValueError, match=error):
672+
_cls("date")([value])
673+
674+
675+
@for_date_and_time
676+
def test_date___arrow__array__(dtype):
677+
a = _make_one(dtype)
678+
ar = a.__arrow_array__()
679+
assert isinstance(
680+
ar, pyarrow.Date32Array if dtype == "date" else pyarrow.Time64Array,
681+
)
682+
assert [v.as_py() for v in ar] == list(a)

0 commit comments

Comments
 (0)