Skip to content

test: No warnings #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Sep 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2ca325e
test: Don't use the equal_nan option of array_equal. It requires new …
Sep 26, 2021
fec2a2d
🦉 Updates from OwlBot
gcf-owl-bot[bot] Sep 26, 2021
0258316
blacken
Sep 26, 2021
2433b6f
Merge branch 'no-equal_nan' of github.com:googleapis/python-db-dtypes…
Sep 26, 2021
e787a4e
fix: support pandas 0.24
Sep 26, 2021
742b2df
blacken
Sep 26, 2021
b38ab11
use to_numpy() rather than array to get underlying array for expected…
Sep 26, 2021
098dda5
Raise a TypeError when trying to compare arrays with different shapes
Sep 26, 2021
a20c44d
swallow a warning
Sep 26, 2021
ec80115
🦉 Updates from OwlBot
gcf-owl-bot[bot] Sep 26, 2021
03a1839
add some documentation for import_default
Sep 26, 2021
9da2890
blacken/lint
Sep 26, 2021
eda5744
Merge branch 'no-warnings' of github.com:googleapis/python-db-dtypes-…
Sep 26, 2021
88a87fc
TIL _s in int literals
Sep 26, 2021
c93dd44
better date and time parsing, especially errors
Sep 27, 2021
d7437ec
🦉 Updates from OwlBot
gcf-owl-bot[bot] Sep 27, 2021
57cfc2e
document why all the pandas constraints
Sep 27, 2021
2b27e8f
Merge branch 'no-warnings' of github.com:googleapis/python-db-dtypes-…
Sep 27, 2021
9f4005b
blacken/lint
Sep 27, 2021
c993d2a
Merge branch 'main' into no-warnings
tswast Sep 27, 2021
1e29e0e
Added missing test for __arrow_array__
Sep 28, 2021
3a9d3e2
fix __arrow_array__ for TimeArray for pandas < 1.0.0
Sep 28, 2021
a22348f
Merge branch 'no-warnings' of github.com:googleapis/python-db-dtypes-…
Sep 28, 2021
d0e8376
use named groups for regex
tswast Sep 28, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 42 additions & 7 deletions db_dtypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
"""

import datetime
import re

import numpy
import packaging.version
import pandas
import pandas.compat.numpy.function
import pandas.core.algorithms
Expand All @@ -35,6 +37,8 @@
date_dtype_name = "date"
time_dtype_name = "time"

pandas_release = packaging.version.parse(pandas.__version__).release


@pandas.core.dtypes.dtypes.register_extension_dtype
class TimeDtype(core.BaseDatetimeDtype):
Expand All @@ -61,15 +65,33 @@ class TimeArray(core.BaseDatetimeArray):
_npepoch = numpy.datetime64(_epoch)

@classmethod
def _datetime(cls, scalar):
def _datetime(
cls,
scalar,
match_fn=re.compile(
r"\s*(?P<hour>\d+)(?::(?P<minute>\d+)(?::(?P<second>\d+(?:[.]\d+)?)?)?)?\s*$"
).match,
):
if isinstance(scalar, datetime.time):
return datetime.datetime.combine(cls._epoch, scalar)
elif isinstance(scalar, str):
# iso string
h, m, s = map(float, scalar.split(":"))
s, us = divmod(s, 1)
match = match_fn(scalar)
if not match:
raise ValueError(f"Bad time string: {repr(scalar)}")

hour = match.group("hour")
minute = match.group("minute")
second = match.group("second")
second, microsecond = divmod(float(second if second else 0), 1)
return datetime.datetime(
1970, 1, 1, int(h), int(m), int(s), int(us * 1000000)
1970,
1,
1,
int(hour),
int(minute if minute else 0),
int(second),
int(microsecond * 1_000_000),
)
else:
raise TypeError("Invalid value type", scalar)
Expand All @@ -96,6 +118,11 @@ def astype(self, dtype, copy=True):
else:
return super().astype(dtype, copy=copy)

if pandas_release < (1,):

    def to_numpy(self, dtype="object"):
        """Return the array's values converted to ``dtype``.

        This method is only defined when the installed pandas release is
        older than 1.0; it simply delegates to ``astype``.

        Args:
            dtype: Target dtype handed to ``astype``; defaults to ``"object"``.

        Returns:
            Whatever ``self.astype(dtype)`` returns.
        """
        converted = self.astype(dtype)
        return converted

def __arrow_array__(self, type=None):
return pyarrow.array(
self.to_numpy(), type=type if type is not None else pyarrow.time64("ns"),
Expand Down Expand Up @@ -125,12 +152,20 @@ class DateArray(core.BaseDatetimeArray):
dtype = DateDtype()

@staticmethod
def _datetime(scalar):
def _datetime(
scalar,
match_fn=re.compile(r"\s*(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)\s*$").match,
):
if isinstance(scalar, datetime.date):
return datetime.datetime(scalar.year, scalar.month, scalar.day)
elif isinstance(scalar, str):
# iso string
return datetime.datetime(*map(int, scalar.split("-")))
match = match_fn(scalar)
if not match:
raise ValueError(f"Bad date string: {repr(scalar)}")
year = int(match.group("year"))
month = int(match.group("month"))
day = int(match.group("day"))
return datetime.datetime(year, month, day)
else:
raise TypeError("Invalid value type", scalar)

Expand Down
6 changes: 6 additions & 0 deletions db_dtypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ def astype(self, dtype, copy=True):
return super().astype(dtype, copy=copy)

def _cmp_method(self, other, op):
    """Apply the comparison ``op`` to this array and ``other``.

    Args:
        other: The object to compare against.
        op: A two-argument callable applied to the two ``_ndarray`` values.

    Returns:
        ``NotImplemented`` when ``other`` is not exactly the same type as
        ``self``; otherwise the result of ``op(self._ndarray, other._ndarray)``.

    Raises:
        TypeError: If the shapes differ and neither shape is ``(1,)``. An
            ``other`` without a ``shape`` attribute is treated as having
            shape ``None``. This check runs before the type check.
    """
    single = (1,)
    other_shape = getattr(other, "shape", None)

    shapes_ok = (
        other_shape == self.shape or other_shape == single or self.shape == single
    )
    if not shapes_ok:
        raise TypeError(
            "Can't compare arrays with different shapes", self.shape, other_shape
        )

    if type(other) != type(self):
        return NotImplemented

    lhs, rhs = self._ndarray, other._ndarray
    return op(lhs, rhs)
Expand Down
11 changes: 10 additions & 1 deletion db_dtypes/pandas_backports.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,17 @@


def import_default(module_name, force=False, default=None):
"""
Provide a default implementation for a class or function.

The default is used when the real one can't be imported or when force is True.

This is used to replicate Pandas APIs that are missing or insufficient
(thus the force option) in early pandas versions.
"""

if default is None:
return lambda func: import_default(module_name, force, func)
return lambda func_or_class: import_default(module_name, force, func_or_class)

if force:
return default
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
packaging==17.0
# Make sure we test with pandas 0.24.2. The Python version isn't that relevant.
pandas==0.24.2
pyarrow==3.0.0
numpy==1.16.6
1 change: 1 addition & 0 deletions testing/constraints-3.7.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Make sure we test with pandas 1.1.0. The Python version isn't that relevant.
pandas==1.1.0
1 change: 1 addition & 0 deletions testing/constraints-3.8.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Make sure we test with pandas 1.2.0. The Python version isn't that relevant.
pandas==1.2.0
90 changes: 75 additions & 15 deletions tests/unit/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import datetime

import packaging.version
import pyarrow.lib
import pytest

pd = pytest.importorskip("pandas")
Expand Down Expand Up @@ -171,18 +172,14 @@ def test_timearray_comparisons(

# Bad shape
for bad_shape in ([], [1, 2, 3]):
if op == "==":
assert not comparisons[op](left, np.array(bad_shape))
assert complements[op](left, np.array(bad_shape))
else:
with pytest.raises(
ValueError, match="operands could not be broadcast together",
):
comparisons[op](left, np.array(bad_shape))
with pytest.raises(
ValueError, match="operands could not be broadcast together",
):
complements[op](left, np.array(bad_shape))
with pytest.raises(
TypeError, match="Can't compare arrays with different shapes"
):
comparisons[op](left, np.array(bad_shape))
with pytest.raises(
TypeError, match="Can't compare arrays with different shapes"
):
complements[op](left, np.array(bad_shape))

# Bad items
for bad_items in (
Expand Down Expand Up @@ -478,8 +475,10 @@ def test_asdatetime(dtype, same):
)
def test_astimedelta(dtype):
t = "01:02:03.123456"
expect = pd.to_timedelta([t]).array.astype(
"timedelta64[ns]" if dtype == "timedelta" else dtype
expect = (
pd.to_timedelta([t])
.to_numpy()
.astype("timedelta64[ns]" if dtype == "timedelta" else dtype)
)

a = _cls("time")([t, None])
Expand Down Expand Up @@ -543,7 +542,10 @@ def test_min_max_median(dtype):
assert empty.min(skipna=False) is None
assert empty.max(skipna=False) is None
if pandas_release >= (1, 2):
assert empty.median() is None
with pytest.warns(RuntimeWarning, match="empty slice"):
# It's weird that we get the warning here, and not
# below. :/
assert empty.median() is None
assert empty.median(skipna=False) is None

a = _make_one(dtype)
Expand Down Expand Up @@ -620,3 +622,61 @@ def test_date_sub():
do = pd.Series([pd.DateOffset(days=i) for i in range(4)])
expect = dates.astype("object") - do
assert np.array_equal(dates - do, expect)


@pytest.mark.parametrize(
    "value, expected",
    [
        ("1", datetime.time(1)),
        ("1:2", datetime.time(1, 2)),
    ],
)
def test_short_time_parsing(value, expected):
    """Time strings that omit trailing fields parse to the expected time.

    Covers an hour-only string and an hour:minute string.
    """
    parsed = _cls("time")([value])
    assert parsed[0] == expected


@pytest.mark.parametrize(
    "value, error",
    [
        # Inputs expected to be reported as a "Bad time string", with the
        # offending value repr'd in the message.
        ("thursday", "Bad time string: 'thursday'"),
        ("1:2:3thursday", "Bad time string: '1:2:3thursday'"),
        ("1:2:3:4", "Bad time string: '1:2:3:4'"),
        ("1:2:3.f", "Bad time string: '1:2:3.f'"),
        ("1:d:3", "Bad time string: '1:d:3'"),
        ("1:2.3", "Bad time string: '1:2.3'"),
        ("", "Bad time string: ''"),
        # Well-shaped inputs whose fields are out of range.
        # NOTE(review): these messages look like the ones raised by
        # datetime.datetime itself -- confirm against the parser.
        ("1:2:99", "second must be in 0[.][.]59"),
        ("1:99", "minute must be in 0[.][.]59"),
        ("99", "hour must be in 0[.][.]23"),
    ],
)
def test_bad_time_parsing(value, error):
    """Check that a bad time string raises ValueError with a matching message.

    ``error`` is passed to ``pytest.raises(match=...)`` and is therefore
    treated as a regular expression, which is why the dots in the range
    messages are written as ``[.]``.
    """
    with pytest.raises(ValueError, match=error):
        _cls("time")([value])


@pytest.mark.parametrize(
    "value, error",
    [
        # Inputs expected to be reported as a "Bad date string", with the
        # offending value repr'd in the message.
        ("thursday", "Bad date string: 'thursday'"),
        ("1-2-thursday", "Bad date string: '1-2-thursday'"),
        ("1-2-3-4", "Bad date string: '1-2-3-4'"),
        ("1-2-3.f", "Bad date string: '1-2-3.f'"),
        ("1-d-3", "Bad date string: '1-d-3'"),
        ("1-3", "Bad date string: '1-3'"),
        ("1", "Bad date string: '1'"),
        ("", "Bad date string: ''"),
        # Year-month-day shaped inputs whose fields are out of range.
        # NOTE(review): these messages look like the ones raised by
        # datetime.datetime itself -- confirm against the parser.
        ("2021-2-99", "day is out of range for month"),
        ("2021-99-1", "month must be in 1[.][.]12"),
        ("10000-1-1", "year 10000 is out of range"),
    ],
)
def test_bad_date_parsing(value, error):
    """Check that a bad date string raises ValueError with a matching message.

    ``error`` is passed to ``pytest.raises(match=...)`` and is therefore
    treated as a regular expression, which is why the dots in the range
    message are written as ``[.]``.
    """
    with pytest.raises(ValueError, match=error):
        _cls("date")([value])


@for_date_and_time
def test_date___arrow__array__(dtype):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self: remove in #11

a = _make_one(dtype)
ar = a.__arrow_array__()
assert isinstance(
ar, pyarrow.Date32Array if dtype == "date" else pyarrow.Time64Array,
)
assert [v.as_py() for v in ar] == list(a)