From 2ca325e08fcc67b099200bf40d5e355874e7e17b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 10:11:12 -0600 Subject: [PATCH 01/19] test: Don't use the equal_nan option of array_equal. It requires new versions of numpy --- tests/unit/test_dtypes.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 572915e..325343f 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -58,6 +58,11 @@ for_date_and_time = pytest.mark.parametrize("dtype", ["date", "time"]) +def eq_na(a1, a2): + nna1 = pd.notna(a1) + nna2 = pd.notna(a2) + return np.array_equal(nna1, nna2) and np.array_equal(a1[nna1], a2[nna2]) + @pytest.fixture(autouse=True) def register_dtype(): import db_dtypes # noqa @@ -575,8 +580,8 @@ def test_date_add(): dates = pd.Series(dates) times = pd.Series(times) expect = dates.astype("datetime64") + times.astype("timedelta64")[:2] - assert np.array_equal(dates + times[:2], expect, equal_nan=True) - assert np.array_equal(times[:2] + dates, expect, equal_nan=True) + assert eq_na(dates + times[:2], expect) + assert eq_na(times[:2] + dates, expect) do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") + do @@ -609,7 +614,7 @@ def test_date_sub(): dates = pd.Series(dates) dates2 = pd.Series(dates2) expect = dates.astype("datetime64") - dates2.astype("datetime64")[:2] - assert np.array_equal(dates - dates2[:2], expect, equal_nan=True) + assert eq_na(dates - dates2[:2], expect) do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") - do From fec2a2da5e64eb5e0847ece1b106823e0469f18a Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Sun, 26 Sep 2021 16:16:06 +0000 Subject: [PATCH 02/19] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See 
https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/test_dtypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 325343f..4991639 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -63,6 +63,7 @@ def eq_na(a1, a2): nna2 = pd.notna(a2) return np.array_equal(nna1, nna2) and np.array_equal(a1[nna1], a2[nna2]) + @pytest.fixture(autouse=True) def register_dtype(): import db_dtypes # noqa From 0258316705f46fc2a41c075a97bd26d9aee553fa Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 10:19:55 -0600 Subject: [PATCH 03/19] blacken --- tests/unit/test_dtypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 325343f..4991639 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -63,6 +63,7 @@ def eq_na(a1, a2): nna2 = pd.notna(a2) return np.array_equal(nna1, nna2) and np.array_equal(a1[nna1], a2[nna2]) + @pytest.fixture(autouse=True) def register_dtype(): import db_dtypes # noqa From e787a4e209e77235056f8814c506fe0d011d81d0 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 10:26:37 -0600 Subject: [PATCH 04/19] fix: support pandas 0.24 --- db_dtypes/core.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index dd1d23a..65539a6 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -17,7 +17,6 @@ import numpy import pandas from pandas._libs import NaT -from pandas._typing import Scalar import pandas.compat.numpy.function import pandas.core.algorithms import pandas.core.arrays @@ -173,7 +172,7 @@ def all( def min( self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs - ) -> Scalar: + ): pandas.compat.numpy.function.validate_min((), kwargs) result = pandas.core.nanops.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna @@ -182,7 +181,7 
@@ def min( def max( self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs - ) -> Scalar: + ): pandas.compat.numpy.function.validate_max((), kwargs) result = pandas.core.nanops.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna From 742b2dfd21e2202f61b67841af8775469bb00a0a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 10:29:15 -0600 Subject: [PATCH 05/19] blacken --- db_dtypes/core.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 65539a6..3b05ad6 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -170,18 +170,14 @@ def all( result = pandas.core.nanops.nanall(self._ndarray, axis=axis, skipna=skipna) return result - def min( - self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs - ): + def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): pandas.compat.numpy.function.validate_min((), kwargs) result = pandas.core.nanops.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._box_func(result) - def max( - self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs - ): + def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): pandas.compat.numpy.function.validate_max((), kwargs) result = pandas.core.nanops.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna From b38ab111a01338bc264abbff04bd6e23538c10fc Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 11:38:10 -0600 Subject: [PATCH 06/19] use to_numpy() rather than array to get underlying array for expected data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit so as not to get a wrong dtype. 
¯\_(ツ)_/¯ --- tests/unit/test_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 4991639..f2fe044 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -478,7 +478,7 @@ def test_asdatetime(dtype, same): ) def test_astimedelta(dtype): t = "01:02:03.123456" - expect = pd.to_timedelta([t]).array.astype( + expect = pd.to_timedelta([t]).to_numpy().astype( "timedelta64[ns]" if dtype == "timedelta" else dtype ) From 098dda50e6d7a243686fd34962478b1c28474bd3 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 12:01:29 -0600 Subject: [PATCH 07/19] Raise a TypeError when trying to compare arrays with different shapes --- db_dtypes/core.py | 6 ++++++ tests/unit/test_dtypes.py | 16 ++++------------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 3b05ad6..c9ad084 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -15,6 +15,7 @@ from typing import Any, Optional, Sequence import numpy +import operator import pandas from pandas._libs import NaT import pandas.compat.numpy.function @@ -84,6 +85,11 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def _cmp_method(self, other, op): + oshape = getattr(other, 'shape', None) + if oshape != self.shape and oshape != (1,) and self.shape != (1,): + raise TypeError( + "Can't compare arrays with different shapes", self.shape, oshape) + if type(other) != type(self): return NotImplemented return op(self._ndarray, other._ndarray) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index f2fe044..dfc320b 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -171,18 +171,10 @@ def test_timearray_comparisons( # Bad shape for bad_shape in ([], [1, 2, 3]): - if op == "==": - assert not comparisons[op](left, np.array(bad_shape)) - assert complements[op](left, np.array(bad_shape)) - else: - with pytest.raises( 
- ValueError, match="operands could not be broadcast together", - ): - comparisons[op](left, np.array(bad_shape)) - with pytest.raises( - ValueError, match="operands could not be broadcast together", - ): - complements[op](left, np.array(bad_shape)) + with pytest.raises(TypeError, match="Can't compare arrays with different shapes"): + comparisons[op](left, np.array(bad_shape)) + with pytest.raises(TypeError, match="Can't compare arrays with different shapes"): + complements[op](left, np.array(bad_shape)) # Bad items for bad_items in ( From a20c44df5b76f583d366dfef0a8c6b2dd1b88e4f Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 16:43:16 -0600 Subject: [PATCH 08/19] swallow a warning --- tests/unit/test_dtypes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index dfc320b..18d1cfe 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -535,7 +535,10 @@ def test_min_max_median(dtype): assert empty.min(skipna=False) is None assert empty.max(skipna=False) is None if pandas_release >= (1, 2): - assert empty.median() is None + with pytest.warns(RuntimeWarning, match="empty slice"): + # It's weird that we get the warning here, and not + # below. 
:/ + assert empty.median() is None assert empty.median(skipna=False) is None a = _make_one(dtype) From ec80115a99864b2957afab06e9f56902c4ed7e54 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Sun, 26 Sep 2021 22:47:36 +0000 Subject: [PATCH 09/19] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- db_dtypes/core.py | 5 +++-- tests/unit/test_dtypes.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index c9ad084..1f284ac 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -85,10 +85,11 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def _cmp_method(self, other, op): - oshape = getattr(other, 'shape', None) + oshape = getattr(other, "shape", None) if oshape != self.shape and oshape != (1,) and self.shape != (1,): raise TypeError( - "Can't compare arrays with different shapes", self.shape, oshape) + "Can't compare arrays with different shapes", self.shape, oshape + ) if type(other) != type(self): return NotImplemented diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 18d1cfe..a514c47 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -171,9 +171,13 @@ def test_timearray_comparisons( # Bad shape for bad_shape in ([], [1, 2, 3]): - with pytest.raises(TypeError, match="Can't compare arrays with different shapes"): + with pytest.raises( + TypeError, match="Can't compare arrays with different shapes" + ): comparisons[op](left, np.array(bad_shape)) - with pytest.raises(TypeError, match="Can't compare arrays with different shapes"): + with pytest.raises( + TypeError, match="Can't compare arrays with different shapes" + ): complements[op](left, np.array(bad_shape)) # Bad items @@ -470,8 +474,10 @@ def test_asdatetime(dtype, same): ) def 
test_astimedelta(dtype): t = "01:02:03.123456" - expect = pd.to_timedelta([t]).to_numpy().astype( - "timedelta64[ns]" if dtype == "timedelta" else dtype + expect = ( + pd.to_timedelta([t]) + .to_numpy() + .astype("timedelta64[ns]" if dtype == "timedelta" else dtype) ) a = _cls("time")([t, None]) From 03a18397bbecb61b7fc57af0df59b40e7cdd2c21 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 16:55:19 -0600 Subject: [PATCH 10/19] add some documentation for import_default and a better variable name. --- db_dtypes/pandas_backports.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/db_dtypes/pandas_backports.py b/db_dtypes/pandas_backports.py index bfeb148..003224f 100644 --- a/db_dtypes/pandas_backports.py +++ b/db_dtypes/pandas_backports.py @@ -31,8 +31,17 @@ def import_default(module_name, force=False, default=None): + """ + Provide an implementation for a class or function when it can't be imported + + or when force is True. + + This is used to replicate Pandas APIs that are missing or insufficient + (thus the force option) in early pandas versions. 
+ """ + if default is None: - return lambda func: import_default(module_name, force, func) + return lambda func_or_class: import_default(module_name, force, func_or_class) if force: return default From 9da28902e58d3d11e7e335dc92524473fe88854c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 16:56:56 -0600 Subject: [PATCH 11/19] blacken/lint --- db_dtypes/core.py | 6 +++--- tests/unit/test_dtypes.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index c9ad084..fbc784e 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -15,7 +15,6 @@ from typing import Any, Optional, Sequence import numpy -import operator import pandas from pandas._libs import NaT import pandas.compat.numpy.function @@ -85,10 +84,11 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def _cmp_method(self, other, op): - oshape = getattr(other, 'shape', None) + oshape = getattr(other, "shape", None) if oshape != self.shape and oshape != (1,) and self.shape != (1,): raise TypeError( - "Can't compare arrays with different shapes", self.shape, oshape) + "Can't compare arrays with different shapes", self.shape, oshape + ) if type(other) != type(self): return NotImplemented diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 18d1cfe..a514c47 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -171,9 +171,13 @@ def test_timearray_comparisons( # Bad shape for bad_shape in ([], [1, 2, 3]): - with pytest.raises(TypeError, match="Can't compare arrays with different shapes"): + with pytest.raises( + TypeError, match="Can't compare arrays with different shapes" + ): comparisons[op](left, np.array(bad_shape)) - with pytest.raises(TypeError, match="Can't compare arrays with different shapes"): + with pytest.raises( + TypeError, match="Can't compare arrays with different shapes" + ): complements[op](left, np.array(bad_shape)) # Bad items @@ -470,8 
+474,10 @@ def test_asdatetime(dtype, same): ) def test_astimedelta(dtype): t = "01:02:03.123456" - expect = pd.to_timedelta([t]).to_numpy().astype( - "timedelta64[ns]" if dtype == "timedelta" else dtype + expect = ( + pd.to_timedelta([t]) + .to_numpy() + .astype("timedelta64[ns]" if dtype == "timedelta" else dtype) ) a = _cls("time")([t, None]) From 88a87fc8440702bdc8ced7a7e4bd93a02c8fa1fc Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 17:02:47 -0600 Subject: [PATCH 12/19] TIL _s in int literals --- db_dtypes/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 8a58666..24be127 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -69,7 +69,7 @@ def _datetime(cls, scalar): h, m, s = map(float, scalar.split(":")) s, us = divmod(s, 1) return datetime.datetime( - 1970, 1, 1, int(h), int(m), int(s), int(us * 1000000) + 1970, 1, 1, int(h), int(m), int(s), int(us * 1_000_000) ) else: raise TypeError("Invalid value type", scalar) From c93dd442477f0dd1860651fed7f5e2238fc3e625 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 18:00:10 -0600 Subject: [PATCH 13/19] better date and time parsing, especially errors --- db_dtypes/__init__.py | 29 ++++++++++++++++------ tests/unit/test_dtypes.py | 52 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 24be127..c3218c1 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -16,6 +16,7 @@ """ import datetime +import re import numpy import pandas @@ -61,15 +62,24 @@ class TimeArray(core.BaseDatetimeArray): _npepoch = numpy.datetime64(_epoch) @classmethod - def _datetime(cls, scalar): + def _datetime( + cls, + scalar, + match=re.compile( + r"\s*(\d+)(?::(\d+)(:\d+(?:[.]\d+)?)?)?\s*$").match, + ): if isinstance(scalar, datetime.time): return datetime.datetime.combine(cls._epoch, scalar) elif 
isinstance(scalar, str): # iso string - h, m, s = map(float, scalar.split(":")) - s, us = divmod(s, 1) + m = match(scalar) + if not m: + raise ValueError(f"Bad time string: {repr(scalar)}") + + h, m, s = m.groups() + s, us = divmod(float(s[1:] if s else 0), 1) return datetime.datetime( - 1970, 1, 1, int(h), int(m), int(s), int(us * 1_000_000) + 1970, 1, 1, int(h), int(m if m else 0), int(s), int(us * 1_000_000) ) else: raise TypeError("Invalid value type", scalar) @@ -125,12 +135,17 @@ class DateArray(core.BaseDatetimeArray): dtype = DateDtype() @staticmethod - def _datetime(scalar): + def _datetime( + scalar, + match=re.compile(r"\s*(\d+)-(\d+)-(\d+)\s*$").match, + ): if isinstance(scalar, datetime.date): return datetime.datetime(scalar.year, scalar.month, scalar.day) elif isinstance(scalar, str): - # iso string - return datetime.datetime(*map(int, scalar.split("-"))) + m = match(scalar) + if not m: + raise ValueError(f"Bad date string: {repr(scalar)}") + return datetime.datetime(*map(int, m.groups())) else: raise TypeError("Invalid value type", scalar) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index a514c47..7bb9218 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -621,3 +621,55 @@ def test_date_sub(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") - do assert np.array_equal(dates - do, expect) + + +@pytest.mark.parametrize( + "value, expected", + [ + ("1", datetime.time(1)), + ("1:2", datetime.time(1,2)), + ], + ) +def test_short_time_parsing(value, expected): + assert _cls('time')([value])[0] == expected + + +@pytest.mark.parametrize( + "value, error", + [ + ("thursday", "Bad time string: 'thursday'"), + ("1:2:3thursday", "Bad time string: '1:2:3thursday'"), + ("1:2:3:4", "Bad time string: '1:2:3:4'"), + ("1:2:3.f", "Bad time string: '1:2:3.f'"), + ("1:d:3", "Bad time string: '1:d:3'"), + ("1:2.3", "Bad time string: '1:2.3'"), + ("", "Bad time string: ''"), + ("1:2:99", 
"second must be in 0[.][.]59"), + ("1:99", "minute must be in 0[.][.]59"), + ("99", "hour must be in 0[.][.]23"), + ], + ) +def test_bad_time_parsing(value, error): + with pytest.raises(ValueError, match=error): + _cls('time')([value]) + + +@pytest.mark.parametrize( + "value, error", + [ + ("thursday", "Bad date string: 'thursday'"), + ("1-2-thursday", "Bad date string: '1-2-thursday'"), + ("1-2-3-4", "Bad date string: '1-2-3-4'"), + ("1-2-3.f", "Bad date string: '1-2-3.f'"), + ("1-d-3", "Bad date string: '1-d-3'"), + ("1-3", "Bad date string: '1-3'"), + ("1", "Bad date string: '1'"), + ("", "Bad date string: ''"), + ("2021-2-99", "day is out of range for month"), + ("2021-99-1", "month must be in 1[.][.]12"), + ("10000-1-1", "year 10000 is out of range"), + ], + ) +def test_bad_date_parsing(value, error): + with pytest.raises(ValueError, match=error): + _cls('date')([value]) From d7437ec760357881ee14a5aa256e17dad34b0e26 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 27 Sep 2021 00:01:47 +0000 Subject: [PATCH 14/19] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- db_dtypes/__init__.py | 10 ++++------ tests/unit/test_dtypes.py | 22 +++++++++------------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index c3218c1..450ed60 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -65,9 +65,8 @@ class TimeArray(core.BaseDatetimeArray): def _datetime( cls, scalar, - match=re.compile( - r"\s*(\d+)(?::(\d+)(:\d+(?:[.]\d+)?)?)?\s*$").match, - ): + match=re.compile(r"\s*(\d+)(?::(\d+)(:\d+(?:[.]\d+)?)?)?\s*$").match, + ): if isinstance(scalar, datetime.time): return datetime.datetime.combine(cls._epoch, scalar) elif isinstance(scalar, str): @@ -136,9 +135,8 @@ class DateArray(core.BaseDatetimeArray): @staticmethod 
def _datetime( - scalar, - match=re.compile(r"\s*(\d+)-(\d+)-(\d+)\s*$").match, - ): + scalar, match=re.compile(r"\s*(\d+)-(\d+)-(\d+)\s*$").match, + ): if isinstance(scalar, datetime.date): return datetime.datetime(scalar.year, scalar.month, scalar.day) elif isinstance(scalar, str): diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 7bb9218..2b6aafe 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -624,14 +624,10 @@ def test_date_sub(): @pytest.mark.parametrize( - "value, expected", - [ - ("1", datetime.time(1)), - ("1:2", datetime.time(1,2)), - ], - ) + "value, expected", [("1", datetime.time(1)), ("1:2", datetime.time(1, 2)),], +) def test_short_time_parsing(value, expected): - assert _cls('time')([value])[0] == expected + assert _cls("time")([value])[0] == expected @pytest.mark.parametrize( @@ -647,11 +643,11 @@ def test_short_time_parsing(value, expected): ("1:2:99", "second must be in 0[.][.]59"), ("1:99", "minute must be in 0[.][.]59"), ("99", "hour must be in 0[.][.]23"), - ], - ) + ], +) def test_bad_time_parsing(value, error): with pytest.raises(ValueError, match=error): - _cls('time')([value]) + _cls("time")([value]) @pytest.mark.parametrize( @@ -668,8 +664,8 @@ def test_bad_time_parsing(value, error): ("2021-2-99", "day is out of range for month"), ("2021-99-1", "month must be in 1[.][.]12"), ("10000-1-1", "year 10000 is out of range"), - ], - ) + ], +) def test_bad_date_parsing(value, error): with pytest.raises(ValueError, match=error): - _cls('date')([value]) + _cls("date")([value]) From 57cfc2e9c487a71a1f09482f07f9f7701c85f282 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 18:06:28 -0600 Subject: [PATCH 15/19] document why all the pandas constraints --- testing/constraints-3.6.txt | 1 + testing/constraints-3.7.txt | 1 + testing/constraints-3.8.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index fd89d90..a7388cd 100644 
--- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", packaging==17.0 +# Make sure we test with pandas 0.24.2. The Python version isn't that relevant. pandas==0.24.2 pyarrow==3.0.0 numpy==1.16.6 diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 684864f..0b3b309 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -1 +1,2 @@ +# Make sure we test with pandas 1.1.0. The Python version isn't that relevant. pandas==1.1.0 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index 3fd8886..2e7f354 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1 +1,2 @@ +# Make sure we test with pandas 1.2.0. The Python version isn't that relevant. pandas==1.2.0 From 9f4005bb1333cd0cf31020c67af0f7dad11470ee Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 26 Sep 2021 18:07:56 -0600 Subject: [PATCH 16/19] blacken/lint --- tests/unit/test_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 2b6aafe..118458e 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -624,7 +624,7 @@ def test_date_sub(): @pytest.mark.parametrize( - "value, expected", [("1", datetime.time(1)), ("1:2", datetime.time(1, 2)),], + "value, expected", [("1", datetime.time(1)), ("1:2", datetime.time(1, 2))], ) def test_short_time_parsing(value, expected): assert _cls("time")([value])[0] == expected From 1e29e0e23bfa6a9df825237051a636d40475828c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 27 Sep 2021 19:42:55 -0600 Subject: [PATCH 17/19] Added missing test for __arrow_array__ --- tests/unit/test_dtypes.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index 118458e..eca3a31 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -15,6 +15,7 
@@ import datetime import packaging.version +import pyarrow.lib import pytest pd = pytest.importorskip("pandas") @@ -669,3 +670,13 @@ def test_bad_time_parsing(value, error): def test_bad_date_parsing(value, error): with pytest.raises(ValueError, match=error): _cls("date")([value]) + + +@for_date_and_time +def test_date___arrow__array__(dtype): + a = _make_one(dtype) + ar = a.__arrow_array__() + assert isinstance( + ar, pyarrow.Date32Array if dtype == "date" else pyarrow.Time64Array, + ) + assert [v.as_py() for v in ar] == list(a) From 3a9d3e2eea57b114130b1f568c0fa152e8b05a32 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 27 Sep 2021 19:43:54 -0600 Subject: [PATCH 18/19] fix __arrow_array__ for TimeArray for pandas < 1.0.0 Because TimeArray didn't have to_numpy for some reason. :/ --- db_dtypes/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 450ed60..08c4b9c 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -19,6 +19,7 @@ import re import numpy +import packaging.version import pandas import pandas.compat.numpy.function import pandas.core.algorithms @@ -36,6 +37,8 @@ date_dtype_name = "date" time_dtype_name = "time" +pandas_release = packaging.version.parse(pandas.__version__).release + @pandas.core.dtypes.dtypes.register_extension_dtype class TimeDtype(core.BaseDatetimeDtype): @@ -105,6 +108,11 @@ def astype(self, dtype, copy=True): else: return super().astype(dtype, copy=copy) + if pandas_release < (1,): + + def to_numpy(self): + return self.astype("object") + def __arrow_array__(self, type=None): return pyarrow.array( self.to_numpy(), type=type if type is not None else pyarrow.time64("ns"), From d0e8376206f83b52599d03f1b3b7287c8137e84a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Sep 2021 16:37:31 -0500 Subject: [PATCH 19/19] use named groups for regex --- db_dtypes/__init__.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 
deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 08c4b9c..9495f0c 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -68,20 +68,30 @@ class TimeArray(core.BaseDatetimeArray): def _datetime( cls, scalar, - match=re.compile(r"\s*(\d+)(?::(\d+)(:\d+(?:[.]\d+)?)?)?\s*$").match, + match_fn=re.compile( + r"\s*(?P<hour>\d+)(?::(?P<minute>\d+)(?::(?P<second>\d+(?:[.]\d+)?)?)?)?\s*$" + ).match, ): if isinstance(scalar, datetime.time): return datetime.datetime.combine(cls._epoch, scalar) elif isinstance(scalar, str): # iso string - m = match(scalar) - if not m: + match = match_fn(scalar) + if not match: raise ValueError(f"Bad time string: {repr(scalar)}") - h, m, s = m.groups() - s, us = divmod(float(s[1:] if s else 0), 1) + hour = match.group("hour") + minute = match.group("minute") + second = match.group("second") + second, microsecond = divmod(float(second if second else 0), 1) return datetime.datetime( - 1970, 1, 1, int(h), int(m if m else 0), int(s), int(us * 1_000_000) + 1970, + 1, + 1, + int(hour), + int(minute if minute else 0), + int(second), + int(microsecond * 1_000_000), ) else: raise TypeError("Invalid value type", scalar) @@ -110,8 +120,8 @@ def astype(self, dtype, copy=True): if pandas_release < (1,): - def to_numpy(self): - return self.astype("object") + def to_numpy(self, dtype="object"): + return self.astype(dtype) def __arrow_array__(self, type=None): return pyarrow.array( @@ -143,15 +153,19 @@ class DateArray(core.BaseDatetimeArray): @staticmethod def _datetime( - scalar, match=re.compile(r"\s*(\d+)-(\d+)-(\d+)\s*$").match, + scalar, + match_fn=re.compile(r"\s*(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)\s*$").match, ): if isinstance(scalar, datetime.date): return datetime.datetime(scalar.year, scalar.month, scalar.day) elif isinstance(scalar, str): - m = match(scalar) - if not m: + match = match_fn(scalar) + if not match: raise ValueError(f"Bad date string: {repr(scalar)}") - return datetime.datetime(*map(int, m.groups())) + year = 
int(match.group("year")) + month = int(match.group("month")) + day = int(match.group("day")) + return datetime.datetime(year, month, day) else: raise TypeError("Invalid value type", scalar)