From b48ee6dc792154a1bf3f67a12e01708edffb1a8f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 27 Jan 2022 11:10:57 -0600 Subject: [PATCH 01/22] fix: address failing compliance tests in DateArray and TimeArray test: add a test session with prerelease versions of dependencies --- tests/unit/test_date_compliance.py | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 tests/unit/test_date_compliance.py diff --git a/tests/unit/test_date_compliance.py b/tests/unit/test_date_compliance.py new file mode 100644 index 0000000..92080d7 --- /dev/null +++ b/tests/unit/test_date_compliance.py @@ -0,0 +1,51 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +import datetime + +import numpy +from pandas.tests.extension import base +import pytest + +from db_dtypes import DateArray + +# NDArrayBacked2DTests suite added in https://github.com/pandas-dev/pandas/pull/44974 +pytest.importorskip("pandas", minversion="1.5.0dev") + + +@pytest.fixture +def data(): + return DateArray( + numpy.arange( + datetime.datetime(1900, 1, 1), + datetime.datetime(2099, 12, 31), + datetime.timedelta(days=13), + dtype="datetime64[ns]", + ) + ) + + +@pytest.fixture +def data_missing(): + return DateArray([None, datetime.date(2022, 1, 27)]) + + +class Test2DCompat(base.NDArrayBacked2DTests): + pass From 90e1573a54c05fa56cdeabbecbaa7a5b55d98ab5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 27 Jan 2022 17:13:00 -0600 Subject: [PATCH 02/22] fix min/max/median for 2D arrays --- db_dtypes/core.py | 44 +++++++++++++++++++++++------------------ tests/unit/test_date.py | 14 +++++++++++++ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 05daf37..3dccb56 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -135,29 +135,35 @@ def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): result = pandas_backports.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) - return self._box_func(result) + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): pandas_backports.numpy_validate_max((), kwargs) result = pandas_backports.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) - return self._box_func(result) - - if pandas_release >= (1, 2): - - def median( - self, - *, - axis: Optional[int] = None, - out=None, - overwrite_input: bool = False, - keepdims: bool = False, - skipna: bool = True, - ): - pandas_backports.numpy_validate_median( - (), - {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}, - ) - result = pandas_backports.nanmedian(self._ndarray, axis=axis, skipna=skipna) + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + + def median( + self, + *, + axis: Optional[int] = None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, + ): + if not hasattr(pandas_backports, "numpy_validate_median"): + raise NotImplementedError("Need pandas 1.3 or later to calculate median.") + + pandas_backports.numpy_validate_median( + (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}, + ) + result = pandas_backports.nanmedian(self._ndarray, axis=axis, skipna=skipna) + if axis is None or self.ndim == 1: return self._box_func(result) + return self._from_backing_data(result) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index b906f24..3250eec 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -65,3 +65,17 @@ def test_date_parsing(value, expected): def test_date_parsing_errors(value, error): with pytest.raises(ValueError, match=error): pandas.Series([value], dtype="dbdate") + + +# TODO: skip if median not available +@pytest.mark.parametrize( + "values, expected", + [ + (["1970-01-01", "1900-01-01", "2000-01-01"], datetime.date(1970, 1, 1)), + ([None, "1900-01-01", None], datetime.date(1900, 1, 1)), + (["2222-02-01", "2222-02-03"], datetime.date(2222, 2, 2)), + ], +) +def test_date_median(values, expected): + series = pandas.Series(values, dtype="dbdate") + assert series.median() == expected From 47100f5fd567a95e1687115bad5b10f9b386d314 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 1 Feb 2022 16:46:20 -0600 Subject: [PATCH 03/22] fixes except for null contains --- db_dtypes/__init__.py | 2 +- db_dtypes/core.py | 18 ++++--- tests/unit/date_compliance/conftest.py | 48 +++++++++++++++++++ .../date_compliance/test_date_compliance.py | 39 +++++++++++++++ .../test_date_compliance_1_5.py} | 24 +--------- tests/unit/test_date.py | 6 ++- 6 files changed, 107 insertions(+), 30 deletions(-) create mode 100644 tests/unit/date_compliance/conftest.py create mode 100644 tests/unit/date_compliance/test_date_compliance.py rename tests/unit/{test_date_compliance.py => date_compliance/test_date_compliance_1_5.py} (70%) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index a518a0b..d98d5f1 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -145,7 +145,7 @@ def _datetime( raise TypeError("Invalid value type", scalar) def _box_func(self, x): - if pandas.isnull(x): + if pandas.isna(x): return None try: diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 3dccb56..e766af9 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -16,9 +16,8 @@ import numpy import pandas -from pandas import NaT import pandas.api.extensions -from pandas.api.types import is_dtype_equal, is_list_like, pandas_dtype +from pandas.api.types import is_dtype_equal, is_list_like, is_scalar, pandas_dtype from db_dtypes import pandas_backports @@ -27,14 +26,18 @@ class BaseDatetimeDtype(pandas.api.extensions.ExtensionDtype): - na_value = NaT - kind = "o" + na_value = pandas.NaT + kind = "O" names = None @classmethod - def construct_from_string(cls, name): + def construct_from_string(cls, name: str): + if not isinstance(name, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(name)}" + ) if name != cls.name: - raise TypeError() + raise TypeError(f"Cannot construct a '{cls.__name__}' from 'another_type'") return cls() @@ -75,6 +78,9 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def _cmp_method(self, other, op): + if is_scalar(other) and (pandas.isna(other) or type(other) == self.dtype.type): + other = type(self)([other]) + oshape = getattr(other, "shape", None) if oshape != self.shape and oshape != (1,) and self.shape != (1,): raise TypeError( diff --git a/tests/unit/date_compliance/conftest.py b/tests/unit/date_compliance/conftest.py new file mode 100644 index 0000000..f8d2666 --- /dev/null +++ b/tests/unit/date_compliance/conftest.py @@ -0,0 +1,48 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import numpy +import pandas +import pytest + +from db_dtypes import DateArray, DateDtype + + +@pytest.fixture +def data(): + return DateArray( + numpy.arange( + datetime.datetime(1900, 1, 1), + datetime.datetime(2099, 12, 31), + datetime.timedelta(days=731), + dtype="datetime64[ns]", + ) + ) + + +@pytest.fixture +def data_missing(): + return DateArray([None, datetime.date(2022, 1, 27)]) + + +@pytest.fixture +def dtype(): + return DateDtype() + + +@pytest.fixture +def na_value(): + return pandas.NaT diff --git a/tests/unit/date_compliance/test_date_compliance.py b/tests/unit/date_compliance/test_date_compliance.py new file mode 100644 index 0000000..ca47fed --- /dev/null +++ b/tests/unit/date_compliance/test_date_compliance.py @@ -0,0 +1,39 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tests for extension interface compliance, inherited from pandas. + +See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py +""" + +from pandas.tests.extension import base + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass diff --git a/tests/unit/test_date_compliance.py b/tests/unit/date_compliance/test_date_compliance_1_5.py similarity index 70% rename from tests/unit/test_date_compliance.py rename to tests/unit/date_compliance/test_date_compliance_1_5.py index 92080d7..e8f2c93 100644 --- a/tests/unit/test_date_compliance.py +++ b/tests/unit/date_compliance/test_date_compliance_1_5.py @@ -15,37 +15,17 @@ Tests for extension interface compliance, inherited from pandas. See: +https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py +and https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py """ -import datetime - -import numpy from pandas.tests.extension import base import pytest -from db_dtypes import DateArray - # NDArrayBacked2DTests suite added in https://github.com/pandas-dev/pandas/pull/44974 pytest.importorskip("pandas", minversion="1.5.0dev") -@pytest.fixture -def data(): - return DateArray( - numpy.arange( - datetime.datetime(1900, 1, 1), - datetime.datetime(2099, 12, 31), - datetime.timedelta(days=13), - dtype="datetime64[ns]", - ) - ) - - -@pytest.fixture -def data_missing(): - return DateArray([None, datetime.date(2022, 1, 27)]) - - class Test2DCompat(base.NDArrayBacked2DTests): pass diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index 3250eec..79c705c 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -19,6 +19,7 @@ # To register the types. import db_dtypes # noqa +from db_dtypes import pandas_backports @pytest.mark.parametrize( @@ -67,7 +68,10 @@ def test_date_parsing_errors(value, error): pandas.Series([value], dtype="dbdate") -# TODO: skip if median not available +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) @pytest.mark.parametrize( "values, expected", [ From cece518d279e298125889c1fc4490095129136ea Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 2 Feb 2022 11:15:45 -0600 Subject: [PATCH 04/22] actually use NaT as 'advertised' --- db_dtypes/__init__.py | 4 ++-- tests/unit/test_date.py | 5 ++++- tests/unit/test_dtypes.py | 2 +- tests/unit/test_time.py | 21 +++++++++++++++++++++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index d98d5f1..1a250b0 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -146,7 +146,7 @@ def _datetime( def _box_func(self, x): if pandas.isna(x): - return None + return pandas.NaT try: return x.astype(" Date: Thu, 27 Jan 2022 11:10:57 -0600 Subject: [PATCH 05/22] fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes This makes them consistent with other date/time dtypes, as well as internally consistent with the advertised `dtype.na_value`. BREAKING-CHANGE: dbdate and dbtime dtypes return NaT instead of None for missing values Release-As: 0.4.0 --- db_dtypes/__init__.py | 6 +-- db_dtypes/core.py | 5 +- tests/unit/test_date.py | 27 ++++++++++ tests/unit/test_dtypes.py | 104 +++++++++++++++++++++----------------- tests/unit/test_time.py | 30 +++++++++++ 5 files changed, 119 insertions(+), 53 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index a518a0b..1a250b0 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -145,8 +145,8 @@ def _datetime( raise TypeError("Invalid value type", scalar) def _box_func(self, x): - if pandas.isnull(x): - return None + if pandas.isna(x): + return pandas.NaT try: return x.astype("= (1, 2): - assert empty.median() is None + assert empty.median() is pd.NaT empty = cls([None]) - assert empty.min() is None - assert empty.max() is None - assert empty.min(skipna=False) is None - assert empty.max(skipna=False) is None + assert empty.min() is pd.NaT + assert empty.max() is pd.NaT + assert empty.min(skipna=False) is pd.NaT + assert empty.max(skipna=False) is pd.NaT if pandas_release >= (1, 2): with pytest.warns(RuntimeWarning, match="empty slice"): # It's weird that we get the warning here, and not # below. :/ - assert empty.median() is None - assert empty.median(skipna=False) is None + assert empty.median() is pd.NaT + assert empty.median(skipna=False) is pd.NaT a = _make_one(dtype) assert a.min() == sample_values[0] @@ -563,14 +573,14 @@ def test_date_add(): times = _cls("dbtime")(SAMPLE_VALUES["dbtime"]) expect = dates.astype("datetime64") + times.astype("timedelta64") - assert np.array_equal(dates + times, expect) - assert np.array_equal(times + dates, expect) + np.testing.assert_array_equal(dates + times, expect) + np.testing.assert_array_equal(times + dates, expect) do = pd.DateOffset(days=1) expect = dates.astype("object") + do - assert np.array_equal(dates + do, expect) + np.testing.assert_array_equal(dates + do, expect) if pandas_release >= (1, 1): - assert np.array_equal(do + dates, expect) + np.testing.assert_array_equal(do + dates, expect) with pytest.raises(TypeError): dates + times.astype("timedelta64") @@ -587,8 +597,8 @@ def test_date_add(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") + do - assert np.array_equal(dates + do, expect) - assert np.array_equal(do + dates, expect) + np.testing.assert_array_equal(dates + do, expect) + np.testing.assert_array_equal(do + dates, expect) def test_date_sub(): @@ -602,11 +612,11 @@ def test_date_sub(): ) ) expect = dates.astype("datetime64") - dates2.astype("datetime64") - assert np.array_equal(dates - dates2, expect) + np.testing.assert_array_equal(dates - dates2, expect) do = pd.DateOffset(days=1) expect = dates.astype("object") - do - assert np.array_equal(dates - do, expect) + np.testing.assert_array_equal(dates - do, expect) with pytest.raises(TypeError): dates - 42 @@ -620,4 +630,4 @@ def test_date_sub(): do = pd.Series([pd.DateOffset(days=i) for i in range(4)]) expect = dates.astype("object") - do - assert np.array_equal(dates - do, expect) + np.testing.assert_array_equal(dates - do, expect) diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index ba45949..8ecb996 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -19,6 +19,7 @@ # To register the types. import db_dtypes # noqa +from db_dtypes import pandas_backports @pytest.mark.parametrize( @@ -82,3 +83,32 @@ def test_time_parsing(value, expected): def test_time_parsing_errors(value, error): with pytest.raises(ValueError, match=error): pandas.Series([value], dtype="dbtime") + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +@pytest.mark.parametrize( + "values, expected", + [ + ( + ["00:00:00", "12:34:56.789101", "23:59:59.999999"], + datetime.time(12, 34, 56, 789101), + ), + ( + [ + None, + "06:30:00", + pandas.NA if hasattr(pandas, "NA") else None, + pandas.NaT, + float("nan"), + ], + datetime.time(6, 30), + ), + (["2:22:21.222222", "2:22:23.222222"], datetime.time(2, 22, 22, 222222)), + ], +) +def test_date_median(values, expected): + series = pandas.Series(values, dtype="dbtime") + assert series.median() == expected From cc713a8a6fade4c2249c35a084b13fdb32ca4bea Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 8 Mar 2022 16:17:02 -0600 Subject: [PATCH 06/22] more progress towards compliance --- tests/unit/date_compliance/conftest.py | 57 +++++++++++++++++++ .../date_compliance/test_date_compliance.py | 26 +++++++++ .../test_date_compliance_1_5.py | 4 ++ 3 files changed, 87 insertions(+) diff --git a/tests/unit/date_compliance/conftest.py b/tests/unit/date_compliance/conftest.py index f8d2666..15b1476 100644 --- a/tests/unit/date_compliance/conftest.py +++ b/tests/unit/date_compliance/conftest.py @@ -21,6 +21,43 @@ from db_dtypes import DateArray, DateDtype +_all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + "prod", + "std", + "var", + "median", + "kurt", + "skew", +] + + +@pytest.fixture(params=_all_numeric_reductions) +def all_numeric_reductions(request): + """ + Fixture for numeric reduction names. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + +_all_boolean_reductions = ["all", "any"] + + +@pytest.fixture(params=_all_boolean_reductions) +def all_boolean_reductions(request): + """ + Fixture for boolean reduction names. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + @pytest.fixture def data(): return DateArray( @@ -43,6 +80,26 @@ def dtype(): return DateDtype() +@pytest.fixture(params=["ffill", "bfill"]) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series.fillna(method=) testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + @pytest.fixture def na_value(): return pandas.NaT + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return a is pandas.NaT and a is b + + return cmp diff --git a/tests/unit/date_compliance/test_date_compliance.py b/tests/unit/date_compliance/test_date_compliance.py index ca47fed..670fc41 100644 --- a/tests/unit/date_compliance/test_date_compliance.py +++ b/tests/unit/date_compliance/test_date_compliance.py @@ -20,8 +20,12 @@ https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py """ +import datetime + from pandas.tests.extension import base +import db_dtypes + class TestDtype(base.BaseDtypeTests): pass @@ -37,3 +41,25 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): pass + + +class TestGetitem(base.BaseGetitemTests): + def test_take_na_value_other_date(self): + arr = db_dtypes.DateArray( + [datetime.date(2022, 3, 8), datetime.date(2022, 3, 9)] + ) + result = arr.take( + [0, -1], allow_fill=True, fill_value=datetime.date(1969, 12, 31) + ) + expected = db_dtypes.DateArray( + [datetime.date(2022, 3, 8), datetime.date(1969, 12, 31)] + ) + self.assert_extension_array_equal(result, expected) + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + pass diff --git a/tests/unit/date_compliance/test_date_compliance_1_5.py b/tests/unit/date_compliance/test_date_compliance_1_5.py index e8f2c93..9c6da24 100644 --- a/tests/unit/date_compliance/test_date_compliance_1_5.py +++ b/tests/unit/date_compliance/test_date_compliance_1_5.py @@ -29,3 +29,7 @@ class Test2DCompat(base.NDArrayBacked2DTests): pass + + +class TestIndex(base.BaseIndexTests): + pass From 164101ab6c71e33b27768c169adb5ed7b2cd36b6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 9 Mar 2022 12:52:27 -0600 Subject: [PATCH 07/22] address errors in TestMethods --- tests/unit/date_compliance/conftest.py | 146 +++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/tests/unit/date_compliance/conftest.py b/tests/unit/date_compliance/conftest.py index 15b1476..20ac9b3 100644 --- a/tests/unit/date_compliance/conftest.py +++ b/tests/unit/date_compliance/conftest.py @@ -58,6 +58,28 @@ def all_boolean_reductions(request): return request.param +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + @pytest.fixture def data(): return DateArray( @@ -70,11 +92,99 @@ def data(): ) +@pytest.fixture +def data_for_grouping(dtype): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + b = datetime.date(2022, 3, 9) + a = datetime.date(1969, 12, 31) + na = pandas.NaT + return pandas.array([b, b, na, na, a, a, b], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [ + datetime.date(2022, 1, 27), + datetime.date(2022, 3, 9), + datetime.date(1969, 12, 31), + ] + ) + + +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [datetime.date(2022, 1, 27), pandas.NaT, datetime.date(1969, 12, 31)] + ) + + @pytest.fixture def data_missing(): + """Length-2 array with [NA, Valid] + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ return DateArray([None, datetime.date(2022, 1, 27)]) +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing' + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/arrays/floating/conftest.py + """ + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + @pytest.fixture def dtype(): return DateDtype() @@ -103,3 +213,39 @@ def cmp(a, b): return a is pandas.NaT and a is b return cmp + + +@pytest.fixture(params=[None, lambda x: x]) +def sort_by_key(request): + """ + Simple fixture for testing keys in sorting methods. + Tests None (no key) and the identity key. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture +def invalid_scalar(data): + """ + A scalar that *cannot* be held by this ExtensionArray. + The default should work for most subclasses, but is not guaranteed. + If the array can hold any item (i.e. object dtype), then use pytest.skip. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return object.__new__(object) From d9edc063393c4431cb8622106c7999afea5f8532 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Mar 2022 09:37:49 -0500 Subject: [PATCH 08/22] move tests --- .../date}/test_date_compliance_1_5.py | 0 tests/unit/date_compliance/conftest.py | 251 ------------------ .../date_compliance/test_date_compliance.py | 65 ----- 3 files changed, 316 deletions(-) rename tests/{unit/date_compliance => compliance/date}/test_date_compliance_1_5.py (100%) delete mode 100644 tests/unit/date_compliance/conftest.py delete mode 100644 tests/unit/date_compliance/test_date_compliance.py diff --git a/tests/unit/date_compliance/test_date_compliance_1_5.py b/tests/compliance/date/test_date_compliance_1_5.py similarity index 100% rename from tests/unit/date_compliance/test_date_compliance_1_5.py rename to tests/compliance/date/test_date_compliance_1_5.py diff --git a/tests/unit/date_compliance/conftest.py b/tests/unit/date_compliance/conftest.py deleted file mode 100644 index 20ac9b3..0000000 --- a/tests/unit/date_compliance/conftest.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime - -import numpy -import pandas -import pytest - -from db_dtypes import DateArray, DateDtype - - -_all_numeric_reductions = [ - "sum", - "max", - "min", - "mean", - "prod", - "std", - "var", - "median", - "kurt", - "skew", -] - - -@pytest.fixture(params=_all_numeric_reductions) -def all_numeric_reductions(request): - """ - Fixture for numeric reduction names. - - See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py - """ - return request.param - - -_all_boolean_reductions = ["all", "any"] - - -@pytest.fixture(params=_all_boolean_reductions) -def all_boolean_reductions(request): - """ - Fixture for boolean reduction names. - - See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def as_frame(request): - """ - Boolean fixture to support Series and Series.to_frame() comparison testing. - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def as_series(request): - """ - Boolean fixture to support arr and Series(arr) comparison testing. - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return request.param - - -@pytest.fixture -def data(): - return DateArray( - numpy.arange( - datetime.datetime(1900, 1, 1), - datetime.datetime(2099, 12, 31), - datetime.timedelta(days=731), - dtype="datetime64[ns]", - ) - ) - - -@pytest.fixture -def data_for_grouping(dtype): - """ - Data for factorization, grouping, and unique tests. - - Expected to be like [B, B, NA, NA, A, A, B, C] - - Where A < B < C and NA is missing - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - b = datetime.date(2022, 3, 9) - a = datetime.date(1969, 12, 31) - na = pandas.NaT - return pandas.array([b, b, na, na, a, a, b], dtype=dtype) - - -@pytest.fixture -def data_for_sorting(): - """ - Length-3 array with a known sort order. - - This should be three items [B, C, A] with - A < B < C - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return DateArray( - [ - datetime.date(2022, 1, 27), - datetime.date(2022, 3, 9), - datetime.date(1969, 12, 31), - ] - ) - - -@pytest.fixture -def data_missing_for_sorting(): - """ - Length-3 array with a known sort order. - - This should be three items [B, NA, A] with - A < B and NA missing. - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return DateArray( - [datetime.date(2022, 1, 27), pandas.NaT, datetime.date(1969, 12, 31)] - ) - - -@pytest.fixture -def data_missing(): - """Length-2 array with [NA, Valid] - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return DateArray([None, datetime.date(2022, 1, 27)]) - - -@pytest.fixture -def data_repeated(data): - """ - Generate many datasets. - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - - def gen(count): - for _ in range(count): - yield data - - return gen - - -@pytest.fixture(params=["data", "data_missing"]) -def all_data(request, data, data_missing): - """Parametrized fixture giving 'data' and 'data_missing' - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/arrays/floating/conftest.py - """ - if request.param == "data": - return data - elif request.param == "data_missing": - return data_missing - - -@pytest.fixture -def dtype(): - return DateDtype() - - -@pytest.fixture(params=["ffill", "bfill"]) -def fillna_method(request): - """ - Parametrized fixture giving method parameters 'ffill' and 'bfill' for - Series.fillna(method=) testing. - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return request.param - - -@pytest.fixture -def na_value(): - return pandas.NaT - - -@pytest.fixture -def na_cmp(): - def cmp(a, b): - return a is pandas.NaT and a is b - - return cmp - - -@pytest.fixture(params=[None, lambda x: x]) -def sort_by_key(request): - """ - Simple fixture for testing keys in sorting methods. - Tests None (no key) and the identity key. - - See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py - """ - return request.param - - -@pytest.fixture(params=[True, False]) -def use_numpy(request): - """ - Boolean fixture to support comparison testing of ExtensionDtype array - and numpy array. - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return request.param - - -@pytest.fixture -def invalid_scalar(data): - """ - A scalar that *cannot* be held by this ExtensionArray. - The default should work for most subclasses, but is not guaranteed. - If the array can hold any item (i.e. object dtype), then use pytest.skip. - - See: - https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py - """ - return object.__new__(object) diff --git a/tests/unit/date_compliance/test_date_compliance.py b/tests/unit/date_compliance/test_date_compliance.py deleted file mode 100644 index 670fc41..0000000 --- a/tests/unit/date_compliance/test_date_compliance.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Tests for extension interface compliance, inherited from pandas. - -See: -https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/decimal/test_decimal.py -and -https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py -""" - -import datetime - -from pandas.tests.extension import base - -import db_dtypes - - -class TestDtype(base.BaseDtypeTests): - pass - - -class TestInterface(base.BaseInterfaceTests): - pass - - -class TestConstructors(base.BaseConstructorsTests): - pass - - -class TestReshaping(base.BaseReshapingTests): - pass - - -class TestGetitem(base.BaseGetitemTests): - def test_take_na_value_other_date(self): - arr = db_dtypes.DateArray( - [datetime.date(2022, 3, 8), datetime.date(2022, 3, 9)] - ) - result = arr.take( - [0, -1], allow_fill=True, fill_value=datetime.date(1969, 12, 31) - ) - expected = db_dtypes.DateArray( - [datetime.date(2022, 3, 8), datetime.date(1969, 12, 31)] - ) - self.assert_extension_array_equal(result, expected) - - -class TestMissing(base.BaseMissingTests): - pass - - -class TestMethods(base.BaseMethodsTests): - pass From cdb0d0fc5a98e5555502fd32c65e4a7fa95b61e6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Mar 2022 10:31:07 -0500 Subject: [PATCH 09/22] add prerelease deps --- db_dtypes/core.py | 1 + noxfile.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index e99023d..90a7163 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -36,6 +36,7 @@ def construct_from_string(cls, name: str): raise TypeError( f"'construct_from_string' expects a string, got {type(name)}" ) + if name != cls.name: raise TypeError(f"Cannot construct a '{cls.__name__}' from 'another_type'") diff --git a/noxfile.py b/noxfile.py index 54421d8..92c2b40 100644 --- a/noxfile.py +++ b/noxfile.py @@ -19,6 +19,7 @@ from __future__ import absolute_import import os import pathlib +import re import shutil import nox @@ -112,18 +113,106 @@ def default(session, tests_path): ) +def prerelease(session, tests_path): + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + + # PyArrow prerelease packages are published to an alternative PyPI host. + # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + session.install( + "--extra-index-url", + "https://pypi.fury.io/arrow-nightlies/", + "--prefer-binary", + "--pre", + "--upgrade", + "pyarrow", + ) + session.install( + "--extra-index-url", + "https://pypi.anaconda.org/scipy-wheels-nightly/simple", + "--prefer-binary", + "--pre", + "--upgrade", + "pandas", + ) + session.install( + "mock", + "asyncmock", + "pytest", + "pytest-cov", + "pytest-asyncio", + "-c", + constraints_path, + ) + + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + # We use --no-deps to ensure that pre-release versions aren't overwritten + # by the version ranges in setup.py. + session.install(*deps) + session.install("--no-deps", "-e", ".") + + # Print out prerelease package versions. + session.run("python", "-m", "pip", "freeze") + + # Run py.test against the unit tests. + session.run( + "py.test", + "--quiet", + f"--junitxml=prerelease_unit_{session.python}_sponge_log.xml", + "--cov=db_dtypes", + "--cov=tests/unit", + "--cov-append", + "--cov-config=.coveragerc", + "--cov-report=", + "--cov-fail-under=0", + tests_path, + *session.posargs, + ) + + @nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) def compliance(session): """Run the compliance test suite.""" default(session, os.path.join("tests", "compliance")) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def compliance_prerelease(session): + """Run the compliance test suite with prerelease dependencies.""" + prerelease(session, os.path.join("tests", "compliance")) + + @nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): """Run the unit test suite.""" default(session, os.path.join("tests", "unit")) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def unit_prerelease(session): + """Run the unit test suite with prerelease dependencies.""" + prerelease(session, os.path.join("tests", "unit")) + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" From 804cee266c3027d1092b4daf661537b8dcfe1f3d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 27 Jan 2022 11:10:57 -0600 Subject: [PATCH 10/22] fix: address failing tests with pandas 1.5.0 test: add a test session with prerelease versions of dependencies --- .github/workflows/compliance.yml | 21 +++++ .github/workflows/unittest-prerelease.yml | 32 ++++++++ db_dtypes/core.py | 6 ++ noxfile.py | 89 +++++++++++++++++++++ owlbot.py | 96 ++++++++++++++++++++++- 5 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/unittest-prerelease.yml diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml index 77e6b05..eca8cc2 100644 --- a/.github/workflows/compliance.yml +++ b/.github/workflows/compliance.yml @@ -25,3 +25,24 @@ jobs: COVERAGE_FILE: .coverage-compliance-${{ matrix.python }} run: | nox -s compliance + compliance-prerelease: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run compliance prerelease tests + env: + COVERAGE_FILE: .coverage-compliance-prerelease-${{ matrix.python }} + run: | + nox -s compliance_prerelease diff --git a/.github/workflows/unittest-prerelease.yml b/.github/workflows/unittest-prerelease.yml new file mode 100644 index 0000000..a11568a --- /dev/null +++ b/.github/workflows/unittest-prerelease.yml @@ -0,0 +1,32 @@ +on: + pull_request: + branches: + - main +name: unittest-prerelease +jobs: + unit: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-prerelease-${{ matrix.python }} + run: | + nox -s unit_prerelease + - name: Upload coverage results + uses: actions/upload-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-${{ matrix.python }} diff --git a/db_dtypes/core.py b/db_dtypes/core.py index b5b0b7a..14d76aa 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -46,6 +46,12 @@ def construct_from_string(cls, name: str): class BaseDatetimeArray( pandas_backports.OpsMixin, pandas_backports.NDArrayBackedExtensionArray ): + # scalar used to denote NA value inside our self._ndarray, e.g. -1 for + # Categorical, iNaT for Period. Outside of object dtype, self.isna() should + # be exactly locations in self._ndarray with _internal_fill_value. See: + # https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/_mixins.py + _internal_fill_value = numpy.datetime64("NaT") + def __init__(self, values, dtype=None, copy: bool = False): if not ( isinstance(values, numpy.ndarray) and values.dtype == numpy.dtype(" Date: Wed, 16 Mar 2022 11:17:35 -0500 Subject: [PATCH 11/22] fix owlbot config --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 58362e7..ec5a5bf 100644 --- a/owlbot.py +++ b/owlbot.py @@ -138,7 +138,7 @@ def prerelease(session, tests_path): deps = [ match.group(1) for match in re.finditer( - r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + r"^\\s*(\\S+)(?===\\S+)", constraints_text, flags=re.MULTILINE ) ] From 6db3c4fbe7332df8e37dd9018f6e48f6e2926d5d Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 16 Mar 2022 16:37:00 +0000 Subject: [PATCH 12/22] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/noxfile.py b/noxfile.py index 92c2b40..e3f4d5c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,7 +38,9 @@ nox.options.sessions = [ "lint", "unit", + "unit_prerelease", "compliance", + "compliance_prerelease", "cover", "lint_setup_py", "blacken", From 10c66217c0494f6041b1eabab3d3a8e1b42e1084 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 16 Mar 2022 16:37:44 +0000 Subject: [PATCH 13/22] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/noxfile.py b/noxfile.py index 92c2b40..e3f4d5c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,7 +38,9 @@ nox.options.sessions = [ "lint", "unit", + "unit_prerelease", "compliance", + "compliance_prerelease", "cover", "lint_setup_py", "blacken", From d2e69312509123e2e50f8f83ecdb052b9ebf0d31 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Mar 2022 16:33:38 -0500 Subject: [PATCH 14/22] document why microsecond precision is used --- db_dtypes/__init__.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index a222e6d..7b2c836 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -34,6 +34,12 @@ time_dtype_name = "dbtime" _EPOCH = datetime.datetime(1970, 1, 1) _NPEPOCH = numpy.datetime64(_EPOCH) +_NP_DTYPE = "datetime64[ns]" + +# Use microseconds for conversion datetime.datetime. +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/63): Keep +# nanosecond precision when boxing scalars. +_NP_BOX_DTYPE = "datetime64[us]" pandas_release = packaging.version.parse(pandas.__version__).release @@ -149,12 +155,12 @@ def _box_func(self, x): return pandas.NaT try: - return x.astype(" Date: Wed, 16 Mar 2022 16:35:47 -0500 Subject: [PATCH 15/22] use correct units --- db_dtypes/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 7b2c836..9fda9a6 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -157,7 +157,9 @@ def _box_func(self, x): try: return x.astype(_NP_BOX_DTYPE).item().time() except AttributeError: - x = numpy.datetime64(x, _NP_BOX_DTYPE) + x = numpy.datetime64( + x, "ns" + ) # Integers are stored with nanosecond precision. return x.astype(_NP_BOX_DTYPE).item().time() __return_deltas = {"timedelta", "timedelta64", "timedelta64[ns]", " Date: Wed, 16 Mar 2022 16:47:19 -0500 Subject: [PATCH 16/22] add box_func tests --- tests/unit/test_date.py | 24 ++++++++++++++++++++++++ tests/unit/test_time.py | 26 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index bce2dc1..79c97ac 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -15,6 +15,7 @@ import datetime import operator +import numpy import pandas import pandas.testing import pytest @@ -23,6 +24,29 @@ from db_dtypes import pandas_backports +def test_box_func(): + input_array = db_dtypes.DateArray([]) + input_datetime = datetime.datetime(2022, 3, 16) + input_np = numpy.datetime64(input_datetime) + + boxed_value = input_array._box_func(input_np) + assert boxed_value.year == 2022 + assert boxed_value.month == 3 + assert boxed_value.day == 16 + + input_delta = input_datetime - datetime.datetime(1970, 1, 1) + input_nanoseconds = ( + 1_000 * input_delta.microseconds + + 1_000_000_000 * input_delta.seconds + + 1_000_000_000 * 60 * 60 * 24 * input_delta.days + ) + + boxed_value = input_array._box_func(input_nanoseconds) + assert boxed_value.year == 2022 + assert boxed_value.month == 3 + assert boxed_value.day == 16 + + def test_construct_from_string_with_nonstring(): with pytest.raises(TypeError): db_dtypes.DateDtype.construct_from_string(object()) diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index 8ecb996..db533f5 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -14,6 +14,7 @@ import datetime +import numpy import pandas import pytest @@ -22,6 +23,31 @@ from db_dtypes import pandas_backports +def test_box_func(): + input_array = db_dtypes.TimeArray([]) + input_datetime = datetime.datetime(1970, 1, 1, 1, 2, 3, 456789) + input_np = numpy.datetime64(input_datetime) + + boxed_value = input_array._box_func(input_np) + assert boxed_value.hour == 1 + assert boxed_value.minute == 2 + assert boxed_value.second == 3 + assert boxed_value.microsecond == 456789 + + input_delta = input_datetime - datetime.datetime(1970, 1, 1) + input_nanoseconds = ( + 1_000 * input_delta.microseconds + + 1_000_000_000 * input_delta.seconds + + 1_000_000_000 * 60 * 60 * 24 * input_delta.days + ) + + boxed_value = input_array._box_func(input_nanoseconds) + assert boxed_value.hour == 1 + assert boxed_value.minute == 2 + assert boxed_value.second == 3 + assert boxed_value.microsecond == 456789 + + @pytest.mark.parametrize( "value, expected", [ From 1f17580c530d634e83d570c82a54a1cc5af0dfb6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Mar 2022 16:51:07 -0500 Subject: [PATCH 17/22] typo --- db_dtypes/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 9fda9a6..d8e2ae5 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -36,7 +36,9 @@ _NPEPOCH = numpy.datetime64(_EPOCH) _NP_DTYPE = "datetime64[ns]" -# Use microseconds for conversion datetime.datetime. +# Numpy converts datetime64 scalars to datetime.datetime only if microsecond or +# smaller precision is used. +# # TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/63): Keep # nanosecond precision when boxing scalars. _NP_BOX_DTYPE = "datetime64[us]" From 2dfe9b436c87e5e507df018e9021ff2adac9bcf3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 18 Mar 2022 15:37:40 -0500 Subject: [PATCH 18/22] fix: avoid TypeError when using sorted search --- db_dtypes/core.py | 6 ++ tests/compliance/conftest.py | 60 +++++++++++++ tests/compliance/date/conftest.py | 85 +++++++++++++++++++ tests/compliance/date/test_date_compliance.py | 10 +++ 4 files changed, 161 insertions(+) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 05ac707..7bb6153 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -121,6 +121,12 @@ def _validate_scalar(self, value): """ return self._datetime(value) + def _validate_searchsorted_value(self, value): + """ + TODO: With pandas 2.0, this may be unnecessary. https://github.com/pandas-dev/pandas/pull/45544#issuecomment-1052809232 + """ + raise NotImplementedError("TODO TODO TODO") + def any( self, *, diff --git a/tests/compliance/conftest.py b/tests/compliance/conftest.py index bc76692..54b767c 100644 --- a/tests/compliance/conftest.py +++ b/tests/compliance/conftest.py @@ -16,6 +16,28 @@ import pytest +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param + + @pytest.fixture(params=["ffill", "bfill"]) def fillna_method(request): """ @@ -28,6 +50,21 @@ def fillna_method(request): return request.param +@pytest.fixture +def invalid_scalar(data): + """ + A scalar that *cannot* be held by this ExtensionArray. + + The default should work for most subclasses, but is not guaranteed. + + If the array can hold any item (i.e. object dtype), then use pytest.skip. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return object.__new__(object) + + @pytest.fixture def na_value(): return pandas.NaT @@ -51,3 +88,26 @@ def cmp(a, b): return a is pandas.NaT and a is b return cmp + + +@pytest.fixture(params=[None, lambda x: x]) +def sort_by_key(request): + """ + Simple fixture for testing keys in sorting methods. + Tests None (no key) and the identity key. + + See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return request.param diff --git a/tests/compliance/date/conftest.py b/tests/compliance/date/conftest.py index e25ccc9..6f0a816 100644 --- a/tests/compliance/date/conftest.py +++ b/tests/compliance/date/conftest.py @@ -20,6 +20,15 @@ from db_dtypes import DateArray, DateDtype +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + @pytest.fixture def data(): return DateArray( @@ -32,6 +41,52 @@ def data(): ) +@pytest.fixture +def data_for_grouping(): + """ + Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [ + datetime.date(1980, 1, 27), + datetime.date(1980, 1, 27), + None, + None, + datetime.date(1969, 12, 30), + datetime.date(1969, 12, 30), + datetime.date(1980, 1, 27), + datetime.date(2022, 3, 18), + ] + ) + + +@pytest.fixture +def data_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray( + [ + datetime.date(1980, 1, 27), + datetime.date(2022, 3, 18), + datetime.date(1969, 12, 30), + ] + ) + + @pytest.fixture def data_missing(): """Length-2 array with [NA, Valid] @@ -42,6 +97,36 @@ def data_missing(): return DateArray([None, datetime.date(2022, 1, 27)]) +@pytest.fixture +def data_missing_for_sorting(): + """ + Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + return DateArray([datetime.date(1980, 1, 27), None, datetime.date(1969, 12, 30)]) + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + See: + https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + @pytest.fixture def dtype(): return DateDtype() diff --git a/tests/compliance/date/test_date_compliance.py b/tests/compliance/date/test_date_compliance.py index a805ecd..cfbb364 100644 --- a/tests/compliance/date/test_date_compliance.py +++ b/tests/compliance/date/test_date_compliance.py @@ -21,6 +21,7 @@ """ from pandas.tests.extension import base +import pytest class TestDtype(base.BaseDtypeTests): @@ -45,3 +46,12 @@ class TestGetitem(base.BaseGetitemTests): class TestMissing(base.BaseMissingTests): pass + + +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add +# compliance tests for reduction operations. + + +class TestMethods(base.BaseMethodsTests): + def test_combine_add(self): + pytest.skip("Cannot add dates.") From 8ba12f6946892f5b5b233131f360b638486d4e37 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 18 Mar 2022 17:05:54 -0500 Subject: [PATCH 19/22] add unit tests --- db_dtypes/pandas_backports.py | 4 - tests/unit/test_date.py | 150 ++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 4 deletions(-) diff --git a/db_dtypes/pandas_backports.py b/db_dtypes/pandas_backports.py index f53adff..6455555 100644 --- a/db_dtypes/pandas_backports.py +++ b/db_dtypes/pandas_backports.py @@ -106,12 +106,8 @@ def __ge__(self, other): # See: https://github.com/pandas-dev/pandas/pull/45544 @import_default("pandas.core.arrays._mixins", pandas_release < (1, 3)) class NDArrayBackedExtensionArray(pandas.core.arrays.base.ExtensionArray): - - ndim = 1 - def __init__(self, values, dtype): assert isinstance(values, numpy.ndarray) - assert values.ndim == 1 self._ndarray = values self._dtype = dtype diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index 79c97ac..85a5d39 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -16,6 +16,7 @@ import operator import numpy +import numpy.testing import pandas import pandas.testing import pytest @@ -104,6 +105,100 @@ def test_date_parsing_errors(value, error): pandas.Series([value], dtype="dbdate") +def test_date_max_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + numpy.testing.assert_array_equal( + input_array.max(axis=0)._ndarray, + numpy.array( + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + dtype="datetime64[ns]", + ), + ) + numpy.testing.assert_array_equal( + input_array.max(axis=1)._ndarray, + numpy.array( + [ + numpy.datetime64("1990-03-03"), + numpy.datetime64("1991-04-04"), + numpy.datetime64("1992-05-05"), + ], + dtype="datetime64[ns]", + ), + ) + + +def test_date_min_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + numpy.testing.assert_array_equal( + input_array.min(axis=0)._ndarray, + numpy.array( + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + dtype="datetime64[ns]", + ), + ) + numpy.testing.assert_array_equal( + input_array.min(axis=1)._ndarray, + numpy.array( + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1971-02-02"), + numpy.datetime64("1972-03-03"), + ], + dtype="datetime64[ns]", + ), + ) + + @pytest.mark.skipif( not hasattr(pandas_backports, "numpy_validate_median"), reason="median not available with this version of pandas", @@ -128,3 +223,58 @@ def test_date_parsing_errors(value, error): def test_date_median(values, expected): series = pandas.Series(values, dtype="dbdate") assert series.median() == expected + + +@pytest.mark.skipif( + not hasattr(pandas_backports, "numpy_validate_median"), + reason="median not available with this version of pandas", +) +def test_date_median_2d(): + input_array = db_dtypes.DateArray( + numpy.array( + [ + [ + numpy.datetime64("1970-01-01"), + numpy.datetime64("1980-02-02"), + numpy.datetime64("1990-03-03"), + ], + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + [ + numpy.datetime64("1972-03-03"), + numpy.datetime64("1982-04-04"), + numpy.datetime64("1992-05-05"), + ], + ], + dtype="datetime64[ns]", + ) + ) + pandas.testing.assert_extension_array_equal( + input_array.median(axis=0), + db_dtypes.DateArray( + numpy.array( + [ + numpy.datetime64("1971-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1991-04-04"), + ], + dtype="datetime64[ns]", + ) + ), + ) + pandas.testing.assert_extension_array_equal( + input_array.median(axis=1), + db_dtypes.DateArray( + numpy.array( + [ + numpy.datetime64("1980-02-02"), + numpy.datetime64("1981-03-03"), + numpy.datetime64("1982-04-04"), + ], + dtype="datetime64[ns]", + ) + ), + ) From ae224958327ff75e60833cc3c8ae07b9508c5004 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 21 Mar 2022 12:12:11 -0500 Subject: [PATCH 20/22] fix: dbdate and dbtime support set item --- db_dtypes/__init__.py | 10 ++++- db_dtypes/core.py | 18 ++++---- db_dtypes/pandas_backports.py | 2 +- tests/unit/test_date.py | 82 ++++++++++++++++++++++++++++------- tests/unit/test_time.py | 6 +++ 5 files changed, 91 insertions(+), 27 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index d8e2ae5..7889dac 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -106,6 +106,9 @@ def _datetime( r"(?:\.(?P\d*))?)?)?\s*$" ).match, ) -> Optional[numpy.datetime64]: + if isinstance(scalar, numpy.datetime64): + return scalar + # Convert pyarrow values to datetime.time. if isinstance(scalar, (pyarrow.Time32Scalar, pyarrow.Time64Scalar)): scalar = ( @@ -116,7 +119,7 @@ def _datetime( ) if pandas.isna(scalar): - return None + return numpy.datetime64("NaT") if isinstance(scalar, datetime.time): return pandas.Timestamp( year=1970, @@ -238,12 +241,15 @@ def _datetime( scalar, match_fn=re.compile(r"\s*(?P\d+)-(?P\d+)-(?P\d+)\s*$").match, ) -> Optional[numpy.datetime64]: + if isinstance(scalar, numpy.datetime64): + return scalar + # Convert pyarrow values to datetime.date. if isinstance(scalar, (pyarrow.Date32Scalar, pyarrow.Date64Scalar)): scalar = scalar.as_py() if pandas.isna(scalar): - return None + return numpy.datetime64("NaT") elif isinstance(scalar, datetime.date): return pandas.Timestamp( year=scalar.year, month=scalar.month, day=scalar.day diff --git a/db_dtypes/core.py b/db_dtypes/core.py index 14d76aa..7879571 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -100,14 +100,6 @@ def _cmp_method(self, other, op): return NotImplemented return op(self._ndarray, other._ndarray) - def __setitem__(self, key, value): - if is_list_like(value): - _datetime = self._datetime - value = [_datetime(v) for v in value] - elif not pandas.isna(value): - value = self._datetime(value) - return super().__setitem__(key, value) - def _from_factorized(self, unique, original): return self.__class__(unique) @@ -121,6 +113,16 @@ def _validate_scalar(self, value): """ return self._datetime(value) + def _validate_setitem_value(self, value): + """ + Convert a value for use in setting a value in the backing numpy array. + """ + if is_list_like(value): + _datetime = self._datetime + return [_datetime(v) for v in value] + + return self._datetime(value) + def any( self, *, diff --git a/db_dtypes/pandas_backports.py b/db_dtypes/pandas_backports.py index f53adff..0e39986 100644 --- a/db_dtypes/pandas_backports.py +++ b/db_dtypes/pandas_backports.py @@ -126,7 +126,7 @@ def __getitem__(self, index): return self.__class__(value, self._dtype) def __setitem__(self, index, value): - self._ndarray[index] = value + self._ndarray[index] = self._validate_setitem_value(value) def __len__(self): return len(self._ndarray) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index 79c97ac..fb41620 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -24,6 +24,33 @@ from db_dtypes import pandas_backports +VALUE_PARSING_TEST_CASES = [ + # Min/Max values for pandas.Timestamp. + ("1677-09-22", datetime.date(1677, 9, 22)), + ("2262-04-11", datetime.date(2262, 4, 11)), + # Typical "zero" values. + ("1900-01-01", datetime.date(1900, 1, 1)), + ("1970-01-01", datetime.date(1970, 1, 1)), + # Assorted values. + ("1993-10-31", datetime.date(1993, 10, 31)), + (datetime.date(1993, 10, 31), datetime.date(1993, 10, 31)), + ("2012-02-29", datetime.date(2012, 2, 29)), + (numpy.datetime64("2012-02-29"), datetime.date(2012, 2, 29)), + ("2021-12-17", datetime.date(2021, 12, 17)), + (pandas.Timestamp("2021-12-17"), datetime.date(2021, 12, 17)), + ("2038-01-19", datetime.date(2038, 1, 19)), +] + +NULL_VALUE_TEST_CASES = [ + None, + pandas.NaT, + float("nan"), +] + +if hasattr(pandas, "NA"): + NULL_VALUE_TEST_CASES.append(pandas.NA) + + def test_box_func(): input_array = db_dtypes.DateArray([]) input_datetime = datetime.datetime(2022, 3, 16) @@ -58,26 +85,49 @@ def test__cmp_method_with_scalar(): assert got[0] -@pytest.mark.parametrize( - "value, expected", - [ - # Min/Max values for pandas.Timestamp. - ("1677-09-22", datetime.date(1677, 9, 22)), - ("2262-04-11", datetime.date(2262, 4, 11)), - # Typical "zero" values. - ("1900-01-01", datetime.date(1900, 1, 1)), - ("1970-01-01", datetime.date(1970, 1, 1)), - # Assorted values. - ("1993-10-31", datetime.date(1993, 10, 31)), - ("2012-02-29", datetime.date(2012, 2, 29)), - ("2021-12-17", datetime.date(2021, 12, 17)), - ("2038-01-19", datetime.date(2038, 1, 19)), - ], -) +@pytest.mark.parametrize("value, expected", VALUE_PARSING_TEST_CASES) def test_date_parsing(value, expected): assert pandas.Series([value], dtype="dbdate")[0] == expected +@pytest.mark.parametrize("value", NULL_VALUE_TEST_CASES) +def test_date_parsing_null(value): + assert pandas.Series([value], dtype="dbdate")[0] is pandas.NaT + + +@pytest.mark.parametrize("value, expected", VALUE_PARSING_TEST_CASES) +def test_date_set_item(value, expected): + series = pandas.Series([None], dtype="dbdate") + series[0] = value + assert series[0] == expected + + +@pytest.mark.parametrize("value", NULL_VALUE_TEST_CASES) +def test_date_set_item_null(value): + series = pandas.Series(["1970-01-01"], dtype="dbdate") + series[0] = value + assert series[0] is pandas.NaT + + +def test_date_set_slice(): + series = pandas.Series([None, None, None], dtype="dbdate") + series[:] = [ + datetime.date(2022, 3, 21), + "2011-12-13", + numpy.datetime64("1998-09-04"), + ] + assert series[0] == datetime.date(2022, 3, 21) + assert series[1] == datetime.date(2011, 12, 13) + assert series[2] == datetime.date(1998, 9, 4) + + +def test_date_set_slice_null(): + series = pandas.Series(["1970-01-01"] * len(NULL_VALUE_TEST_CASES), dtype="dbdate") + series[:] = NULL_VALUE_TEST_CASES + for row_index in range(len(NULL_VALUE_TEST_CASES)): + assert series[row_index] is pandas.NaT + + @pytest.mark.parametrize( "value, error", [ diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index db533f5..bdfc48b 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -73,8 +73,14 @@ def test_box_func(): # Fractional seconds can cause rounding problems if cast to float. See: # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 ("0:0:59.876543", datetime.time(0, 0, 59, 876543)), + ( + numpy.datetime64("1970-01-01 00:00:59.876543"), + datetime.time(0, 0, 59, 876543), + ), ("01:01:01.010101", datetime.time(1, 1, 1, 10101)), + (pandas.Timestamp("1970-01-01 01:01:01.010101"), datetime.time(1, 1, 1, 10101)), ("09:09:09.090909", datetime.time(9, 9, 9, 90909)), + (datetime.time(9, 9, 9, 90909), datetime.time(9, 9, 9, 90909)), ("11:11:11.111111", datetime.time(11, 11, 11, 111111)), ("19:16:23.987654", datetime.time(19, 16, 23, 987654)), # Microsecond precision From ba482062084fcde026999261c9c825225137dfa6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 21 Mar 2022 14:11:07 -0500 Subject: [PATCH 21/22] add TestMethods --- db_dtypes/core.py | 4 +++- tests/compliance/date/test_date_compliance.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/db_dtypes/core.py b/db_dtypes/core.py index b1b6285..f577960 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -115,9 +115,11 @@ def _validate_scalar(self, value): def _validate_searchsorted_value(self, value): """ + Convert a value for use in searching for a value in the backing numpy array. + TODO: With pandas 2.0, this may be unnecessary. https://github.com/pandas-dev/pandas/pull/45544#issuecomment-1052809232 """ - raise NotImplementedError("TODO TODO TODO") + return self._validate_setitem_value(value) def _validate_setitem_value(self, value): """ diff --git a/tests/compliance/date/test_date_compliance.py b/tests/compliance/date/test_date_compliance.py index cfbb364..13327a7 100644 --- a/tests/compliance/date/test_date_compliance.py +++ b/tests/compliance/date/test_date_compliance.py @@ -20,9 +20,12 @@ https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py """ +import pandas from pandas.tests.extension import base import pytest +import db_dtypes + class TestDtype(base.BaseDtypeTests): pass @@ -55,3 +58,19 @@ class TestMissing(base.BaseMissingTests): class TestMethods(base.BaseMethodsTests): def test_combine_add(self): pytest.skip("Cannot add dates.") + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + # Overridden from + # https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/methods.py + # to avoid difference in dtypes. + other = db_dtypes.DateArray(all_data[~all_data.isna()]) + else: + other = all_data + + result = pandas.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pandas.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) From 506ef46f76801d74fd23da29797578c12d7e7f2f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 24 Mar 2022 15:03:59 -0500 Subject: [PATCH 22/22] add unit test for search sorted --- tests/unit/test_date.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index b8f36f6..bbe74cb 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -328,3 +328,30 @@ def test_date_median_2d(): ) ), ) + + +@pytest.mark.parametrize( + ("search_term", "expected_index"), + ( + (datetime.date(1899, 12, 31), 0), + (datetime.date(1900, 1, 1), 0), + (datetime.date(1920, 2, 2), 1), + (datetime.date(1930, 3, 3), 1), + (datetime.date(1950, 5, 5), 2), + (datetime.date(1990, 9, 9), 3), + (datetime.date(2012, 12, 12), 3), + (datetime.date(2022, 3, 24), 4), + ), +) +def test_date_searchsorted(search_term, expected_index): + test_series = pandas.Series( + [ + datetime.date(1900, 1, 1), + datetime.date(1930, 3, 3), + datetime.date(1980, 8, 8), + datetime.date(2012, 12, 12), + ], + dtype="dbdate", + ) + got = test_series.searchsorted(search_term) + assert got == expected_index