Skip to content

fix: avoid TypeError when using sorted search #84

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Mar 24, 2022
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b48ee6d
fix: address failing compliance tests in DateArray and TimeArray
tswast Jan 27, 2022
90e1573
fix min/max/median for 2D arrays
tswast Jan 27, 2022
47100f5
fixes except for null contains
tswast Feb 1, 2022
cece518
actually use NaT as 'advertised'
tswast Feb 2, 2022
cc5b178
fix!: use `pandas.NaT` for missing values in dbdate and dbtime dtypes
tswast Jan 27, 2022
cd84754
Merge branch 'issue28-NaT' into issue28-NDArrayBacked2DTests
tswast Feb 2, 2022
f807c6f
Merge remote-tracking branch 'upstream/main' into issue28-NDArrayBack…
tswast Feb 2, 2022
cc713a8
more progress towards compliance
tswast Mar 8, 2022
164101a
address errors in TestMethods
tswast Mar 9, 2022
1f06c78
Merge remote-tracking branch 'upstream/main' into issue28-NDArrayBack…
tswast Mar 16, 2022
d9edc06
move tests
tswast Mar 16, 2022
cdb0d0f
add prerelease deps
tswast Mar 16, 2022
804cee2
fix: address failing tests with pandas 1.5.0
tswast Jan 27, 2022
3e088ac
fix owlbot config
tswast Mar 16, 2022
6db3c4f
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Mar 16, 2022
10c6621
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Mar 16, 2022
83451c7
Merge branch 'issue81-pandas-1.5.0-unit-tests' of https://github.com/…
gcf-owl-bot[bot] Mar 16, 2022
d2e6931
document why microsecond precision is used
tswast Mar 16, 2022
6023008
use correct units
tswast Mar 16, 2022
d85c356
add box_func tests
tswast Mar 16, 2022
1f17580
typo
tswast Mar 16, 2022
e54050e
Merge branch 'issue81-pandas-1.5.0-unit-tests' into issue28-NDArrayBa…
tswast Mar 17, 2022
432bce1
Merge branch 'main' into issue28-NDArrayBacked2DTests
tswast Mar 18, 2022
2dfe9b4
fix: avoid TypeError when using sorted search
tswast Mar 18, 2022
8ba12f6
add unit tests
tswast Mar 18, 2022
f3a326c
Merge remote-tracking branch 'upstream/issue28-NDArrayBacked2DTests' …
tswast Mar 18, 2022
f05364b
Merge remote-tracking branch 'upstream/issue28-NDArrayBacked2DTests' …
tswast Mar 21, 2022
ae22495
fix: dbdate and dbtime support set item
tswast Mar 21, 2022
0a55117
Merge branch 'issue28-set-item' into issue28-more-compliance-tests
tswast Mar 21, 2022
ba48206
add TestMethods
tswast Mar 21, 2022
9801e8b
Merge remote-tracking branch 'upstream/main' into issue28-more-compli…
tswast Mar 21, 2022
b355977
Merge branch 'main' into issue28-more-compliance-tests
tswast Mar 24, 2022
506ef46
add unit test for search sorted
tswast Mar 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions db_dtypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ def _validate_scalar(self, value):
"""
return self._datetime(value)

def _validate_searchsorted_value(self, value):
"""
Convert a value for use in searching for a value in the backing numpy array.

TODO: With pandas 2.0, this may be unnecessary. https://github.com/pandas-dev/pandas/pull/45544#issuecomment-1052809232
"""
return self._validate_setitem_value(value)

def _validate_setitem_value(self, value):
"""
Convert a value for use in setting a value in the backing numpy array.
Expand Down
60 changes: 60 additions & 0 deletions tests/compliance/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,28 @@
import pytest


@pytest.fixture(params=[True, False])
def as_frame(request):
    """
    Parametrized boolean flag for tests that compare a Series with
    Series.to_frame().

    Mirrors the pandas fixture of the same name:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    flag = request.param
    return flag


@pytest.fixture(params=[True, False])
def as_series(request):
    """
    Parametrized boolean flag for tests that compare an array with
    Series(array).

    Mirrors the pandas fixture of the same name:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    flag = request.param
    return flag


@pytest.fixture(params=["ffill", "bfill"])
def fillna_method(request):
"""
Expand All @@ -28,6 +50,21 @@ def fillna_method(request):
return request.param


@pytest.fixture
def invalid_scalar(data):
    """
    Provide a scalar that the ExtensionArray under test *cannot* hold.

    A bare ``object`` instance is returned. This default should suit most
    subclasses, but that is not guaranteed; if the array can hold any item
    (i.e. object dtype), use pytest.skip instead.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    # A plain ``object`` instance carries no value the array could interpret.
    unholdable = object()
    return unholdable


@pytest.fixture
def na_value():
    """Return ``pandas.NaT`` as the missing-value marker for these tests."""
    missing = pandas.NaT
    return missing
Expand All @@ -51,3 +88,26 @@ def cmp(a, b):
return a is pandas.NaT and a is b

return cmp


@pytest.fixture(params=[None, lambda x: x])
def sort_by_key(request):
    """
    Parametrized ``key`` argument for exercising sorting methods.

    Two cases are produced: ``None`` (no key) and the identity function.

    See: https://github.com/pandas-dev/pandas/blob/main/pandas/conftest.py
    """
    key = request.param
    return key


@pytest.fixture(params=[True, False])
def use_numpy(request):
    """
    Parametrized boolean flag for tests that compare an ExtensionDtype
    array with a numpy array.

    Mirrors the pandas fixture of the same name:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    flag = request.param
    return flag
85 changes: 85 additions & 0 deletions tests/compliance/date/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@
from db_dtypes import DateArray, DateDtype


@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture that hands back 'data', then 'data_missing'."""
    # Look the requested fixture up by name; an unrecognised name gives None,
    # the same result as falling off the end of an if/elif chain.
    by_name = {"data": data, "data_missing": data_missing}
    return by_name.get(request.param)


@pytest.fixture
def data():
return DateArray(
Expand All @@ -32,6 +41,52 @@ def data():
)


@pytest.fixture
def data_for_grouping():
    """
    DateArray used by the factorization, grouping, and unique tests.

    The layout is [B, B, NA, NA, A, A, B, C], where A < B < C and NA is a
    missing value.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    a = datetime.date(1969, 12, 30)
    b = datetime.date(1980, 1, 27)
    c = datetime.date(2022, 3, 18)
    return DateArray([b, b, None, None, a, a, b, c])


@pytest.fixture
def data_for_sorting():
    """
    Three-element DateArray with a known sort order.

    The layout is [B, C, A], where A < B < C.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    a = datetime.date(1969, 12, 30)
    b = datetime.date(1980, 1, 27)
    c = datetime.date(2022, 3, 18)
    return DateArray([b, c, a])


@pytest.fixture
def data_missing():
"""Length-2 array with [NA, Valid]
Expand All @@ -42,6 +97,36 @@ def data_missing():
return DateArray([None, datetime.date(2022, 1, 27)])


@pytest.fixture
def data_missing_for_sorting():
    """
    Three-element DateArray with a known sort order and one missing value.

    The layout is [B, NA, A], where A < B and NA is missing.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """
    a = datetime.date(1969, 12, 30)
    b = datetime.date(1980, 1, 27)
    return DateArray([b, None, a])


@pytest.fixture
def data_repeated(data):
    """
    Return a generator function that yields the ``data`` fixture repeatedly.

    Calling the returned function with ``count`` produces a generator that
    yields the same ``data`` object ``count`` times.

    See:
    https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/conftest.py
    """

    def gen(count):
        yield from (data for _ in range(count))

    return gen


@pytest.fixture
def dtype():
    """Return a fresh ``DateDtype`` instance."""
    date_dtype = DateDtype()
    return date_dtype
29 changes: 29 additions & 0 deletions tests/compliance/date/test_date_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/test_period.py
"""

import pandas
from pandas.tests.extension import base
import pytest

import db_dtypes


class TestDtype(base.BaseDtypeTests):
Expand All @@ -45,3 +49,28 @@ class TestGetitem(base.BaseGetitemTests):

class TestMissing(base.BaseMissingTests):
    """Run the inherited ``BaseMissingTests`` suite with no overrides."""


# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/78): Add
# compliance tests for reduction operations.


class TestMethods(base.BaseMethodsTests):
    """``BaseMethodsTests`` suite with two date-specific overrides."""

    def test_combine_add(self):
        """Skip the inherited test: dates cannot be added together."""
        pytest.skip("Cannot add dates.")

    @pytest.mark.parametrize("dropna", [True, False])
    def test_value_counts(self, all_data, dropna):
        """
        Check ``Series.value_counts`` on the first ten elements of ``all_data``.

        Overridden from
        https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/base/methods.py
        to avoid difference in dtypes.
        """
        head = all_data[:10]
        # When dropping NA, rebuild a DateArray from the non-missing values so
        # the expected Series has the same dtype as the result.
        other = db_dtypes.DateArray(head[~head.isna()]) if dropna else head

        result = pandas.Series(head).value_counts(dropna=dropna).sort_index()
        expected = pandas.Series(other).value_counts(dropna=dropna).sort_index()

        self.assert_series_equal(result, expected)