From 804cee266c3027d1092b4daf661537b8dcfe1f3d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 27 Jan 2022 11:10:57 -0600 Subject: [PATCH 1/8] fix: address failing tests with pandas 1.5.0 test: add a test session with prerelease versions of dependencies --- .github/workflows/compliance.yml | 21 +++++ .github/workflows/unittest-prerelease.yml | 32 ++++++++ db_dtypes/core.py | 6 ++ noxfile.py | 89 +++++++++++++++++++++ owlbot.py | 96 ++++++++++++++++++++++- 5 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/unittest-prerelease.yml diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml index 77e6b05..eca8cc2 100644 --- a/.github/workflows/compliance.yml +++ b/.github/workflows/compliance.yml @@ -25,3 +25,24 @@ jobs: COVERAGE_FILE: .coverage-compliance-${{ matrix.python }} run: | nox -s compliance + compliance-prerelease: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run compliance prerelease tests + env: + COVERAGE_FILE: .coverage-compliance-prerelease-${{ matrix.python }} + run: | + nox -s compliance_prerelease diff --git a/.github/workflows/unittest-prerelease.yml b/.github/workflows/unittest-prerelease.yml new file mode 100644 index 0000000..a11568a --- /dev/null +++ b/.github/workflows/unittest-prerelease.yml @@ -0,0 +1,32 @@ +on: + pull_request: + branches: + - main +name: unittest-prerelease +jobs: + unit: + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.10'] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools 
pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-prerelease-${{ matrix.python }} + run: | + nox -s unit_prerelease + - name: Upload coverage results + uses: actions/upload-artifact@v3 + with: + name: coverage-artifacts + path: .coverage-prerelease-${{ matrix.python }} diff --git a/db_dtypes/core.py b/db_dtypes/core.py index b5b0b7a..14d76aa 100644 --- a/db_dtypes/core.py +++ b/db_dtypes/core.py @@ -46,6 +46,12 @@ def construct_from_string(cls, name: str): class BaseDatetimeArray( pandas_backports.OpsMixin, pandas_backports.NDArrayBackedExtensionArray ): + # scalar used to denote NA value inside our self._ndarray, e.g. -1 for + # Categorical, iNaT for Period. Outside of object dtype, self.isna() should + # be exactly locations in self._ndarray with _internal_fill_value. See: + # https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/_mixins.py + _internal_fill_value = numpy.datetime64("NaT") + def __init__(self, values, dtype=None, copy: bool = False): if not ( isinstance(values, numpy.ndarray) and values.dtype == numpy.dtype(" Date: Wed, 16 Mar 2022 11:17:35 -0500 Subject: [PATCH 2/8] fix owlbot config --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 58362e7..ec5a5bf 100644 --- a/owlbot.py +++ b/owlbot.py @@ -138,7 +138,7 @@ def prerelease(session, tests_path): deps = [ match.group(1) for match in re.finditer( - r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + r"^\\s*(\\S+)(?===\\S+)", constraints_text, flags=re.MULTILINE ) ] From 6db3c4fbe7332df8e37dd9018f6e48f6e2926d5d Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 16 Mar 2022 16:37:00 +0000 Subject: [PATCH 3/8] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 2 ++ 
1 file changed, 2 insertions(+) diff --git a/noxfile.py b/noxfile.py index 92c2b40..e3f4d5c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,7 +38,9 @@ nox.options.sessions = [ "lint", "unit", + "unit_prerelease", "compliance", + "compliance_prerelease", "cover", "lint_setup_py", "blacken", From 10c66217c0494f6041b1eabab3d3a8e1b42e1084 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 16 Mar 2022 16:37:44 +0000 Subject: [PATCH 4/8] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/noxfile.py b/noxfile.py index 92c2b40..e3f4d5c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,7 +38,9 @@ nox.options.sessions = [ "lint", "unit", + "unit_prerelease", "compliance", + "compliance_prerelease", "cover", "lint_setup_py", "blacken", From d2e69312509123e2e50f8f83ecdb052b9ebf0d31 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Mar 2022 16:33:38 -0500 Subject: [PATCH 5/8] document why microsecond precision is used --- db_dtypes/__init__.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index a222e6d..7b2c836 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -34,6 +34,12 @@ time_dtype_name = "dbtime" _EPOCH = datetime.datetime(1970, 1, 1) _NPEPOCH = numpy.datetime64(_EPOCH) +_NP_DTYPE = "datetime64[ns]" + +# Use microseconds for conversion datetime.datetime. +# TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/63): Keep +# nanosecond precision when boxing scalars. 
+_NP_BOX_DTYPE = "datetime64[us]" pandas_release = packaging.version.parse(pandas.__version__).release @@ -149,12 +155,12 @@ def _box_func(self, x): return pandas.NaT try: - return x.astype(" Date: Wed, 16 Mar 2022 16:35:47 -0500 Subject: [PATCH 6/8] use correct units --- db_dtypes/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 7b2c836..9fda9a6 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -157,7 +157,9 @@ def _box_func(self, x): try: return x.astype(_NP_BOX_DTYPE).item().time() except AttributeError: - x = numpy.datetime64(x, _NP_BOX_DTYPE) + x = numpy.datetime64( + x, "ns" + ) # Integers are stored with nanosecond precision. return x.astype(_NP_BOX_DTYPE).item().time() __return_deltas = {"timedelta", "timedelta64", "timedelta64[ns]", " Date: Wed, 16 Mar 2022 16:47:19 -0500 Subject: [PATCH 7/8] add box_func tests --- tests/unit/test_date.py | 24 ++++++++++++++++++++++++ tests/unit/test_time.py | 26 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tests/unit/test_date.py b/tests/unit/test_date.py index bce2dc1..79c97ac 100644 --- a/tests/unit/test_date.py +++ b/tests/unit/test_date.py @@ -15,6 +15,7 @@ import datetime import operator +import numpy import pandas import pandas.testing import pytest @@ -23,6 +24,29 @@ from db_dtypes import pandas_backports +def test_box_func(): + input_array = db_dtypes.DateArray([]) + input_datetime = datetime.datetime(2022, 3, 16) + input_np = numpy.datetime64(input_datetime) + + boxed_value = input_array._box_func(input_np) + assert boxed_value.year == 2022 + assert boxed_value.month == 3 + assert boxed_value.day == 16 + + input_delta = input_datetime - datetime.datetime(1970, 1, 1) + input_nanoseconds = ( + 1_000 * input_delta.microseconds + + 1_000_000_000 * input_delta.seconds + + 1_000_000_000 * 60 * 60 * 24 * input_delta.days + ) + + boxed_value = input_array._box_func(input_nanoseconds) + 
assert boxed_value.year == 2022 + assert boxed_value.month == 3 + assert boxed_value.day == 16 + + def test_construct_from_string_with_nonstring(): with pytest.raises(TypeError): db_dtypes.DateDtype.construct_from_string(object()) diff --git a/tests/unit/test_time.py b/tests/unit/test_time.py index 8ecb996..db533f5 100644 --- a/tests/unit/test_time.py +++ b/tests/unit/test_time.py @@ -14,6 +14,7 @@ import datetime +import numpy import pandas import pytest @@ -22,6 +23,31 @@ from db_dtypes import pandas_backports +def test_box_func(): + input_array = db_dtypes.TimeArray([]) + input_datetime = datetime.datetime(1970, 1, 1, 1, 2, 3, 456789) + input_np = numpy.datetime64(input_datetime) + + boxed_value = input_array._box_func(input_np) + assert boxed_value.hour == 1 + assert boxed_value.minute == 2 + assert boxed_value.second == 3 + assert boxed_value.microsecond == 456789 + + input_delta = input_datetime - datetime.datetime(1970, 1, 1) + input_nanoseconds = ( + 1_000 * input_delta.microseconds + + 1_000_000_000 * input_delta.seconds + + 1_000_000_000 * 60 * 60 * 24 * input_delta.days + ) + + boxed_value = input_array._box_func(input_nanoseconds) + assert boxed_value.hour == 1 + assert boxed_value.minute == 2 + assert boxed_value.second == 3 + assert boxed_value.microsecond == 456789 + + @pytest.mark.parametrize( "value, expected", [ From 1f17580c530d634e83d570c82a54a1cc5af0dfb6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 16 Mar 2022 16:51:07 -0500 Subject: [PATCH 8/8] typo --- db_dtypes/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index 9fda9a6..d8e2ae5 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -36,7 +36,9 @@ _NPEPOCH = numpy.datetime64(_EPOCH) _NP_DTYPE = "datetime64[ns]" -# Use microseconds for conversion datetime.datetime. +# Numpy converts datetime64 scalars to datetime.datetime only if microsecond or +# coarser precision is used. 
+# # TODO(https://github.com/googleapis/python-db-dtypes-pandas/issues/63): Keep # nanosecond precision when boxing scalars. _NP_BOX_DTYPE = "datetime64[us]"