From a2b2097702b0574907aad416e207bd7f618592e5 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 5 Nov 2024 23:25:51 +0000 Subject: [PATCH 1/6] fix: support correct numpy construction for dbjson dtype in pandas 1.5 --- db_dtypes/json.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/db_dtypes/json.py b/db_dtypes/json.py index d0bc6ca..e345f07 100644 --- a/db_dtypes/json.py +++ b/db_dtypes/json.py @@ -231,3 +231,16 @@ def _reduce( if name in ["min", "max"]: raise TypeError("JSONArray does not support min/max reducntion.") super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs) + + def __array__(self, dtype=None, copy: bool | None = None) -> np.ndarray: + """Correctly construct numpy arrays when passed to `np.asarray()`.""" + pa_type = self.pa_data.type + data = self + if dtype is None: + empty = pa.array([], type=pa_type).to_numpy(zero_copy_only=False) + dtype = empty.dtype + result = np.empty(len(data), dtype=dtype) + mask = data.isna() + result[mask] = pd.NA + result[~mask] = data[~mask].pa_data.to_numpy() + return result From b029be48f5cab6c886fb17730626b4e0b24f798f Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 5 Nov 2024 23:52:26 +0000 Subject: [PATCH 2/6] add unit tests for pandas 1.5 --- tests/unit/test_json.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/unit/test_json.py b/tests/unit/test_json.py index 365bd8f..bb85181 100644 --- a/tests/unit/test_json.py +++ b/tests/unit/test_json.py @@ -13,7 +13,9 @@ # limitations under the License. +import numpy as np import pandas as pd +import pandas._testing as tm import pytest import db_dtypes @@ -81,3 +83,14 @@ def test_deterministic_json_serialization(): y = {"b": 1, "a": 0} data = db_dtypes.JSONArray._from_sequence([y]) assert data[0] == x + + +def test_to_numpy(): + data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values()) + expected = np.asarray(data) + + result = data.to_numpy() + tm.assert_equal(result, expected) + + result = pd.Series(data).to_numpy() + tm.assert_equal(result, expected) From c5b5377be4dd3e268481000c92cae5b1d8e6d827 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 5 Nov 2024 23:54:20 +0000 Subject: [PATCH 3/6] nit --- db_dtypes/json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db_dtypes/json.py b/db_dtypes/json.py index e345f07..c43ebc2 100644 --- a/db_dtypes/json.py +++ b/db_dtypes/json.py @@ -241,6 +241,6 @@ def __array__(self, dtype=None, copy: bool | None = None) -> np.ndarray: dtype = empty.dtype result = np.empty(len(data), dtype=dtype) mask = data.isna() - result[mask] = pd.NA + result[mask] = self._dtype.na_value result[~mask] = data[~mask].pa_data.to_numpy() return result From ec065bf154988eccc611e1b9757afce55dce8148 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 6 Nov 2024 00:05:10 +0000 Subject: [PATCH 4/6] fixing import error in python 3.7 --- tests/unit/test_json.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_json.py b/tests/unit/test_json.py index bb85181..3db1dc5 100644 --- a/tests/unit/test_json.py +++ b/tests/unit/test_json.py @@ -15,7 +15,6 @@ import numpy as np import pandas as pd -import pandas._testing as tm import pytest import db_dtypes @@ -90,7 +89,7 @@ def test_to_numpy(): expected = np.asarray(data) result = data.to_numpy() - tm.assert_equal(result, expected) + pd._testing.assert_equal(result, expected) result = pd.Series(data).to_numpy() - tm.assert_equal(result, expected) + pd._testing.assert_equal(result, expected) From 38dbc89b441c10dae2fa87fcfade7e3e8592feea Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 8 Nov 2024 21:57:14 +0000 Subject: [PATCH 5/6] update unit tests --- tests/unit/test_json.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/unit/test_json.py b/tests/unit/test_json.py index 3db1dc5..112b50c 100644 --- a/tests/unit/test_json.py +++ b/tests/unit/test_json.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import numpy as np import pandas as pd @@ -85,6 +86,12 @@ def test_deterministic_json_serialization(): def test_to_numpy(): + """ + Verifies that JSONArray can be cast to a NumPy array. + This test ensures compatibility with Python 3.9 and replicates the behavior + of the `test_to_numpy` test from `test_json_compliance.py::TestJSONArrayCasting`, + which is run with Python 3.12 environments only. + """ data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values()) expected = np.asarray(data) @@ -93,3 +100,17 @@ def test_to_numpy(): result = pd.Series(data).to_numpy() pd._testing.assert_equal(result, expected) + + +def test_as_numpy_array(): + data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values()) + result = np.asarray(data) + expected = np.asarray( + [ + json.dumps(value, sort_keys=True, separators=(",", ":")) + if value is not None + else pd.NA + for value in JSON_DATA.values() + ] + ) + pd._testing.assert_equal(result, expected) From e5416b162c250216add258221cc70e72b9cd2f31 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 8 Nov 2024 22:04:32 +0000 Subject: [PATCH 6/6] nit --- testing/constraints-3.9.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 4700825..afea9b0 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -1,3 +1,3 @@ -# Make sure we test with pandas 1.5.0. The Python version isn't that relevant. +# Make sure we test with pandas 1.5.3. The Python version isn't that relevant. pandas==1.5.3 numpy==1.24.0 \ No newline at end of file