diff --git a/db_dtypes/json.py b/db_dtypes/json.py
index d0bc6ca..c43ebc2 100644
--- a/db_dtypes/json.py
+++ b/db_dtypes/json.py
@@ -231,3 +231,36 @@ def _reduce(
         if name in ["min", "max"]:
             raise TypeError("JSONArray does not support min/max reducntion.")
         super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
+
+    def __array__(self, dtype=None, copy: bool | None = None) -> np.ndarray:
+        """Build a NumPy array from this array, e.g. for `np.asarray()`.
+
+        Args:
+            dtype: Requested NumPy dtype. When None, the dtype that pyarrow
+                maps this array's Arrow type to is used.
+            copy: NumPy `__array__` protocol flag. The result is always a
+                newly allocated array, so True and None are both honored.
+
+        Returns:
+            A new array of length `len(self)`; missing entries hold the
+            dtype's NA value, the others the values of the Arrow data.
+
+        Raises:
+            ValueError: If `copy` is False, because the result is never a
+                view of the underlying data.
+        """
+        if copy is False:
+            # The protocol demands an error when a copy cannot be avoided.
+            raise ValueError(
+                "Unable to avoid copy while creating an array as requested."
+            )
+        if dtype is None:
+            # Converting an empty array of the same Arrow type yields the
+            # NumPy dtype pyarrow would pick, without converting real data.
+            empty = pa.array([], type=self.pa_data.type)
+            dtype = empty.to_numpy(zero_copy_only=False).dtype
+        result = np.empty(len(self), dtype=dtype)
+        mask = self.isna()
+        result[mask] = self._dtype.na_value
+        result[~mask] = self[~mask].pa_data.to_numpy()
+        return result
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index 4700825..afea9b0 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -1,3 +1,3 @@
-# Make sure we test with pandas 1.5.0. The Python version isn't that relevant.
+# Make sure we test with pandas 1.5.3. The Python version isn't that relevant.
 pandas==1.5.3
 numpy==1.24.0
\ No newline at end of file
diff --git a/tests/unit/test_json.py b/tests/unit/test_json.py
index 365bd8f..112b50c 100644
--- a/tests/unit/test_json.py
+++ b/tests/unit/test_json.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 
+import numpy as np
 import pandas as pd
 import pytest
 
@@ -81,3 +83,42 @@ def test_deterministic_json_serialization():
     y = {"b": 1, "a": 0}
     data = db_dtypes.JSONArray._from_sequence([y])
     assert data[0] == x
+
+
+def test_to_numpy():
+    """
+    Verifies that JSONArray can be cast to a NumPy array.
+    This test ensures compatibility with Python 3.9 and replicates the behavior
+    of the `test_to_numpy` test from `test_json_compliance.py::TestJSONArrayCasting`,
+    which is run with Python 3.12 environments only.
+    """
+    data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values())
+    expected = np.asarray(data)
+
+    result = data.to_numpy()
+    pd._testing.assert_equal(result, expected)
+
+    result = pd.Series(data).to_numpy()
+    pd._testing.assert_equal(result, expected)
+
+
+def test_as_numpy_array():
+    """np.asarray yields serialized JSON strings, with pd.NA for None values."""
+    data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values())
+    result = np.asarray(data)
+    expected = np.asarray(
+        [
+            json.dumps(value, sort_keys=True, separators=(",", ":"))
+            if value is not None
+            else pd.NA
+            for value in JSON_DATA.values()
+        ]
+    )
+    pd._testing.assert_equal(result, expected)
+
+
+def test_as_numpy_array_rejects_copy_false():
+    """`__array__` always allocates, so `copy=False` must raise ValueError."""
+    data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values())
+    with pytest.raises(ValueError):
+        data.__array__(copy=False)