Only import JSONArray and JSONDtype when the pandas version is 1.5.0 or higher

chelsea-lin · chelsea-lin · commit 9e92dc764a65 · 2024-08-02T20:40:23.000Z
diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py
@@ -28,7 +28,6 @@
 import pyarrow.compute
 
 from db_dtypes import core
-from db_dtypes.json import JSONArray, JSONDtype
 from db_dtypes.version import __version__
 
 date_dtype_name = "dbdate"
@@ -44,7 +43,14 @@
 # nanosecond precision when boxing scalars.
 _NP_BOX_DTYPE = "datetime64[us]"
 
-pandas_release = packaging.version.parse(pandas.__version__).release
+
+# To use JSONArray and JSONDtype, you'll need Pandas 1.5.0 or later. With the removal
+# of Python 3.7 compatibility, the minimum Pandas version will be updated to 1.5.0.
+if packaging.version.Version(pandas.__version__) >= packaging.version.Version("1.5.0"):
+    from db_dtypes.json import JSONArray, JSONDtype
+else:
+    JSONArray = None
+    JSONDtype = None
 
 
 @pandas.api.extensions.register_extension_dtype
diff --git a/db_dtypes/json.py b/db_dtypes/json.py
@@ -48,8 +48,13 @@ def na_value(self) -> pd.NA:
 
     @property
     def type(self) -> type[str]:
-        """Return the scalar type for the array, e.g. int."""
-        return dict
+        """
+        Return the scalar type for the array elements.
+        The standard JSON data types can be one of `dict`, `list`, `str`, `int`, `float`,
+        `bool` and `None`. However, this property returns `str` to indicate the
+        storage type, because a union of multiple types is not well supported in pandas.
+        """
+        return str
 
     @property
     def _is_numeric(self) -> bool:
diff --git a/tests/compliance/json/test_json_compliance.py b/tests/compliance/json/test_json_compliance.py
@@ -18,12 +18,11 @@
 import numpy as np
 import pandas as pd
 import pandas._testing as tm
-from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
-from pandas.tests.extension import base
+import pandas.tests.extension.base
 import pytest
 
 
-class TestJSONArray(base.ExtensionTests):
+class TestJSONArray(pandas.tests.extension.base.ExtensionTests):
     @pytest.mark.xfail(reason="Unhashable")
     def test_value_counts_with_normalize(self, data):
         super().test_value_counts_with_normalize(data)
@@ -157,9 +156,9 @@ def test_array_interface(self, data):
         result = np.array(data, dtype=object)
         # Use `json.dumps(x)` instead of passing `x` directly to the super method.
         expected = np.array([json.dumps(x) for x in data], dtype=object)
-        if expected.ndim > 1:
-            # nested data, explicitly construct as 1D
-            expected = construct_1d_object_array_from_listlike(list(data))
+        # if expected.ndim > 1:
+        #     # nested data, explicitly construct as 1D
+        #     expected = construct_1d_object_array_from_listlike(list(data))
         tm.assert_numpy_array_equal(result, expected)
 
     @pytest.mark.xfail(reason="Setting a dict as a scalar")
@@ -212,6 +211,16 @@ def test_series_constructor_scalar_with_index(self, data, dtype):
         expected = pd.Series([scalar], index=["foo"], dtype=dtype)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(reason="dtype.type is str but scalars can be any JSON type")
+    def test_getitem_scalar(self, data):
+        """
+        `__getitem__` can return an object of any JSON type, while `data.dtype.type`
+        is `str` (the storage type), so the base class assertion fails:
+        >       assert isinstance(result, data.dtype.type)
+        E       AssertionError
+        """
+        super().test_getitem_scalar(data)
+
     # Patching `[....] * len()` to base.BaseSetitemTests because pandas' internals
     # has trouble setting sequences of values into scalar positions.