Skip to content

Commit 7c876ed

Browse files
BUG: checking for value type when parquet is partitioned (#54074)
* checking for value type when parquet is partitioned
* whatsnew formatting
* added whatsnew
* removed aliasing and skip_if_no_arrow decorator
* added unit test for valid pyarrow array of dictionary with string value_type
* updated whatsnew with suggested changes
1 parent dc830ea commit 7c876ed

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

doc/source/whatsnew/v2.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -585,13 +585,13 @@ Sparse
585585

586586
ExtensionArray
587587
^^^^^^^^^^^^^^
588+
- Bug in :class:`ArrowStringArray` constructor raising ``ValueError`` with dictionary types of strings (:issue:`54074`)
588589
- Bug in :class:`DataFrame` constructor not copying :class:`Series` with extension dtype when given in dict (:issue:`53744`)
589590
- Bug in :class:`~arrays.ArrowExtensionArray` converting pandas non-nanosecond temporal objects from non-zero values to zero values (:issue:`53171`)
590591
- Bug in :meth:`Series.quantile` for pyarrow temporal types raising ArrowInvalid (:issue:`52678`)
591592
- Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`)
592593
- Bug in :meth:`~arrays.ArrowExtensionArray.__iter__` and :meth:`~arrays.ArrowExtensionArray.__getitem__` returning python datetime and timedelta objects for non-nano dtypes (:issue:`53326`)
593594
- Bug where the ``__from_arrow__`` method of masked ExtensionDtypes (e.g. :class:`Float64Dtype`, :class:`BooleanDtype`) would not accept pyarrow arrays of type ``pyarrow.null()`` (:issue:`52223`)
594-
-
595595

596596
Styler
597597
^^^^^^

pandas/core/arrays/string_arrow.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,10 @@ def __init__(self, values) -> None:
118118
super().__init__(values)
119119
self._dtype = StringDtype(storage="pyarrow")
120120

121-
if not pa.types.is_string(self._pa_array.type):
121+
if not pa.types.is_string(self._pa_array.type) and not (
122+
pa.types.is_dictionary(self._pa_array.type)
123+
and pa.types.is_string(self._pa_array.type.value_type)
124+
):
122125
raise ValueError(
123126
"ArrowStringArray requires a PyArrow (chunked) array of string type"
124127
)

pandas/tests/arrays/string_/test_string_arrow.py

+27
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,33 @@ def test_constructor_not_string_type_raises(array, chunked):
6969
ArrowStringArray(arr)
7070

7171

72+
@pytest.mark.parametrize("chunked", [True, False])
73+
def test_constructor_not_string_type_value_dictionary_raises(chunked):
74+
pa = pytest.importorskip("pyarrow")
75+
76+
arr = pa.array([1, 2, 3], pa.dictionary(pa.int32(), pa.int32()))
77+
if chunked:
78+
arr = pa.chunked_array(arr)
79+
80+
msg = re.escape(
81+
"ArrowStringArray requires a PyArrow (chunked) array of string type"
82+
)
83+
with pytest.raises(ValueError, match=msg):
84+
ArrowStringArray(arr)
85+
86+
87+
@pytest.mark.parametrize("chunked", [True, False])
88+
def test_constructor_valid_string_type_value_dictionary(chunked):
89+
pa = pytest.importorskip("pyarrow")
90+
91+
arr = pa.array(["1", "2", "3"], pa.dictionary(pa.int32(), pa.utf8()))
92+
if chunked:
93+
arr = pa.chunked_array(arr)
94+
95+
arr = ArrowStringArray(arr)
96+
assert pa.types.is_string(arr._pa_array.type.value_type)
97+
98+
7299
@skip_if_no_pyarrow
73100
def test_from_sequence_wrong_dtype_raises():
74101
with pd.option_context("string_storage", "python"):

0 commit comments

Comments
 (0)