Skip to content

Commit 5a2b113

Browse files
committed
Assorted cleanups
1 parent 25087f7 commit 5a2b113

File tree

4 files changed

+9
-78
lines changed

4 files changed

+9
-78
lines changed

pandas/core/arrays/list_.py

+6 -20
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,7 @@ class ListDtype(ArrowDtype):
7474
An ExtensionDtype suitable for storing homogeneous lists of data.
7575
"""
7676

77-
_is_immutable = True # TODO(wayd): should we allow mutability?
77+
_is_immutable = True
7878

7979
def __init__(self, value_dtype: pa.DataType) -> None:
8080
super().__init__(pa.large_list(value_dtype))
@@ -100,10 +100,7 @@ def name(self) -> str: # type: ignore[override]
100100
"""
101101
A string identifying the data type.
102102
"""
103-
# TODO: reshaping tests require the name list to match the large_list
104-
# implementation; assumedly there are some astype(str(dtype)) casts
105-
# going on. Should fix so this can just be "list[...]" for end user
106-
return f"large_list[{self.pyarrow_dtype.value_type!s}]"
103+
return f"list[{self.pyarrow_dtype.value_type!s}]"
107104

108105
@property
109106
def kind(self) -> str:
@@ -124,7 +121,6 @@ def construct_array_type(cls) -> type_t[ListArray]:
124121
return ListArray
125122

126123
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
127-
# TODO(wayd): should we implemented value type support?
128124
for dtype in dtypes:
129125
if (
130126
isinstance(dtype, ListDtype)
@@ -153,8 +149,7 @@ def __init__(
153149
if isinstance(values, (pa.Array, pa.ChunkedArray)):
154150
parent_type = values.type
155151
if not isinstance(parent_type, (pa.ListType, pa.LargeListType)):
156-
# Ideally could cast here, but I don't think pyarrow implements
157-
# many list casts
152+
# TODO: maybe implement native casts in pyarrow
158153
new_values = [
159154
[x.as_py()] if x.is_valid else None for x in values
160155
]
@@ -164,12 +159,10 @@ def __init__(
164159
else:
165160
value_type = pa.array(values).type.value_type
166161

167-
# Internally always use large_string instead of string
168162
if value_type == pa.string():
169163
value_type = pa.large_string()
170164

171165
if not isinstance(values, pa.ChunkedArray):
172-
# To support NA, we need to create an Array first :-(
173166
arr = pa.array(values, type=pa.large_list(value_type), from_pandas=True)
174167
self._pa_array = pa.chunked_array(arr, type=pa.large_list(value_type))
175168
else:
@@ -200,8 +193,6 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
200193
values = pa.array(scalars, from_pandas=True)
201194

202195
if values.type == "null" and dtype is not None:
203-
# TODO: the sequencing here seems wrong; just making the tests pass for now
204-
# but this needs a comprehensive review
205196
pa_type = string_to_pyarrow_type(str(dtype))
206197
values = pa.array(values, type=pa_type)
207198

@@ -232,8 +223,6 @@ def _box_pa(
232223
return cls._box_pa_array(value, pa_type)
233224

234225
def __getitem__(self, item):
235-
# PyArrow does not support NumPy's selection with an equal length
236-
# mask, so let's convert those to integral positions if needed
237226
if isinstance(item, (np.ndarray, ExtensionArray)):
238227
if is_bool_dtype(item.dtype):
239228
mask_len = len(item)
@@ -305,9 +294,6 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
305294
ExtensionDtype.empty
306295
ExtensionDtype.empty is the 'official' public version of this API.
307296
"""
308-
# Implementer note: while ExtensionDtype.empty is the public way to
309-
# call this method, it is still required to implement this `_empty`
310-
# method as well (it is called internally in pandas)
311297
if isinstance(shape, tuple):
312298
if len(shape) > 1:
313299
raise ValueError("ListArray may only be 1-D")
@@ -334,9 +320,9 @@ def __eq__(self, other):
334320
elif isinstance(other, (pa.ListScalar, pa.LargeListScalar)):
335321
from pandas.arrays import BooleanArray
336322

337-
# TODO: pyarrow.compute does not implement broadcasting equality
338-
# for an array of lists to a listscalar
339-
# TODO: pyarrow doesn't compare missing values as missing???
323+
# TODO: pyarrow.compute does not implement equal for lists
324+
# https://github.com/apache/arrow/issues/45167
325+
# TODO: pyarrow doesn't compare missing values in Python as missing???
340326
# arr = pa.array([1, 2, None])
341327
# pc.equal(arr, arr[2]) returns all nulls but
342328
# arr[2] == arr[2] returns True

pandas/core/internals/construction.py

-3
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313

1414
import numpy as np
1515
from numpy import ma
16-
import pyarrow as pa
1716

1817
from pandas._config import using_string_dtype
1918

@@ -462,8 +461,6 @@ def treat_as_nested(data, dtype) -> bool:
462461
len(data) > 0
463462
and is_list_like(data[0])
464463
and getattr(data[0], "ndim", 1) == 1
465-
# TODO(wayd): hack so pyarrow list elements don't expand
466-
and not isinstance(data[0], pa.ListScalar)
467464
and not isinstance(dtype, ListDtype)
468465
and not (isinstance(data, ExtensionArray) and data.ndim == 2)
469466
)

pandas/core/internals/managers.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -1976,10 +1976,7 @@ def from_blocks(
19761976

19771977
@classmethod
19781978
def from_array(
1979-
cls,
1980-
array: ArrayLike,
1981-
index: Index,
1982-
refs: BlockValuesRefs | None = None,
1979+
cls, array: ArrayLike, index: Index, refs: BlockValuesRefs | None = None
19831980
) -> SingleBlockManager:
19841981
"""
19851982
Constructor for if we have an array that is not yet a Block.

pandas/tests/extension/list/test_list.py

+2 -51
Original file line number | Diff line number | Diff line change
@@ -10,36 +10,7 @@
1010
ListArray,
1111
ListDtype,
1212
)
13-
from pandas.tests.extension.base.accumulate import BaseAccumulateTests
14-
from pandas.tests.extension.base.casting import BaseCastingTests
15-
from pandas.tests.extension.base.constructors import BaseConstructorsTests
16-
from pandas.tests.extension.base.dim2 import ( # noqa: F401
17-
Dim2CompatTests,
18-
NDArrayBacked2DTests,
19-
)
20-
from pandas.tests.extension.base.dtype import BaseDtypeTests
21-
from pandas.tests.extension.base.getitem import BaseGetitemTests
22-
from pandas.tests.extension.base.groupby import BaseGroupbyTests
23-
from pandas.tests.extension.base.index import BaseIndexTests
24-
from pandas.tests.extension.base.interface import BaseInterfaceTests
25-
from pandas.tests.extension.base.io import BaseParsingTests
26-
from pandas.tests.extension.base.methods import BaseMethodsTests
27-
from pandas.tests.extension.base.missing import BaseMissingTests
28-
from pandas.tests.extension.base.ops import ( # noqa: F401
29-
BaseArithmeticOpsTests,
30-
BaseComparisonOpsTests,
31-
BaseOpsUtil,
32-
BaseUnaryOpsTests,
33-
)
34-
from pandas.tests.extension.base.printing import BasePrintingTests
35-
from pandas.tests.extension.base.reduce import BaseReduceTests
36-
from pandas.tests.extension.base.reshaping import BaseReshapingTests
37-
from pandas.tests.extension.base.setitem import BaseSetitemTests
38-
39-
# TODO(wayd): This is copied from string tests - is it required here?
40-
# @pytest.fixture(params=[True, False])
41-
# def chunked(request):
42-
# return request.param
13+
from pandas.tests.extension import base
4314

4415

4516
@pytest.fixture
@@ -93,27 +64,7 @@ def data_for_grouping(dtype):
9364
return ListArray([B, B, NA, NA, A, A, B, C])
9465

9566

96-
class TestListArray(
97-
BaseAccumulateTests,
98-
BaseCastingTests,
99-
BaseConstructorsTests,
100-
BaseDtypeTests,
101-
BaseGetitemTests,
102-
BaseGroupbyTests,
103-
BaseIndexTests,
104-
BaseInterfaceTests,
105-
BaseParsingTests,
106-
BaseMethodsTests,
107-
BaseMissingTests,
108-
BaseArithmeticOpsTests,
109-
BaseComparisonOpsTests,
110-
BaseUnaryOpsTests,
111-
BasePrintingTests,
112-
BaseReduceTests,
113-
BaseReshapingTests,
114-
BaseSetitemTests,
115-
Dim2CompatTests,
116-
):
67+
class TestListArray(base.ExtensionTests):
11768
def test_fillna_no_op_returns_copy(self, data):
11869
# TODO(wayd): This test is copied from test_arrow.py
11970
# It appears the TestArrowArray class has different expectations around

0 commit comments

Comments
 (0)