Skip to content

Commit ef378f7

Browse files
committed
Test cleanups
1 parent 66d8a1d commit ef378f7

File tree

3 files changed

+51
-4
lines changed

3 files changed

+51
-4
lines changed

pandas/core/arrays/list_.py

+46-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@
2323
from pandas.core.arrays import ExtensionArray
2424

2525
if TYPE_CHECKING:
26-
from pandas._typing import type_t
26+
from pandas._typing import (
27+
type_t,
28+
Shape,
29+
)
2730

2831
import pyarrow as pa
2932

@@ -82,8 +85,21 @@ def __init__(self, values: pa.Array | pa.ChunkedArray | list | ListArray) -> Non
8285
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
8386
if isinstance(scalars, ListArray):
8487
return cls(scalars)
88+
elif isinstance(scalars, pa.Scalar):
89+
scalars = [scalars]
90+
return cls(scalars)
8591

86-
values = pa.array(scalars, from_pandas=True)
92+
try:
93+
values = pa.array(scalars, from_pandas=True)
94+
except TypeError:
95+
# TypeError: object of type 'NoneType' has no len() if you have
96+
# pa.ListScalar(None). Upstream issue in Arrow - see:
97+
# https://github.com/apache/arrow/issues/40319
98+
for i in range(len(scalars)):
99+
if not scalars[i].is_valid:
100+
scalars[i] = None
101+
102+
values = pa.array(scalars, from_pandas=True)
87103
if values.type == "null":
88104
# TODO(wayd): this is a hack to get the tests to pass, but the overall issue
89105
# is that our extension types don't support parametrization but the pyarrow
@@ -113,8 +129,35 @@ def take(self, indexer, allow_fill=False, fill_value=None):
113129
# TODO: what do we need to do with allow_fill and fill_value here?
114130
return type(self)(self._pa_array.take(indexer))
115131

132+
@classmethod
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
    """
    Create an all-null ListArray of the given length.

    Parameters
    ----------
    shape : int or tuple of int
        Number of elements, given either as a bare integer or as a
        one-element tuple.
    dtype : ExtensionDtype
        Requested dtype.
        NOTE(review): this argument is currently not forwarded; the values
        are always built with ``pa.list_(pa.null())``. Confirm that is the
        intended behaviour.

    Returns
    -------
    The result of ``cls._from_sequence`` called on ``length`` ``None`` values.

    Raises
    ------
    ValueError
        If ``shape`` is a tuple that does not have exactly one dimension.

    See Also
    --------
    ExtensionDtype.empty : The 'official' public version of this API.
    """
    # Implementer note: while ExtensionDtype.empty is the public way to
    # call this method, it is still required to implement this `_empty`
    # method as well (it is called internally in pandas)
    if isinstance(shape, tuple):
        # Reject every non-1-D shape the same way. Checking only
        # ``len(shape) > 1`` let the 0-d shape ``()`` through, which then
        # failed on ``shape[0]`` with an unrelated IndexError.
        if len(shape) != 1:
            raise ValueError("ListArray may only be 1-D")
        (length,) = shape
    else:
        length = shape
    return cls._from_sequence([None] * length, dtype=pa.list_(pa.null()))
153+
116154
def copy(self):
    """
    Return a new array of the same type built from a copy of the Arrow data.

    The underlying data is first combined into a single chunk and then
    copied to the device of pyarrow's default CPU memory manager.
    """
    cpu_device = pa.default_cpu_memory_manager().device

    # TODO(wayd): ChunkedArray does not implement copy_to so this
    # ends up creating an Array
    combined = self._pa_array.combine_chunks()
    return type(self)(combined.copy_to(cpu_device))
118161

119162
def astype(self, dtype, copy=True):
120163
if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:

pandas/core/internals/construction.py

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import numpy as np
1515
from numpy import ma
16+
import pyarrow as pa
1617

1718
from pandas._config import using_string_dtype
1819

@@ -460,6 +461,8 @@ def treat_as_nested(data) -> bool:
460461
len(data) > 0
461462
and is_list_like(data[0])
462463
and getattr(data[0], "ndim", 1) == 1
464+
# TODO(wayd): hack so pyarrow list elements don't expand
465+
and not isinstance(data[0], pa.ListScalar)
463466
and not (isinstance(data, ExtensionArray) and data.ndim == 2)
464467
)
465468

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
StructAccessor,
112112
)
113113
from pandas.core.arrays.categorical import CategoricalAccessor
114+
from pandas.core.arrays.list_ import ListDtype
114115
from pandas.core.arrays.sparse import SparseAccessor
115116
from pandas.core.arrays.string_ import StringDtype
116117
from pandas.core.construction import (
@@ -494,7 +495,7 @@ def __init__(
494495
if not is_list_like(data):
495496
data = [data]
496497
index = default_index(len(data))
497-
elif is_list_like(data):
498+
elif is_list_like(data) and not isinstance(dtype, ListDtype):
498499
com.require_length_match(data, index)
499500

500501
# create/copy the manager

0 commit comments

Comments
 (0)