Skip to content

Commit 0c64095

Browse files
Merge changes from PR#53089
Rewrite to use ideas from PR#53089 to handle scalar objects that differ in size from the objects in the ExtensionArray (EA). There are some parse/eval failures that I'm pretty sure have nothing to do with these changes and are due instead to the point in time at which I merged with the HEAD of the development branch. Signed-off-by: Michael Tiemann <[email protected]>
1 parent aff01ac commit 0c64095

19 files changed

+162
-104
lines changed

pandas/core/arrays/_mixins.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -517,9 +517,7 @@ def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
517517
# numpy-like methods
518518

519519
@classmethod
520-
def _empty(
521-
cls, shape: Shape, dtype: ExtensionDtype, fill_value: object = None
522-
) -> Self:
520+
def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
523521
"""
524522
Analogous to np.empty(shape, dtype=dtype)
525523

pandas/core/arrays/arrow/array.py

+1-8
Original file line number | Diff line number | Diff line change
@@ -251,14 +251,7 @@ def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
251251
self._dtype = ArrowDtype(self._pa_array.type)
252252

253253
@classmethod
254-
def _from_sequence(
255-
cls,
256-
scalars,
257-
*,
258-
dtype: Dtype | None = None,
259-
fill_value: object | None = None,
260-
copy: bool = False,
261-
):
254+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
262255
"""
263256
Construct a new ExtensionArray from a sequence of scalars.
264257
"""

pandas/core/arrays/base.py

+33-10
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,7 @@
8585
AstypeArg,
8686
AxisInt,
8787
Dtype,
88+
DtypeObj,
8889
FillnaOptions,
8990
InterpolateOptions,
9091
NumpySorter,
@@ -262,14 +263,7 @@ class ExtensionArray:
262263
# ------------------------------------------------------------------------
263264

264265
@classmethod
265-
def _from_sequence(
266-
cls,
267-
scalars,
268-
*,
269-
dtype: Dtype | None = None,
270-
fill_value: object = None,
271-
copy: bool = False,
272-
):
266+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
273267
"""
274268
Construct a new ExtensionArray from a sequence of scalars.
275269
@@ -297,6 +291,35 @@ def _from_sequence(
297291
"""
298292
raise AbstractMethodError(cls)
299293

294+
@classmethod
295+
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
296+
"""
297+
Strict analogue to _from_sequence, allowing only sequences of scalars
298+
that should be specifically inferred to the given dtype.
299+
Parameters
300+
----------
301+
scalars : sequence
302+
dtype : ExtensionDtype
303+
Raises
304+
------
305+
TypeError or ValueError
306+
Notes
307+
-----
308+
This is called in a try/except block when casting the result of a
309+
pointwise operation.
310+
"""
311+
try:
312+
return cls._from_sequence(scalars, dtype=dtype, copy=False)
313+
except (ValueError, TypeError):
314+
raise
315+
except Exception:
316+
warnings.warn(
317+
"_from_scalars should only raise ValueError or TypeError. "
318+
"Consider overriding _from_scalars where appropriate.",
319+
stacklevel=find_stack_level(),
320+
)
321+
raise
322+
300323
@classmethod
301324
def _from_sequence_of_strings(
302325
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
@@ -2092,7 +2115,7 @@ def _rank(
20922115
)
20932116

20942117
@classmethod
2095-
def _empty(cls, shape: Shape, dtype: ExtensionDtype, fill_value: object = None):
2118+
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
20962119
"""
20972120
Create an ExtensionArray with the given shape and dtype.
20982121
@@ -2104,7 +2127,7 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype, fill_value: object = None):
21042127
# Implementer note: while ExtensionDtype.empty is the public way to
21052128
# call this method, it is still required to implement this `_empty`
21062129
# method as well (it is called internally in pandas)
2107-
obj = cls._from_sequence([], dtype=dtype, fill_value=fill_value)
2130+
obj = cls._from_sequence([], dtype=dtype)
21082131

21092132
taker = np.broadcast_to(np.intp(-1), shape)
21102133
result = obj.take(taker, allow_fill=True)

pandas/core/arrays/boolean.py

+32
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,15 @@
55
TYPE_CHECKING,
66
cast,
77
)
8+
import warnings
89

910
import numpy as np
1011

1112
from pandas._libs import (
1213
lib,
1314
missing as libmissing,
1415
)
16+
from pandas.util._exceptions import find_stack_level
1517

1618
from pandas.core.dtypes.common import is_list_like
1719
from pandas.core.dtypes.dtypes import register_extension_dtype
@@ -318,6 +320,36 @@ def __init__(
318320
def dtype(self) -> BooleanDtype:
319321
return self._dtype
320322

323+
@classmethod
324+
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
325+
"""
326+
Strict analogue to _from_sequence, allowing only sequences of scalars
327+
that should be specifically inferred to the given dtype.
328+
Parameters
329+
----------
330+
scalars : sequence
331+
dtype : ExtensionDtype
332+
Raises
333+
------
334+
TypeError or ValueError
335+
Notes
336+
-----
337+
This is called in a try/except block when casting the result of a
338+
pointwise operation.
339+
"""
340+
try:
341+
# DtypeObj is bool, which conflicts with test assertion dtype=='boolean'
342+
return cls._from_sequence(scalars, dtype=BooleanDtype(), copy=False)
343+
except (ValueError, TypeError):
344+
raise
345+
except Exception:
346+
warnings.warn(
347+
"_from_scalars should only raise ValueError or TypeError. "
348+
"Consider overriding _from_scalars where appropriate.",
349+
stacklevel=find_stack_level(),
350+
)
351+
raise
352+
321353
@classmethod
322354
def _from_sequence_of_strings(
323355
cls,

pandas/core/arrays/categorical.py

+19-7
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,7 @@
101101
AstypeArg,
102102
AxisInt,
103103
Dtype,
104+
DtypeObj,
104105
NpDtype,
105106
Ordered,
106107
Self,
@@ -505,15 +506,26 @@ def _internal_fill_value(self) -> int:
505506

506507
@classmethod
507508
def _from_sequence(
508-
cls,
509-
scalars,
510-
*,
511-
dtype: Dtype | None = None,
512-
fill_value: object = None,
513-
copy: bool = False,
509+
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
514510
) -> Self:
515511
return cls(scalars, dtype=dtype, copy=copy)
516512

513+
@classmethod
514+
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
515+
if dtype is None:
516+
# The _from_scalars strictness doesn't make much sense in this case.
517+
raise NotImplementedError
518+
519+
res = cls._from_sequence(scalars, dtype=dtype)
520+
521+
# if there are any non-category elements in scalars, these will be
522+
# converted to NAs in res.
523+
mask = isna(scalars)
524+
if not (mask == res.isna()).all():
525+
# Some non-category element in scalars got converted to NA in res.
526+
raise ValueError
527+
return res
528+
517529
@overload
518530
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
519531
...
@@ -1805,7 +1817,7 @@ def value_counts(self, dropna: bool = True) -> Series:
18051817
# "ExtensionDtype"
18061818
@classmethod
18071819
def _empty( # type: ignore[override]
1808-
cls, shape: Shape, dtype: CategoricalDtype, fill_value: object = None
1820+
cls, shape: Shape, dtype: CategoricalDtype
18091821
) -> Self:
18101822
"""
18111823
Analogous to np.empty(shape, dtype=dtype)

pandas/core/arrays/datetimes.py

+10-3
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@
7474

7575
from pandas._typing import (
7676
DateTimeErrorChoices,
77+
DtypeObj,
7778
IntervalClosedType,
7879
Self,
7980
TimeAmbiguous,
@@ -293,11 +294,17 @@ def _simple_new( # type: ignore[override]
293294
return result
294295

295296
@classmethod
296-
def _from_sequence(
297-
cls, scalars, *, dtype=None, fill_value: object = None, copy: bool = False
298-
):
297+
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
299298
return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
300299

300+
@classmethod
301+
def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
302+
if lib.infer_dtype(scalars, skipna=True) not in ["datetime", "datetime64"]:
303+
# TODO: require any NAs be valid-for-DTA
304+
# TODO: if dtype is passed, check for tzawareness compat?
305+
raise ValueError
306+
return cls._from_sequence(scalars, dtype=dtype)
307+
301308
@classmethod
302309
def _from_sequence_not_strict(
303310
cls,

pandas/core/arrays/interval.py

-1
Original file line number | Diff line number | Diff line change
@@ -382,7 +382,6 @@ def _from_sequence(
382382
scalars,
383383
*,
384384
dtype: Dtype | None = None,
385-
fill_value: object = None,
386385
copy: bool = False,
387386
) -> Self:
388387
return cls(scalars, dtype=dtype, copy=copy)

pandas/core/arrays/masked.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -145,15 +145,13 @@ def __init__(
145145
self._mask = mask
146146

147147
@classmethod
148-
def _from_sequence(
149-
cls, scalars, *, dtype=None, fill_value: object = None, copy: bool = False
150-
) -> Self:
148+
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
151149
values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
152150
return cls(values, mask)
153151

154152
@classmethod
155153
@doc(ExtensionArray._empty)
156-
def _empty(cls, shape: Shape, dtype: ExtensionDtype, fill_value: object = None):
154+
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
157155
values = np.empty(shape, dtype=dtype.type)
158156
values.fill(cls._internal_fill_value)
159157
mask = np.ones(shape, dtype=bool)

pandas/core/arrays/numpy_.py

+1-6
Original file line number | Diff line number | Diff line change
@@ -117,12 +117,7 @@ def __init__(
117117

118118
@classmethod
119119
def _from_sequence(
120-
cls,
121-
scalars,
122-
*,
123-
dtype: Dtype | None = None,
124-
fill_value: object = None,
125-
copy: bool = False,
120+
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
126121
) -> NumpyExtensionArray:
127122
if isinstance(dtype, NumpyEADtype):
128123
dtype = dtype._dtype

pandas/core/arrays/period.py

-1
Original file line number | Diff line number | Diff line change
@@ -276,7 +276,6 @@ def _from_sequence(
276276
scalars,
277277
*,
278278
dtype: Dtype | None = None,
279-
fill_value: object | None = None,
280279
copy: bool = False,
281280
) -> Self:
282281
if dtype is not None:

pandas/core/arrays/sparse/array.py

+1-8
Original file line number | Diff line number | Diff line change
@@ -583,14 +583,7 @@ def __setitem__(self, key, value) -> None:
583583
raise TypeError(msg)
584584

585585
@classmethod
586-
def _from_sequence(
587-
cls,
588-
scalars,
589-
*,
590-
dtype: Dtype | None = None,
591-
fill_value: object | None = None,
592-
copy: bool = False,
593-
):
586+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
594587
return cls(scalars, dtype=dtype)
595588

596589
@classmethod

pandas/core/arrays/string_.py

+11-9
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,11 @@
5454
from pandas._typing import (
5555
AxisInt,
5656
Dtype,
57+
DtypeObj,
5758
NumpySorter,
5859
NumpyValueArrayLike,
5960
Scalar,
61+
Self,
6062
npt,
6163
type_t,
6264
)
@@ -228,6 +230,13 @@ def tolist(self):
228230
return [x.tolist() for x in self]
229231
return list(self.to_numpy())
230232

233+
@classmethod
234+
def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
235+
if lib.infer_dtype(scalars, skipna=True) != "string":
236+
# TODO: require any NAs be valid-for-string
237+
raise ValueError
238+
return cls._from_sequence(scalars, dtype=dtype)
239+
231240

232241
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
233242
# incompatible with definition in base class "ExtensionArray"
@@ -340,14 +349,7 @@ def _validate(self):
340349
lib.convert_nans_to_NA(self._ndarray)
341350

342351
@classmethod
343-
def _from_sequence(
344-
cls,
345-
scalars,
346-
*,
347-
dtype: Dtype | None = None,
348-
fill_value: object | None = None,
349-
copy: bool = False,
350-
):
352+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
351353
if dtype and not (isinstance(dtype, str) and dtype == "string"):
352354
dtype = pandas_dtype(dtype)
353355
assert isinstance(dtype, StringDtype) and dtype.storage == "python"
@@ -384,7 +386,7 @@ def _from_sequence_of_strings(
384386
return cls._from_sequence(strings, dtype=dtype, copy=copy)
385387

386388
@classmethod
387-
def _empty(cls, shape, dtype, fill_value=None) -> StringArray:
389+
def _empty(cls, shape, dtype) -> StringArray:
388390
values = np.empty(shape, dtype=object)
389391
values[:] = libmissing.NA
390392
return cls(values).astype(dtype, copy=False)

pandas/core/arrays/string_arrow.py

+1-7
Original file line number | Diff line number | Diff line change
@@ -137,13 +137,7 @@ def __len__(self) -> int:
137137
return len(self._pa_array)
138138

139139
@classmethod
140-
def _from_sequence(
141-
cls,
142-
scalars,
143-
dtype: Dtype | None = None,
144-
fill_value: object | None = None,
145-
copy: bool = False,
146-
):
140+
def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
147141
from pandas.core.arrays.masked import BaseMaskedArray
148142

149143
_chk_pyarrow_available()

pandas/core/arrays/timedeltas.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -229,9 +229,7 @@ def _simple_new( # type: ignore[override]
229229
return result
230230

231231
@classmethod
232-
def _from_sequence(
233-
cls, data, *, dtype=None, fill_value: object = None, copy: bool = False
234-
) -> Self:
232+
def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
235233
if dtype:
236234
dtype = _validate_td64_dtype(dtype)
237235

0 commit comments

Comments
 (0)