Skip to content

Commit 4a97c2f

Browse files
authored
Merge branch 'main' into pandas.Index
2 parents 6736946 + 78a2ef2 commit 4a97c2f

File tree

22 files changed

+219
-268
lines changed

22 files changed

+219
-268
lines changed

ci/code_checks.sh

-6
Original file line numberDiff line numberDiff line change
@@ -92,22 +92,17 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9292
-i "pandas.Grouper PR02" \
9393
-i "pandas.Index.append PR07,RT03,SA01" \
9494
-i "pandas.Index.difference PR07,RT03,SA01" \
95-
-i "pandas.Index.duplicated RT03" \
9695
-i "pandas.Index.get_indexer PR07,SA01" \
9796
-i "pandas.Index.get_indexer_for PR01,SA01" \
9897
-i "pandas.Index.get_indexer_non_unique PR07,SA01" \
9998
-i "pandas.Index.get_loc PR07,RT03,SA01" \
100-
-i "pandas.Index.identical PR01,SA01" \
10199
-i "pandas.Index.join PR07,RT03,SA01" \
102100
-i "pandas.Index.names GL08" \
103-
-i "pandas.Index.nunique RT03" \
104101
-i "pandas.Index.putmask PR01,RT03" \
105102
-i "pandas.Index.ravel PR01,RT03" \
106103
-i "pandas.Index.slice_indexer PR07,RT03,SA01" \
107104
-i "pandas.Index.str PR01,SA01" \
108-
-i "pandas.Index.symmetric_difference PR07,RT03,SA01" \
109105
-i "pandas.Index.take PR01,PR07" \
110-
-i "pandas.Index.union PR07,RT03,SA01" \
111106
-i "pandas.Index.view GL08" \
112107
-i "pandas.Int16Dtype SA01" \
113108
-i "pandas.Int32Dtype SA01" \
@@ -257,7 +252,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
257252
-i "pandas.Series.mode SA01" \
258253
-i "pandas.Series.mul PR07" \
259254
-i "pandas.Series.ne PR07,SA01" \
260-
-i "pandas.Series.nunique RT03" \
261255
-i "pandas.Series.pad PR01,SA01" \
262256
-i "pandas.Series.plot PR02,SA01" \
263257
-i "pandas.Series.pop RT03,SA01" \

doc/source/whatsnew/v3.0.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ Removal of prior version deprecations/changes
221221
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
222222
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
223223
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
224+
- Changed behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` to always treat integer keys as labels, never as positional, consistent with :class:`DataFrame` behavior (:issue:`50617`)
224225
- Disallow logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``); wrap the objects in :class:`Series`, :class:`Index`, or ``np.array`` first instead (:issue:`52264`)
225226
- Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``|``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`)
226227
- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
@@ -389,6 +390,7 @@ Numeric
389390

390391
Conversion
391392
^^^^^^^^^^
393+
- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`)
392394
- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
393395
- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
394396
- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
@@ -469,6 +471,7 @@ Styler
469471
Other
470472
^^^^^
471473
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
474+
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
472475
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
473476
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
474477
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow the use of the ``tan`` function. (:issue:`55091`)
@@ -480,6 +483,7 @@ Other
480483
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
481484
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
482485
- Bug in the DataFrame Interchange Protocol implementation returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
486+
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
483487

484488
.. ***DO NOT USE THIS SECTION***
485489

pandas/core/arrays/arrow/accessors.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ def len(self) -> Series:
110110
from pandas import Series
111111

112112
value_lengths = pc.list_value_length(self._pa_array)
113-
return Series(value_lengths, dtype=ArrowDtype(value_lengths.type))
113+
return Series(
114+
value_lengths, dtype=ArrowDtype(value_lengths.type), index=self._data.index
115+
)
114116

115117
def __getitem__(self, key: int | slice) -> Series:
116118
"""
@@ -149,7 +151,9 @@ def __getitem__(self, key: int | slice) -> Series:
149151
# if key < 0:
150152
# key = pc.add(key, pc.list_value_length(self._pa_array))
151153
element = pc.list_element(self._pa_array, key)
152-
return Series(element, dtype=ArrowDtype(element.type))
154+
return Series(
155+
element, dtype=ArrowDtype(element.type), index=self._data.index
156+
)
153157
elif isinstance(key, slice):
154158
if pa_version_under11p0:
155159
raise NotImplementedError(
@@ -167,7 +171,7 @@ def __getitem__(self, key: int | slice) -> Series:
167171
if step is None:
168172
step = 1
169173
sliced = pc.list_slice(self._pa_array, start, stop, step)
170-
return Series(sliced, dtype=ArrowDtype(sliced.type))
174+
return Series(sliced, dtype=ArrowDtype(sliced.type), index=self._data.index)
171175
else:
172176
raise ValueError(f"key must be an int or slice, got {type(key).__name__}")
173177

@@ -195,15 +199,17 @@ def flatten(self) -> Series:
195199
... )
196200
>>> s.list.flatten()
197201
0 1
198-
1 2
199-
2 3
200-
3 3
202+
0 2
203+
0 3
204+
1 3
201205
dtype: int64[pyarrow]
202206
"""
203207
from pandas import Series
204208

205-
flattened = pc.list_flatten(self._pa_array)
206-
return Series(flattened, dtype=ArrowDtype(flattened.type))
209+
counts = pa.compute.list_value_length(self._pa_array)
210+
flattened = pa.compute.list_flatten(self._pa_array)
211+
index = self._data.index.repeat(counts.fill_null(pa.scalar(0, counts.type)))
212+
return Series(flattened, dtype=ArrowDtype(flattened.type), index=index)
207213

208214

209215
class StructAccessor(ArrowAccessor):

pandas/core/arrays/arrow/array.py

+2
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,8 @@ def _box_pa_array(
525525
if pa_type is not None and pa_array.type != pa_type:
526526
if pa.types.is_dictionary(pa_type):
527527
pa_array = pa_array.dictionary_encode()
528+
if pa_array.type != pa_type:
529+
pa_array = pa_array.cast(pa_type)
528530
else:
529531
try:
530532
pa_array = pa_array.cast(pa_type)

pandas/core/base.py

+1
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,7 @@ def nunique(self, dropna: bool = True) -> int:
10621062
Returns
10631063
-------
10641064
int
1065+
An integer indicating the number of unique elements in the object.
10651066
10661067
See Also
10671068
--------

pandas/core/computation/align.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -160,19 +160,24 @@ def align_terms(terms):
160160
# can't iterate so it must just be a constant or single variable
161161
if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
162162
typ = type(terms.value)
163-
return typ, _zip_axes_from_type(typ, terms.value.axes)
164-
return np.result_type(terms.type), None
163+
name = terms.value.name if isinstance(terms.value, ABCSeries) else None
164+
return typ, _zip_axes_from_type(typ, terms.value.axes), name
165+
return np.result_type(terms.type), None, None
165166

166167
# if all resolved variables are numeric scalars
167168
if all(term.is_scalar for term in terms):
168-
return result_type_many(*(term.value for term in terms)).type, None
169+
return result_type_many(*(term.value for term in terms)).type, None, None
170+
171+
# if all input series have a common name, propagate it to the returned series
172+
names = {term.value.name for term in terms if isinstance(term.value, ABCSeries)}
173+
name = names.pop() if len(names) == 1 else None
169174

170175
# perform the main alignment
171176
typ, axes = _align_core(terms)
172-
return typ, axes
177+
return typ, axes, name
173178

174179

175-
def reconstruct_object(typ, obj, axes, dtype):
180+
def reconstruct_object(typ, obj, axes, dtype, name):
176181
"""
177182
Reconstruct an object given its type, raw value, and possibly empty
178183
(None) axes.
@@ -200,7 +205,9 @@ def reconstruct_object(typ, obj, axes, dtype):
200205
res_t = np.result_type(obj.dtype, dtype)
201206

202207
if not isinstance(typ, partial) and issubclass(typ, PandasObject):
203-
return typ(obj, dtype=res_t, **axes)
208+
if name is None:
209+
return typ(obj, dtype=res_t, **axes)
210+
return typ(obj, dtype=res_t, name=name, **axes)
204211

205212
# special case for pathological things like ~True/~False
206213
if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:

pandas/core/computation/engines.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def __init__(self, expr) -> None:
5454
self.expr = expr
5555
self.aligned_axes = None
5656
self.result_type = None
57+
self.result_name = None
5758

5859
def convert(self) -> str:
5960
"""
@@ -76,12 +77,18 @@ def evaluate(self) -> object:
7677
The result of the passed expression.
7778
"""
7879
if not self._is_aligned:
79-
self.result_type, self.aligned_axes = align_terms(self.expr.terms)
80+
self.result_type, self.aligned_axes, self.result_name = align_terms(
81+
self.expr.terms
82+
)
8083

8184
# make sure no names in resolvers and locals/globals clash
8285
res = self._evaluate()
8386
return reconstruct_object(
84-
self.result_type, res, self.aligned_axes, self.expr.terms.return_type
87+
self.result_type,
88+
res,
89+
self.aligned_axes,
90+
self.expr.terms.return_type,
91+
self.result_name,
8592
)
8693

8794
@property

pandas/core/indexes/base.py

+33
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,7 @@ def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
27682768
Returns
27692769
-------
27702770
np.ndarray[bool]
2771+
A numpy array of boolean values indicating duplicate index values.
27712772
27722773
See Also
27732774
--------
@@ -2873,6 +2874,8 @@ def union(self, other, sort=None):
28732874
Parameters
28742875
----------
28752876
other : Index or array-like
2877+
Index or an array-like object containing elements to form the union
2878+
with the original Index.
28762879
sort : bool or None, default None
28772880
Whether to sort the resulting Index.
28782881
@@ -2889,6 +2892,14 @@ def union(self, other, sort=None):
28892892
Returns
28902893
-------
28912894
Index
2895+
Returns a new Index object with all unique elements from both the original
2896+
Index and the `other` Index.
2897+
2898+
See Also
2899+
--------
2900+
Index.unique : Return unique values in the index.
2901+
Index.intersection : Form the intersection of two Index objects.
2902+
Index.difference : Return a new Index with elements of index not in `other`.
28922903
28932904
Examples
28942905
--------
@@ -3313,7 +3324,10 @@ def symmetric_difference(self, other, result_name=None, sort=None):
33133324
Parameters
33143325
----------
33153326
other : Index or array-like
3327+
Index or an array-like object with elements to compute the symmetric
3328+
difference with the original Index.
33163329
result_name : str
3330+
A string representing the name of the resulting Index, if desired.
33173331
sort : bool or None, default None
33183332
Whether to sort the resulting index. By default, the
33193333
values are attempted to be sorted, but any TypeError from
@@ -3327,6 +3341,14 @@ def symmetric_difference(self, other, result_name=None, sort=None):
33273341
Returns
33283342
-------
33293343
Index
3344+
Returns a new Index object containing elements that appear in either the
3345+
original Index or the `other` Index, but not both.
3346+
3347+
See Also
3348+
--------
3349+
Index.difference : Return a new Index with elements of index not in other.
3350+
Index.union : Form the union of two Index objects.
3351+
Index.intersection : Form the intersection of two Index objects.
33303352
33313353
Notes
33323354
-----
@@ -5323,12 +5345,23 @@ def identical(self, other) -> bool:
53235345
"""
53245346
Similar to equals, but checks that object attributes and types are also equal.
53255347
5348+
Parameters
5349+
----------
5350+
other : Index
5351+
The Index object you want to compare with the current Index object.
5352+
53265353
Returns
53275354
-------
53285355
bool
53295356
True if the two Index objects have equal elements and the same type,
53305357
otherwise False.
53315358
5359+
See Also
5360+
--------
5361+
Index.equals : Determine if two Index objects are equal.
5362+
Index.has_duplicates : Check if the Index has duplicate values.
5363+
Index.is_unique : Return if the index has unique values.
5364+
53325365
Examples
53335366
--------
53345367
>>> idx1 = pd.Index(["1", "2", "3"])

0 commit comments

Comments
 (0)