Skip to content

Commit 786e67b

Browse files
committed
DOC: resolve merge conflict
2 parents 6736946 + 086b047 commit 786e67b

31 files changed

+541
-375
lines changed

ci/code_checks.sh

-14
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7070
--format=actions \
7171
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
73-
-i "pandas.DataFrame.__dataframe__ SA01" \
74-
-i "pandas.DataFrame.at_time PR01" \
75-
-i "pandas.DataFrame.kurt RT03,SA01" \
76-
-i "pandas.DataFrame.kurtosis RT03,SA01" \
7773
-i "pandas.DataFrame.max RT03" \
7874
-i "pandas.DataFrame.mean RT03,SA01" \
7975
-i "pandas.DataFrame.median RT03,SA01" \
@@ -92,23 +88,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
9288
-i "pandas.Grouper PR02" \
9389
-i "pandas.Index.append PR07,RT03,SA01" \
9490
-i "pandas.Index.difference PR07,RT03,SA01" \
95-
-i "pandas.Index.duplicated RT03" \
9691
-i "pandas.Index.get_indexer PR07,SA01" \
9792
-i "pandas.Index.get_indexer_for PR01,SA01" \
9893
-i "pandas.Index.get_indexer_non_unique PR07,SA01" \
9994
-i "pandas.Index.get_loc PR07,RT03,SA01" \
100-
-i "pandas.Index.identical PR01,SA01" \
10195
-i "pandas.Index.join PR07,RT03,SA01" \
10296
-i "pandas.Index.names GL08" \
103-
-i "pandas.Index.nunique RT03" \
10497
-i "pandas.Index.putmask PR01,RT03" \
10598
-i "pandas.Index.ravel PR01,RT03" \
106-
-i "pandas.Index.slice_indexer PR07,RT03,SA01" \
10799
-i "pandas.Index.str PR01,SA01" \
108-
-i "pandas.Index.symmetric_difference PR07,RT03,SA01" \
109-
-i "pandas.Index.take PR01,PR07" \
110-
-i "pandas.Index.union PR07,RT03,SA01" \
111-
-i "pandas.Index.view GL08" \
112100
-i "pandas.Int16Dtype SA01" \
113101
-i "pandas.Int32Dtype SA01" \
114102
-i "pandas.Int64Dtype SA01" \
@@ -197,7 +185,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
197185
-i "pandas.Series SA01" \
198186
-i "pandas.Series.__iter__ RT03,SA01" \
199187
-i "pandas.Series.add PR07" \
200-
-i "pandas.Series.at_time PR01" \
201188
-i "pandas.Series.backfill PR01,SA01" \
202189
-i "pandas.Series.case_when RT03" \
203190
-i "pandas.Series.cat PR07,SA01" \
@@ -257,7 +244,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
257244
-i "pandas.Series.mode SA01" \
258245
-i "pandas.Series.mul PR07" \
259246
-i "pandas.Series.ne PR07,SA01" \
260-
-i "pandas.Series.nunique RT03" \
261247
-i "pandas.Series.pad PR01,SA01" \
262248
-i "pandas.Series.plot PR02,SA01" \
263249
-i "pandas.Series.pop RT03,SA01" \

doc/source/whatsnew/v3.0.0.rst

+6
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Other enhancements
3939
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
4040
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
4141
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
42+
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as an optional argument, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
4243
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
4344
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4445
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
@@ -221,6 +222,7 @@ Removal of prior version deprecations/changes
221222
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
222223
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
223224
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
225+
- Changed behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` to always treat integer keys as labels, never as positional, consistent with :class:`DataFrame` behavior (:issue:`50617`)
224226
- Disallow logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``); wrap the objects in :class:`Series`, :class:`Index`, or ``np.array`` first instead (:issue:`52264`)
225227
- Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``|``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`)
226228
- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
@@ -334,6 +336,7 @@ Performance improvements
334336
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
335337
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
336338
- Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`)
339+
- Performance improvement in :meth:`MultiIndex._engine` to use smaller dtypes if possible (:issue:`58411`)
337340
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
338341
- Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`)
339342
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
@@ -389,6 +392,7 @@ Numeric
389392

390393
Conversion
391394
^^^^^^^^^^
395+
- Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`)
392396
- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
393397
- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
394398
- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
@@ -469,6 +473,7 @@ Styler
469473
Other
470474
^^^^^
471475
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
476+
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
472477
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
473478
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
474479
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow the use of the ``tan`` function. (:issue:`55091`)
@@ -480,6 +485,7 @@ Other
480485
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
481486
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
482487
- Bug in the DataFrame Interchange Protocol implementation returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
488+
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
483489

484490
.. ***DO NOT USE THIS SECTION***
485491

pandas/_libs/index.pyi

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,13 @@ class MaskedBoolEngine(MaskedUInt8Engine): ...
7474

7575
class BaseMultiIndexCodesEngine:
7676
levels: list[np.ndarray]
77-
offsets: np.ndarray # ndarray[uint64_t, ndim=1]
77+
offsets: np.ndarray # np.ndarray[..., ndim=1]
7878

7979
def __init__(
8080
self,
8181
levels: list[Index], # all entries hashable
8282
labels: list[np.ndarray], # all entries integer-dtyped
83-
offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1]
83+
offsets: np.ndarray, # np.ndarray[..., ndim=1]
8484
) -> None: ...
8585
def get_indexer(self, target: npt.NDArray[np.object_]) -> npt.NDArray[np.intp]: ...
8686
def _extract_level_codes(self, target: MultiIndex) -> np.ndarray: ...

pandas/_libs/index.pyx

+38-10
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ from numpy cimport (
99
intp_t,
1010
ndarray,
1111
uint8_t,
12-
uint64_t,
1312
)
1413

1514
cnp.import_array()
@@ -699,16 +698,15 @@ cdef class BaseMultiIndexCodesEngine:
699698
Keys are located by first locating each component against the respective
700699
level, then locating (the integer representation of) codes.
701700
"""
702-
def __init__(self, object levels, object labels,
703-
ndarray[uint64_t, ndim=1] offsets):
701+
def __init__(self, object levels, object labels, ndarray offsets):
704702
"""
705703
Parameters
706704
----------
707705
levels : list-like of numpy arrays
708706
Levels of the MultiIndex.
709707
labels : list-like of numpy arrays of integer dtype
710708
Labels of the MultiIndex.
711-
offsets : numpy array of uint64 dtype
709+
offsets : numpy array of int dtype
712710
Pre-calculated offsets, one for each level of the index.
713711
"""
714712
self.levels = levels
@@ -718,8 +716,9 @@ cdef class BaseMultiIndexCodesEngine:
718716
# with positive integers (-1 for NaN becomes 1). This enables us to
719717
# differentiate between values that are missing in other and matching
720718
# NaNs. We will set values that are not found to 0 later:
721-
labels_arr = np.array(labels, dtype="int64").T + multiindex_nulls_shift
722-
codes = labels_arr.astype("uint64", copy=False)
719+
codes = np.array(labels).T
720+
codes += multiindex_nulls_shift # inplace sum optimisation
721+
723722
self.level_has_nans = [-1 in lab for lab in labels]
724723

725724
# Map each codes combination in the index to an integer unambiguously
@@ -731,8 +730,37 @@ cdef class BaseMultiIndexCodesEngine:
731730
# integers representing labels: we will use its get_loc and get_indexer
732731
self._base.__init__(self, lab_ints)
733732

734-
def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
735-
raise NotImplementedError("Implemented by subclass") # pragma: no cover
733+
def _codes_to_ints(self, ndarray codes) -> np.ndarray:
734+
"""
735+
Transform combination(s) of uint into one uint or Python integer (each), in a
736+
strictly monotonic way (i.e. respecting the lexicographic order of integer
737+
combinations).
738+
739+
Parameters
740+
----------
741+
codes : 1- or 2-dimensional array of dtype uint
742+
Combinations of integers (one per row)
743+
744+
Returns
745+
-------
746+
scalar or 1-dimensional array, of dtype _codes_dtype
747+
Integer(s) representing one combination (each).
748+
"""
749+
# To avoid overflows, first make sure we are working with the right dtype:
750+
codes = codes.astype(self._codes_dtype, copy=False)
751+
752+
# Shift the representation of each level by the pre-calculated number of bits:
753+
codes <<= self.offsets # inplace shift optimisation
754+
755+
# Now sum and OR are in fact interchangeable. This is a simple
756+
# composition of the (disjunct) significant bits of each level (i.e.
757+
# each column in "codes") in a single positive integer (per row):
758+
if codes.ndim == 1:
759+
# Single key
760+
return np.bitwise_or.reduce(codes)
761+
762+
# Multiple keys
763+
return np.bitwise_or.reduce(codes, axis=1)
736764

737765
def _extract_level_codes(self, target) -> np.ndarray:
738766
"""
@@ -757,7 +785,7 @@ cdef class BaseMultiIndexCodesEngine:
757785
codes[codes > 0] += 1
758786
if self.level_has_nans[i]:
759787
codes[target.codes[i] == -1] += 1
760-
return self._codes_to_ints(np.array(level_codes, dtype="uint64").T)
788+
return self._codes_to_ints(np.array(level_codes, dtype=self._codes_dtype).T)
761789

762790
def get_indexer(self, target: np.ndarray) -> np.ndarray:
763791
"""
@@ -788,7 +816,7 @@ cdef class BaseMultiIndexCodesEngine:
788816
raise KeyError(key)
789817

790818
# Transform indices into single integer:
791-
lab_int = self._codes_to_ints(np.array(indices, dtype="uint64"))
819+
lab_int = self._codes_to_ints(np.array(indices, dtype=self._codes_dtype))
792820

793821
return self._base.get_loc(self, lab_int)
794822

pandas/core/arrays/arrow/accessors.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,9 @@ def len(self) -> Series:
110110
from pandas import Series
111111

112112
value_lengths = pc.list_value_length(self._pa_array)
113-
return Series(value_lengths, dtype=ArrowDtype(value_lengths.type))
113+
return Series(
114+
value_lengths, dtype=ArrowDtype(value_lengths.type), index=self._data.index
115+
)
114116

115117
def __getitem__(self, key: int | slice) -> Series:
116118
"""
@@ -149,7 +151,9 @@ def __getitem__(self, key: int | slice) -> Series:
149151
# if key < 0:
150152
# key = pc.add(key, pc.list_value_length(self._pa_array))
151153
element = pc.list_element(self._pa_array, key)
152-
return Series(element, dtype=ArrowDtype(element.type))
154+
return Series(
155+
element, dtype=ArrowDtype(element.type), index=self._data.index
156+
)
153157
elif isinstance(key, slice):
154158
if pa_version_under11p0:
155159
raise NotImplementedError(
@@ -167,7 +171,7 @@ def __getitem__(self, key: int | slice) -> Series:
167171
if step is None:
168172
step = 1
169173
sliced = pc.list_slice(self._pa_array, start, stop, step)
170-
return Series(sliced, dtype=ArrowDtype(sliced.type))
174+
return Series(sliced, dtype=ArrowDtype(sliced.type), index=self._data.index)
171175
else:
172176
raise ValueError(f"key must be an int or slice, got {type(key).__name__}")
173177

@@ -195,15 +199,17 @@ def flatten(self) -> Series:
195199
... )
196200
>>> s.list.flatten()
197201
0 1
198-
1 2
199-
2 3
200-
3 3
202+
0 2
203+
0 3
204+
1 3
201205
dtype: int64[pyarrow]
202206
"""
203207
from pandas import Series
204208

205-
flattened = pc.list_flatten(self._pa_array)
206-
return Series(flattened, dtype=ArrowDtype(flattened.type))
209+
counts = pa.compute.list_value_length(self._pa_array)
210+
flattened = pa.compute.list_flatten(self._pa_array)
211+
index = self._data.index.repeat(counts.fill_null(pa.scalar(0, counts.type)))
212+
return Series(flattened, dtype=ArrowDtype(flattened.type), index=index)
207213

208214

209215
class StructAccessor(ArrowAccessor):

pandas/core/arrays/arrow/array.py

+2
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,8 @@ def _box_pa_array(
525525
if pa_type is not None and pa_array.type != pa_type:
526526
if pa.types.is_dictionary(pa_type):
527527
pa_array = pa_array.dictionary_encode()
528+
if pa_array.type != pa_type:
529+
pa_array = pa_array.cast(pa_type)
528530
else:
529531
try:
530532
pa_array = pa_array.cast(pa_type)

pandas/core/base.py

+1
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,7 @@ def nunique(self, dropna: bool = True) -> int:
10621062
Returns
10631063
-------
10641064
int
1065+
An integer indicating the number of unique elements in the object.
10651066
10661067
See Also
10671068
--------

pandas/core/computation/align.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -160,19 +160,24 @@ def align_terms(terms):
160160
# can't iterate so it must just be a constant or single variable
161161
if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
162162
typ = type(terms.value)
163-
return typ, _zip_axes_from_type(typ, terms.value.axes)
164-
return np.result_type(terms.type), None
163+
name = terms.value.name if isinstance(terms.value, ABCSeries) else None
164+
return typ, _zip_axes_from_type(typ, terms.value.axes), name
165+
return np.result_type(terms.type), None, None
165166

166167
# if all resolved variables are numeric scalars
167168
if all(term.is_scalar for term in terms):
168-
return result_type_many(*(term.value for term in terms)).type, None
169+
return result_type_many(*(term.value for term in terms)).type, None, None
170+
171+
# if all input series have a common name, propagate it to the returned series
172+
names = {term.value.name for term in terms if isinstance(term.value, ABCSeries)}
173+
name = names.pop() if len(names) == 1 else None
169174

170175
# perform the main alignment
171176
typ, axes = _align_core(terms)
172-
return typ, axes
177+
return typ, axes, name
173178

174179

175-
def reconstruct_object(typ, obj, axes, dtype):
180+
def reconstruct_object(typ, obj, axes, dtype, name):
176181
"""
177182
Reconstruct an object given its type, raw value, and possibly empty
178183
(None) axes.
@@ -200,7 +205,9 @@ def reconstruct_object(typ, obj, axes, dtype):
200205
res_t = np.result_type(obj.dtype, dtype)
201206

202207
if not isinstance(typ, partial) and issubclass(typ, PandasObject):
203-
return typ(obj, dtype=res_t, **axes)
208+
if name is None:
209+
return typ(obj, dtype=res_t, **axes)
210+
return typ(obj, dtype=res_t, name=name, **axes)
204211

205212
# special case for pathological things like ~True/~False
206213
if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:

pandas/core/computation/engines.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def __init__(self, expr) -> None:
5454
self.expr = expr
5555
self.aligned_axes = None
5656
self.result_type = None
57+
self.result_name = None
5758

5859
def convert(self) -> str:
5960
"""
@@ -76,12 +77,18 @@ def evaluate(self) -> object:
7677
The result of the passed expression.
7778
"""
7879
if not self._is_aligned:
79-
self.result_type, self.aligned_axes = align_terms(self.expr.terms)
80+
self.result_type, self.aligned_axes, self.result_name = align_terms(
81+
self.expr.terms
82+
)
8083

8184
# make sure no names in resolvers and locals/globals clash
8285
res = self._evaluate()
8386
return reconstruct_object(
84-
self.result_type, res, self.aligned_axes, self.expr.terms.return_type
87+
self.result_type,
88+
res,
89+
self.aligned_axes,
90+
self.expr.terms.return_type,
91+
self.result_name,
8592
)
8693

8794
@property

0 commit comments

Comments
 (0)