Skip to content

Commit e02a124

Browse files
author
Sumanau Sareen
committed
Merge remote-tracking branch 'upstream/master' into BUG-32967
2 parents 0c1aa9e + b6cb1a4 commit e02a124

File tree

110 files changed

+2472
-1848
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

110 files changed

+2472
-1848
lines changed

asv_bench/benchmarks/series_methods.py

+14-11
Original file line numberDiff line numberDiff line change
@@ -223,27 +223,27 @@ def time_series_datetimeindex_repr(self):
223223

224224
class All:
225225

226-
params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
227-
param_names = ["N", "case"]
226+
params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
227+
param_names = ["N", "case", "dtype"]
228228

229-
def setup(self, N, case):
229+
def setup(self, N, case, dtype):
230230
val = case != "fast"
231-
self.s = Series([val] * N)
231+
self.s = Series([val] * N, dtype=dtype)
232232

233-
def time_all(self, N, case):
233+
def time_all(self, N, case, dtype):
234234
self.s.all()
235235

236236

237237
class Any:
238238

239-
params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
240-
param_names = ["N", "case"]
239+
params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
240+
param_names = ["N", "case", "dtype"]
241241

242-
def setup(self, N, case):
242+
def setup(self, N, case, dtype):
243243
val = case == "fast"
244-
self.s = Series([val] * N)
244+
self.s = Series([val] * N, dtype=dtype)
245245

246-
def time_any(self, N, case):
246+
def time_any(self, N, case, dtype):
247247
self.s.any()
248248

249249

@@ -265,11 +265,14 @@ class NanOps:
265265
"prod",
266266
],
267267
[10 ** 3, 10 ** 6],
268-
["int8", "int32", "int64", "float64"],
268+
["int8", "int32", "int64", "float64", "Int64", "boolean"],
269269
]
270270
param_names = ["func", "N", "dtype"]
271271

272272
def setup(self, func, N, dtype):
273+
if func == "argmax" and dtype in {"Int64", "boolean"}:
274+
# Skip argmax for nullable integer and boolean dtypes since this doesn't work yet (GH-24382)
275+
raise NotImplementedError
273276
self.s = Series([1] * N, dtype=dtype)
274277
self.func = getattr(self.s, func)
275278

asv_bench/benchmarks/stat_ops.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,17 @@
77

88
class FrameOps:
99

10-
params = [ops, ["float", "int"], [0, 1]]
10+
params = [ops, ["float", "int", "Int64"], [0, 1]]
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
14+
if op == "mad" and dtype == "Int64" and axis == 1:
15+
# GH-33036
16+
raise NotImplementedError
17+
values = np.random.randn(100000, 4)
18+
if dtype == "Int64":
19+
values = values.astype(int)
20+
df = pd.DataFrame(values).astype(dtype)
1521
self.df_func = getattr(df, op)
1622

1723
def time_op(self, op, dtype, axis):

doc/source/whatsnew/v1.1.0.rst

+40-1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,32 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss
168168
...
169169
KeyError: Timestamp('1970-01-01 00:00:00')
170170
171+
:meth:`DataFrame.merge` preserves right frame's row order
172+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
173+
:meth:`DataFrame.merge` now preserves the right frame's row order when executing a right merge (:issue:`27453`).
174+
175+
.. ipython:: python
176+
177+
left_df = pd.DataFrame({'animal': ['dog', 'pig'], 'max_speed': [40, 11]})
178+
right_df = pd.DataFrame({'animal': ['quetzal', 'pig'], 'max_speed': [80, 11]})
179+
left_df
180+
right_df
181+
182+
*Previous behavior*:
183+
184+
.. code-block:: python
185+
186+
>>> left_df.merge(right_df, on=['animal', 'max_speed'], how="right")
187+
animal max_speed
188+
0 pig 11
189+
1 quetzal 80
190+
191+
*New behavior*:
192+
193+
.. ipython:: python
194+
195+
left_df.merge(right_df, on=['animal', 'max_speed'], how="right")
196+
171197
.. ---------------------------------------------------------------------------
172198
173199
.. _whatsnew_110.api_breaking.assignment_to_multiple_columns:
@@ -229,6 +255,8 @@ Performance improvements
229255
sparse values from ``scipy.sparse`` matrices using the
230256
:meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
231257
:issue:`32825`, :issue:`32826`, :issue:`32856`, :issue:`32858`).
258+
- Performance improvement in :meth:`Series.sum` for nullable (integer and boolean) dtypes (:issue:`30982`).
259+
232260

233261
.. ---------------------------------------------------------------------------
234262
@@ -255,6 +283,7 @@ Datetimelike
255283
- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`)
256284
- :class:`Timestamp` raising confusing error message when year, month or day is missing (:issue:`31200`)
257285
- Bug in :class:`DatetimeIndex` constructor incorrectly accepting ``bool``-dtyped inputs (:issue:`32668`)
286+
- Bug in :meth:`DatetimeIndex.searchsorted` not accepting a ``list`` or :class:`Series` as its argument (:issue:`32762`)
258287

259288
Timedelta
260289
^^^^^^^^^
@@ -287,7 +316,7 @@ Conversion
287316
Strings
288317
^^^^^^^
289318

290-
-
319+
- Bug in the :meth:`~Series.astype` method when converting "string" dtype data to nullable integer dtype (:issue:`32450`).
291320
-
292321

293322

@@ -309,6 +338,9 @@ Indexing
309338
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
310339
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
311340
- Bug in :meth:`Series.__getitem__` indexing with non-standard scalars, e.g. ``np.dtype`` (:issue:`32684`)
341+
- Fix to preserve the ability to index with the "nearest" method with xarray's CFTimeIndex, an :class:`Index` subclass (`pydata/xarray#3751 <https://github.com/pydata/xarray/issues/3751>`_, :issue:`32905`).
342+
- Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`)
343+
- Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`)
312344

313345
Missing
314346
^^^^^^^
@@ -370,6 +402,8 @@ Groupby/resample/rolling
370402

371403
- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
372404
- Bug in :meth:`DataFrameGroupBy.transform` produces incorrect result with transformation functions (:issue:`30918`)
405+
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` produces inconsistent type when aggregating Boolean series (:issue:`32894`)
406+
373407

374408
Reshaping
375409
^^^^^^^^^
@@ -382,11 +416,16 @@ Reshaping
382416
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
383417
- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`)
384418
- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
419+
- Bug where :meth:`Index.astype` would lose the name attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`)
385420
- :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing a DataFrame (:issue:`31413`)
386421
- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`)
422+
- Bug where an inplace operation on a :class:`Series` would add a column to the :class:`DataFrame` from which it was originally dropped (using ``inplace=True``) (:issue:`30484`)
387423
- Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. (:issue:`32423`)
388424
- Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`)
425+
- Bug in :func:`concat` where passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`)
389426
- :meth:`DataFrame.agg` now provides a more descriptive ``SpecificationError`` message when attempting to aggregate a non-existent column (:issue:`32755`)
427+
- Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`)
428+
390429

391430
Sparse
392431
^^^^^^

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ dependencies:
101101
- s3fs # pandas.read_csv... when using 's3://...' path
102102
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
103103
- xarray # DataFrame.to_xarray
104+
- cftime # Needed for downstream xarray.CFTimeIndex test
104105
- pyreadstat # pandas.read_spss
105106
- tabulate>=0.8.3 # DataFrame.to_markdown
106107
- pip:

pandas/_libs/algos.pyx

+30-9
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,15 @@ cimport pandas._libs.util as util
3838
from pandas._libs.util cimport numeric, get_nat
3939

4040
from pandas._libs.khash cimport (
41-
khiter_t, kh_destroy_int64, kh_put_int64, kh_init_int64, kh_int64_t,
42-
kh_resize_int64, kh_get_int64)
41+
kh_destroy_int64,
42+
kh_get_int64,
43+
kh_init_int64,
44+
kh_int64_t,
45+
kh_put_int64,
46+
kh_resize_int64,
47+
khiter_t,
48+
)
49+
4350

4451
import pandas._libs.missing as missing
4552

@@ -791,8 +798,13 @@ ctypedef fused rank_t:
791798

792799
@cython.wraparound(False)
793800
@cython.boundscheck(False)
794-
def rank_1d(rank_t[:] in_arr, ties_method='average',
795-
ascending=True, na_option='keep', pct=False):
801+
def rank_1d(
802+
rank_t[:] in_arr,
803+
ties_method="average",
804+
bint ascending=True,
805+
na_option="keep",
806+
bint pct=False,
807+
):
796808
"""
797809
Fast NaN-friendly version of ``scipy.stats.rankdata``.
798810
"""
@@ -1009,8 +1021,14 @@ def rank_1d(rank_t[:] in_arr, ties_method='average',
10091021
return ranks
10101022

10111023

1012-
def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average',
1013-
ascending=True, na_option='keep', pct=False):
1024+
def rank_2d(
1025+
rank_t[:, :] in_arr,
1026+
int axis=0,
1027+
ties_method="average",
1028+
bint ascending=True,
1029+
na_option="keep",
1030+
bint pct=False,
1031+
):
10141032
"""
10151033
Fast NaN-friendly version of ``scipy.stats.rankdata``.
10161034
"""
@@ -1190,9 +1208,12 @@ ctypedef fused out_t:
11901208

11911209
@cython.boundscheck(False)
11921210
@cython.wraparound(False)
1193-
def diff_2d(diff_t[:, :] arr,
1194-
out_t[:, :] out,
1195-
Py_ssize_t periods, int axis):
1211+
def diff_2d(
1212+
diff_t[:, :] arr,
1213+
out_t[:, :] out,
1214+
Py_ssize_t periods,
1215+
int axis,
1216+
):
11961217
cdef:
11971218
Py_ssize_t i, j, sx, sy, start, stop
11981219
bint f_contig = arr.is_f_contig()

pandas/_libs/index.pyx

+15-6
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,19 @@ import warnings
22

33
import numpy as np
44
cimport numpy as cnp
5-
from numpy cimport (ndarray, intp_t,
6-
float64_t, float32_t,
7-
int64_t, int32_t, int16_t, int8_t,
8-
uint64_t, uint32_t, uint16_t, uint8_t
5+
from numpy cimport (
6+
float32_t,
7+
float64_t,
8+
int8_t,
9+
int16_t,
10+
int32_t,
11+
int64_t,
12+
intp_t,
13+
ndarray,
14+
uint8_t,
15+
uint16_t,
16+
uint32_t,
17+
uint64_t,
918
)
1019
cnp.import_array()
1120

@@ -364,7 +373,7 @@ cdef class ObjectEngine(IndexEngine):
364373

365374
cdef class DatetimeEngine(Int64Engine):
366375

367-
cdef _get_box_dtype(self):
376+
cdef str _get_box_dtype(self):
368377
return 'M8[ns]'
369378

370379
cdef int64_t _unbox_scalar(self, scalar) except? -1:
@@ -454,7 +463,7 @@ cdef class DatetimeEngine(Int64Engine):
454463

455464
cdef class TimedeltaEngine(DatetimeEngine):
456465

457-
cdef _get_box_dtype(self):
466+
cdef str _get_box_dtype(self):
458467
return 'm8[ns]'
459468

460469
cdef int64_t _unbox_scalar(self, scalar) except? -1:

pandas/_libs/indexing.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ cdef class _NDFrameIndexerBase:
22
"""
33
A base class for _NDFrameIndexer for fast instantiation and attribute access.
44
"""
5-
cdef public object obj, name, _ndim
5+
cdef public:
6+
object obj, name, _ndim
67

78
def __init__(self, name, obj):
89
self.obj = obj

0 commit comments

Comments
 (0)