
Commit 72a2a2a

Merge remote-tracking branch 'upstream/master' into bisect
2 parents: dfc1c7a + de34ca3


63 files changed: +699, -375 lines

.github/workflows/ci.yml (+4, -3)

@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - master
+      - 1.3.x
   pull_request:
     branches:
       - master
@@ -132,15 +133,15 @@ jobs:
         echo "${{ secrets.server_ssh_key }}" > ~/.ssh/id_rsa
         chmod 600 ~/.ssh/id_rsa
         echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBE1Kkopomm7FHG5enATf7SgnpICZ4W2bw+Ho+afqin+w7sMcrsa0je7sbztFAV8YchDkiBKnWTG4cRT+KZgZCaY=" > ~/.ssh/known_hosts
-      if: github.event_name == 'push'
+      if: ${{github.event_name == 'push' && github.ref == 'refs/head/master'}}

     - name: Upload web
       run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' --exclude='Pandas_Cheat_Sheet*' web/build/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas
-      if: github.event_name == 'push'
+      if: ${{github.event_name == 'push' && github.ref == 'refs/head/master'}}

     - name: Upload dev docs
       run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/dev
-      if: github.event_name == 'push'
+      if: ${{github.event_name == 'push' && github.ref == 'refs/head/master'}}

     - name: Move docs into site directory
       run: mv doc/build/html web/build/docs

.github/workflows/posix.yml (+1)

@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - master
+      - 1.3.x
   pull_request:
     branches:
       - master

.github/workflows/pre-commit.yml (+1)

@@ -5,6 +5,7 @@ on:
   push:
     branches:
       - master
+      - 1.3.x

 jobs:
   pre-commit:

.github/workflows/sdist.yml (+1)

@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - master
+      - 1.3.x
   pull_request:
     branches:
       - master

asv_bench/benchmarks/algos/isin.py (+3, -15)

@@ -1,10 +1,5 @@
 import numpy as np

-try:
-    from pandas.compat import np_version_under1p20
-except ImportError:
-    from pandas.compat.numpy import _np_version_under1p20 as np_version_under1p20
-
 from pandas import (
     Categorical,
     NaT,
@@ -283,10 +278,6 @@ class IsInLongSeriesLookUpDominates:
     def setup(self, dtype, MaxNumber, series_type):
         N = 10 ** 7

-        # https://github.com/pandas-dev/pandas/issues/39844
-        if not np_version_under1p20 and dtype in ("Int64", "Float64"):
-            raise NotImplementedError
-
         if series_type == "random_hits":
             array = np.random.randint(0, MaxNumber, N)
         if series_type == "random_misses":
@@ -297,7 +288,8 @@ def setup(self, dtype, MaxNumber, series_type):
             array = np.arange(N) + MaxNumber

         self.series = Series(array).astype(dtype)
-        self.values = np.arange(MaxNumber).astype(dtype)
+
+        self.values = np.arange(MaxNumber).astype(dtype.lower())

     def time_isin(self, dtypes, MaxNumber, series_type):
         self.series.isin(self.values)
@@ -313,16 +305,12 @@ class IsInLongSeriesValuesDominate:
     def setup(self, dtype, series_type):
         N = 10 ** 7

-        # https://github.com/pandas-dev/pandas/issues/39844
-        if not np_version_under1p20 and dtype in ("Int64", "Float64"):
-            raise NotImplementedError
-
         if series_type == "random":
             vals = np.random.randint(0, 10 * N, N)
         if series_type == "monotone":
             vals = np.arange(N)

-        self.values = vals.astype(dtype)
+        self.values = vals.astype(dtype.lower())
         M = 10 ** 6 + 1
         self.series = Series(np.arange(M)).astype(dtype)
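
For reference, a toy-sized sketch of the benchmark pattern after this change (sizes shrunk so it runs quickly; names here are illustrative and not part of the commit):

    import numpy as np
    import pandas as pd

    N, max_number = 10 ** 5, 10 ** 4
    array = np.random.randint(0, max_number, N)

    series = pd.Series(array).astype("Int64")         # nullable extension dtype
    values = np.arange(max_number).astype("int64")    # plain numpy dtype, i.e. dtype.lower()

    result = series.isin(values)   # works directly; the NotImplementedError guard is no longer needed
    print(result.dtype, result.sum())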

doc/source/user_guide/categorical.rst (+4, -4)

@@ -777,8 +777,8 @@ value is included in the ``categories``:
     df
     try:
         df.iloc[2:4, :] = [["c", 3], ["c", 3]]
-    except ValueError as e:
-        print("ValueError:", str(e))
+    except TypeError as e:
+        print("TypeError:", str(e))

 Setting values by assigning categorical data will also check that the ``categories`` match:

@@ -788,8 +788,8 @@ Setting values by assigning categorical data will also check that the ``categori
     df
     try:
         df.loc["j":"k", "cats"] = pd.Categorical(["b", "b"], categories=["a", "b", "c"])
-    except ValueError as e:
-        print("ValueError:", str(e))
+    except TypeError as e:
+        print("TypeError:", str(e))

 Assigning a ``Categorical`` to parts of a column of other types will use the values:
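
The doc change reflects that assigning a value outside the declared categories now surfaces as a TypeError. A minimal standalone sketch (column names and data invented here; on releases before this change the same assignment raises ValueError instead, so the except clause would not catch it):

    import pandas as pd

    cats = pd.Categorical(["a"] * 7, categories=["a", "b"])
    df = pd.DataFrame({"cats": cats, "values": [1] * 7})

    try:
        df.iloc[2:4, :] = [["c", 3], ["c", 3]]   # "c" is not an allowed category
    except TypeError as e:
        print("TypeError:", str(e))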

doc/source/user_guide/groupby.rst (+1, -2)

@@ -391,7 +391,6 @@ something different for each of the columns. Thus, using ``[]`` similar to
 getting a column from a DataFrame, you can do:

 .. ipython:: python
-   :suppress:

    df = pd.DataFrame(
        {
@@ -402,7 +401,7 @@ getting a column from a DataFrame, you can do:
        }
    )

-.. ipython:: python
+   df

    grouped = df.groupby(["A"])
    grouped_C = grouped["C"]
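
The doc change merges the DataFrame construction into the visible code block. A short runnable version of the pattern it documents (column names and data invented here):

    import pandas as pd

    df = pd.DataFrame(
        {
            "A": ["foo", "bar", "foo", "bar"],
            "B": ["one", "one", "two", "two"],
            "C": [1, 2, 3, 4],
            "D": [10, 20, 30, 40],
        }
    )

    grouped = df.groupby(["A"])
    grouped_C = grouped["C"]      # SeriesGroupBy: only column "C" is aggregated
    print(grouped_C.sum())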

doc/source/user_guide/style.ipynb (+1, -1)

@@ -1201,7 +1201,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df2.style.bar(align=0, vmin=-2.5, vmax=2.5, color=['#d65f5f', '#5fba7d'],\n",
+    "df2.style.bar(align=0, vmin=-2.5, vmax=2.5, color=['#d65f5f', '#5fba7d'], height=50,\n",
     "              width=60, props=\"width: 120px; border-right: 1px solid black;\").format('{:.3f}', na_rep=\"\")"
    ]
   },
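
Outside the notebook JSON, the same call looks like the sketch below. It assumes a Styler that accepts numeric align plus height/width (the development behaviour this notebook cell exercises) and a df2 with numeric data, both invented here:

    import numpy as np
    import pandas as pd

    df2 = pd.DataFrame(np.random.randn(5, 3) * 2, columns=["A", "B", "C"])

    styler = df2.style.bar(
        align=0, vmin=-2.5, vmax=2.5,
        color=['#d65f5f', '#5fba7d'],
        height=50, width=60,
        props="width: 120px; border-right: 1px solid black;",
    ).format('{:.3f}', na_rep="")
    # `styler` renders as HTML in a notebook; styler.to_html() gives the markup directly.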

doc/source/whatsnew/v1.3.1.rst (+2, -1)

@@ -22,7 +22,8 @@ Fixed regressions
 - Performance regression in :class:`DataFrame` in reduction operations requiring casting such as :meth:`DataFrame.mean` on integer data (:issue:`38592`)
 - Performance regression in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when ``orient`` argument one of "records", "dict", or "split" (:issue:`42352`)
 - Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:`42461`)
--
+- Fixed regression in :meth:`DataFrame.isin` and :meth:`Series.isin` raising ``TypeError`` with nullable data containing at least one missing value (:issue:`42405`)
+- Regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`)

 .. ---------------------------------------------------------------------------
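
A quick illustration of the two regressions noted above, as they behave once fixed (tiny invented data):

    import pandas as pd

    ser = pd.Series([1, 2, None], dtype="Int64")      # nullable data with a missing value
    print(ser.isin([1]))                               # the 1.3.0 regression raised TypeError here (GH42405)

    print(pd.concat([pd.Series([True]), pd.Series([2])]).dtype)   # integer result per GH42092, not object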

doc/source/whatsnew/v1.4.0.rst (+14, -4)

@@ -154,7 +154,7 @@ Deprecations
 - Deprecated ``method`` argument in :meth:`Index.get_loc`, use ``index.get_indexer([label], method=...)`` instead (:issue:`42269`)
 - Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, a :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with leading :class:`Float64Index` level not containing the key (:issue:`33469`)
 - Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`)
--
+- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)

 .. ---------------------------------------------------------------------------

@@ -174,12 +174,17 @@ Bug fixes

 Categorical
 ^^^^^^^^^^^
--
+- Bug in setting dtype-incompatible values into a :class:`Categorical` (or ``Series`` or ``DataFrame`` backed by ``Categorical``) raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`)
+- Bug in :meth:`Categorical.searchsorted` when passing a dtype-incompatible value raising ``KeyError`` instead of ``TypeError`` (:issue:`41919`)
+- Bug in :meth:`Series.where` with ``CategoricalDtype`` when passing a dtype-incompatible value raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`)
+- Bug in :meth:`Categorical.fillna` when passing a dtype-incompatible value raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`)
+- Bug in :meth:`Categorical.fillna` with a tuple-like category raising ``ValueError`` instead of ``TypeError`` when filling with a non-category tuple (:issue:`41919`)
 -

 Datetimelike
 ^^^^^^^^^^^^
 - Bug in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)
+- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
 -

 Timedelta
@@ -215,8 +220,11 @@ Interval

 Indexing
 ^^^^^^^^
-- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
 - Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` when the object's Index has a length greater than one but only one unique value (:issue:`42365`)
+- Bug in :meth:`Series.loc` when with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
+- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
+- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
+-

 Missing
 ^^^^^^^
@@ -225,13 +233,15 @@ Missing

 MultiIndex
 ^^^^^^^^^^
+- Bug in :meth:`MultiIndex.get_loc` where the first level is a :class:`DatetimeIndex` and a string key is passed (:issue:`42465`)
 - Bug in :meth:`MultiIndex.reindex` when passing a ``level`` that corresponds to an ``ExtensionDtype`` level (:issue:`42043`)
 -

 I/O
 ^^^
 - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`)
--
+- Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
+- Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
 -

 Period
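
For the new MultiIndex/DatetimeIndex indexing entries (GH42465, GH42476), a small sketch of the behaviour they describe; index values and names are invented here, and the exact results assume the fixed behaviour:

    import pandas as pd

    mi = pd.MultiIndex.from_product(
        [pd.date_range("2021-01-01", periods=2, freq="D"), ["a", "b"]],
        names=["date", "key"],
    )
    ser = pd.Series(range(4), index=mi)

    print(ser.loc["2021-01-01"])          # string key resolves against the DatetimeIndex level
    print(ser.loc[("2021-01-01", "a")])   # scalar levels are dropped, leaving the scalar value 0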

pandas/_libs/internals.pyi (+3)

@@ -1,6 +1,7 @@
 from typing import (
     Iterator,
     Sequence,
+    final,
     overload,
 )

@@ -50,10 +51,12 @@ class SharedBlock:

 class NumpyBlock(SharedBlock):
     values: np.ndarray
+    @final
     def getitem_block_index(self: T, slicer: slice) -> T: ...

 class NDArrayBackedBlock(SharedBlock):
     values: NDArrayBackedExtensionArray
+    @final
     def getitem_block_index(self: T, slicer: slice) -> T: ...

 class Block(SharedBlock): ...
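
The stub marks these methods with typing.final so type checkers reject overrides (the .pyx change below drops its placeholder comments, since Cython itself cannot use the decorator). A minimal sketch of the pattern with an invented class name:

    from typing import final

    class BlockDemo:
        @final  # mypy flags any subclass that overrides this method
        def getitem_block_index(self, slicer: slice) -> "BlockDemo":
            return self

    class SubBlockDemo(BlockDemo):
        pass  # defining getitem_block_index here would be a type-check error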

pandas/_libs/internals.pyx (-2)

@@ -517,7 +517,6 @@ cdef class NumpyBlock(SharedBlock):
         # set placement and ndim
         self.values = values

-    # @final  # not useful in cython, but we _would_ annotate with @final
     cpdef NumpyBlock getitem_block_index(self, slice slicer):
         """
         Perform __getitem__-like specialized to slicing along index.
@@ -540,7 +539,6 @@ cdef class NDArrayBackedBlock(SharedBlock):
         # set placement and ndim
         self.values = values

-    # @final  # not useful in cython, but we _would_ annotate with @final
     cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer):
         """
         Perform __getitem__-like specialized to slicing along index.

pandas/_libs/parsers.pyx (+2)

@@ -1280,6 +1280,8 @@ cdef class TextReader:
                 # generate extra (bogus) headers if there are more columns than headers
                 if j >= len(self.header[0]):
                     return j
+                elif self.has_mi_columns:
+                    return tuple(header_row[j] for header_row in self.header)
                 else:
                     return self.header[0][j]
             else:
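
With multi-row headers the parser now hands back tuple column names, so tuple-based arguments line up with the resulting MultiIndex columns (GH42446 in the whatsnew above). A small self-contained example of that shape:

    import io
    import pandas as pd

    data = "a,a,b\nx,y,z\n1,2,3\n4,5,6\n"
    df = pd.read_csv(io.StringIO(data), header=[0, 1])   # two header rows -> MultiIndex columns

    print(df.columns.tolist())   # [('a', 'x'), ('a', 'y'), ('b', 'z')]
    print(df[("a", "x")])        # column selection by tuple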

pandas/_typing.py (+1, -6)

@@ -37,10 +37,7 @@
 # and use a string literal forward reference to it in subsequent types
 # https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
 if TYPE_CHECKING:
-    from typing import (
-        TypedDict,
-        final,
-    )
+    from typing import TypedDict

     import numpy.typing as npt

@@ -76,8 +73,6 @@
     from pandas.tseries.offsets import DateOffset
 else:
     npt: Any = None
-    # typing.final does not exist until py38
-    final = lambda x: x
     # typing.TypedDict does not exist until py38
     TypedDict = dict

pandas/core/algorithms.py (+6, -6)

@@ -531,7 +531,7 @@ def factorize_array(
     mask: np.ndarray | None = None,
 ) -> tuple[npt.NDArray[np.intp], np.ndarray]:
     """
-    Factorize an array-like to codes and uniques.
+    Factorize a numpy array to codes and uniques.

     This doesn't do any coercion of types or unboxing before factorization.

@@ -910,7 +910,7 @@ def duplicated(

     Parameters
     ----------
-    values : ndarray-like
+    values : nd.array, ExtensionArray or Series
         Array over which to check for duplicate values.
     keep : {'first', 'last', False}, default 'first'
         - ``first`` : Mark duplicates as ``True`` except for the first
@@ -1412,8 +1412,8 @@ def take(

     Parameters
     ----------
-    arr : sequence
-        Non array-likes (sequences without a dtype) are coerced
+    arr : array-like or scalar value
+        Non array-likes (sequences/scalars without a dtype) are coerced
         to an ndarray.
     indices : sequence of integers
         Indices to be taken.
@@ -1523,11 +1523,11 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:

     Parameters
     ----------
-    arr: array-like
+    arr: np.ndarray, ExtensionArray, Series
         Input array. If `sorter` is None, then it must be sorted in
         ascending order, otherwise `sorter` must be an array of indices
         that sort it.
-    value : array-like
+    value : array-like or scalar
         Values to insert into `arr`.
     side : {'left', 'right'}, optional
         If 'left', the index of the first suitable location found is given.
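
The sharpened docstrings match how the public Series.searchsorted behaves with either a scalar or an array-like value, e.g.:

    import pandas as pd

    ser = pd.Series([1, 2, 3, 5])        # must already be sorted when no `sorter` is passed
    print(ser.searchsorted(4))           # scalar value -> single insertion position (3)
    print(ser.searchsorted([0, 6]))      # array-like value -> array of positions ([0, 4])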

pandas/core/arrays/base.py (+2, -2)

@@ -826,8 +826,8 @@ def searchsorted(self, value, side="left", sorter=None):

         Parameters
         ----------
-        value : array-like
-            Values to insert into `self`.
+        value : array-like, list or scalar
+            Value(s) to insert into `self`.
         side : {'left', 'right'}, optional
             If 'left', the index of the first suitable location found is given.
             If 'right', return the last such index. If there is no suitable