Skip to content

Commit 9473046

Browse files
committed
Merge branch '1.3.x' of https://github.com/pandas-dev/pandas into 1.3.x
2 parents 7945210 + 538d69a commit 9473046

29 files changed

+282
-62
lines changed

.github/workflows/python-dev.yml

+9-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ on:
1010
paths-ignore:
1111
- "doc/**"
1212

13+
env:
14+
PYTEST_WORKERS: "auto"
15+
PANDAS_CI: 1
16+
PATTERN: "not slow and not network and not clipboard"
17+
COVERAGE: true
18+
1319
jobs:
1420
build:
1521
runs-on: ubuntu-latest
@@ -32,7 +38,7 @@ jobs:
3238
pip install git+https://github.com/numpy/numpy.git
3339
pip install git+https://github.com/pytest-dev/pytest.git
3440
pip install git+https://github.com/nedbat/coveragepy.git
35-
pip install cython python-dateutil pytz hypothesis pytest-xdist
41+
pip install cython python-dateutil pytz hypothesis pytest-xdist pytest-cov
3642
pip list
3743
3844
- name: Build Pandas
@@ -46,7 +52,8 @@ jobs:
4652
4753
- name: Test with pytest
4854
run: |
49-
coverage run -m pytest -m 'not slow and not network and not clipboard' pandas
55+
ci/run_tests.sh
56+
# GH 41935
5057
continue-on-error: true
5158

5259
- name: Publish test results

ci/azure/windows.yml

+2
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ jobs:
1212
ENV_FILE: ci/deps/azure-windows-37.yaml
1313
CONDA_PY: "37"
1414
PATTERN: "not slow and not network"
15+
PYTEST_WORKERS: 2 # GH-42236
1516

1617
py38_np18:
1718
ENV_FILE: ci/deps/azure-windows-38.yaml
1819
CONDA_PY: "38"
1920
PATTERN: "not slow and not network and not high_memory"
21+
PYTEST_WORKERS: 2 # GH-42236
2022

2123
steps:
2224
- powershell: |

doc/source/user_guide/io.rst

+11-1
Original file line numberDiff line numberDiff line change
@@ -5526,13 +5526,23 @@ below and the SQLAlchemy `documentation <https://docs.sqlalchemy.org/en/latest/c
55265526
# Create your engine.
55275527
engine = create_engine("sqlite:///:memory:")
55285528
5529-
If you want to manage your own connections you can pass one of those instead:
5529+
If you want to manage your own connections you can pass one of those instead. The example below opens a
5530+
connection to the database using a Python context manager that automatically closes the connection after
5531+
the block has completed.
5532+
See the `SQLAlchemy docs <https://docs.sqlalchemy.org/en/latest/core/connections.html#basic-usage>`__
5533+
for an explanation of how the database connection is handled.
55305534

55315535
.. code-block:: python
55325536
55335537
with engine.connect() as conn, conn.begin():
55345538
data = pd.read_sql_table("data", conn)
55355539
5540+
.. warning::
5541+
5542+
When you open a connection to a database you are also responsible for closing it.
5543+
Side effects of leaving a connection open may include locking the database or
5544+
other breaking behaviour.
5545+
55365546
Writing DataFrames
55375547
''''''''''''''''''
55385548

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,7 @@ Other API changes
707707
- Added new ``engine`` and ``**engine_kwargs`` parameters to :meth:`DataFrame.to_sql` to support other future "SQL engines". Currently we still only use ``SQLAlchemy`` under the hood, but more engines are planned to be supported such as `turbodbc <https://turbodbc.readthedocs.io/en/latest/>`_ (:issue:`36893`)
708708
- Removed redundant ``freq`` from :class:`PeriodIndex` string representation (:issue:`41653`)
709709
- :meth:`ExtensionDtype.construct_array_type` is now a required method instead of an optional one for :class:`ExtensionDtype` subclasses (:issue:`24860`)
710+
- Calling ``hash`` on non-hashable pandas objects will now raise ``TypeError`` with the built-in error message (e.g. ``unhashable type: 'Series'``). Previously it would raise a custom message such as ``'Series' objects are mutable, thus they cannot be hashed``. Furthermore, ``isinstance(<Series>, abc.collections.Hashable)`` will now return ``False`` (:issue:`40013`)
710711
- :meth:`.Styler.from_custom_template` now has two new arguments for template names, and removed the old ``name``, due to template inheritance having been introduced for better parsing (:issue:`42053`). Subclassing modifications to Styler attributes are also needed.
711712

712713
.. _whatsnew_130.api_breaking.build:

pandas/core/arrays/base.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1296,8 +1296,10 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
12961296
"""
12971297
raise TypeError(f"cannot perform {name} with type {self.dtype}")
12981298

1299-
def __hash__(self) -> int:
1300-
raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
1299+
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
1300+
# Incompatible types in assignment (expression has type "None", base class
1301+
# "object" defined the type as "Callable[[object], int]")
1302+
__hash__: None # type: ignore[assignment]
13011303

13021304
# ------------------------------------------------------------------------
13031305
# Non-Optimized Default Methods

pandas/core/frame.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -6186,7 +6186,10 @@ def f(vals) -> tuple[np.ndarray, int]:
61866186
return labels.astype("i8", copy=False), len(shape)
61876187

61886188
if subset is None:
6189-
subset = self.columns
6189+
# Incompatible types in assignment
6190+
# (expression has type "Index", variable has type "Sequence[Any]")
6191+
# (pending on https://github.com/pandas-dev/pandas/issues/28770)
6192+
subset = self.columns # type: ignore[assignment]
61906193
elif (
61916194
not np.iterable(subset)
61926195
or isinstance(subset, str)

pandas/core/generic.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -1873,11 +1873,10 @@ def _drop_labels_or_levels(self, keys, axis: int = 0):
18731873
# ----------------------------------------------------------------------
18741874
# Iteration
18751875

1876-
def __hash__(self) -> int:
1877-
raise TypeError(
1878-
f"{repr(type(self).__name__)} objects are mutable, "
1879-
f"thus they cannot be hashed"
1880-
)
1876+
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
1877+
# Incompatible types in assignment (expression has type "None", base class
1878+
# "object" defined the type as "Callable[[object], int]")
1879+
__hash__: None # type: ignore[assignment]
18811880

18821881
def __iter__(self):
18831882
"""

pandas/core/indexes/base.py

+20-6
Original file line numberDiff line numberDiff line change
@@ -3411,12 +3411,25 @@ def get_indexer(
34113411
# matched to Interval scalars
34123412
return self._get_indexer_non_comparable(target, method=method, unique=True)
34133413

3414+
if is_categorical_dtype(self.dtype):
3415+
# _maybe_cast_listlike_indexer ensures target has our dtype
3416+
# (could improve perf by doing _should_compare check earlier?)
3417+
assert is_dtype_equal(self.dtype, target.dtype)
3418+
3419+
indexer = self._engine.get_indexer(target.codes)
3420+
if self.hasnans and target.hasnans:
3421+
loc = self.get_loc(np.nan)
3422+
mask = target.isna()
3423+
indexer[mask] = loc
3424+
return indexer
3425+
34143426
if is_categorical_dtype(target.dtype):
34153427
# potential fastpath
34163428
# get an indexer for unique categories then propagate to codes via take_nd
3417-
# Note: calling get_indexer instead of _get_indexer causes
3418-
# RecursionError GH#42088
3419-
categories_indexer = self._get_indexer(target.categories)
3429+
# get_indexer instead of _get_indexer needed for MultiIndex cases
3430+
# e.g. test_append_different_columns_types
3431+
categories_indexer = self.get_indexer(target.categories)
3432+
34203433
indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)
34213434

34223435
if (not self._is_multi and self.hasnans) and target.hasnans:
@@ -4527,9 +4540,10 @@ def __contains__(self, key: Any) -> bool:
45274540
except (OverflowError, TypeError, ValueError):
45284541
return False
45294542

4530-
@final
4531-
def __hash__(self):
4532-
raise TypeError(f"unhashable type: {repr(type(self).__name__)}")
4543+
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
4544+
# Incompatible types in assignment (expression has type "None", base class
4545+
# "object" defined the type as "Callable[[object], int]")
4546+
__hash__: None # type: ignore[assignment]
45334547

45344548
@final
45354549
def __setitem__(self, key, value):

pandas/core/indexes/category.py

+15
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,21 @@ def _maybe_cast_indexer(self, key) -> int:
487487
return -1
488488
raise
489489

490+
def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
491+
if isinstance(values, CategoricalIndex):
492+
values = values._data
493+
if isinstance(values, Categorical):
494+
# Indexing on codes is more efficient if categories are the same,
495+
# so we can apply some optimizations based on the degree of
496+
# dtype-matching.
497+
cat = self._data._encode_with_my_categories(values)
498+
codes = cat._codes
499+
else:
500+
codes = self.categories.get_indexer(values)
501+
codes = codes.astype(self.codes.dtype, copy=False)
502+
cat = self._data._from_backing_data(codes)
503+
return type(self)._simple_new(cat)
504+
490505
def _get_indexer(
491506
self,
492507
target: Index,

pandas/core/reshape/pivot.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def pivot(
482482
if columns is None:
483483
raise TypeError("pivot() missing 1 required argument: 'columns'")
484484

485-
columns = com.convert_to_list_like(columns)
485+
columns_listlike = com.convert_to_list_like(columns)
486486

487487
if values is None:
488488
if index is not None:
@@ -494,28 +494,27 @@ def pivot(
494494
# error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
495495
# error: Unsupported left operand type for + ("ExtensionArray")
496496
indexed = data.set_index(
497-
cols + columns, append=append # type: ignore[operator]
497+
cols + columns_listlike, append=append # type: ignore[operator]
498498
)
499499
else:
500500
if index is None:
501-
index = [Series(data.index, name=data.index.name)]
501+
index_list = [Series(data.index, name=data.index.name)]
502502
else:
503-
index = com.convert_to_list_like(index)
504-
index = [data[idx] for idx in index]
503+
index_list = [data[idx] for idx in com.convert_to_list_like(index)]
505504

506-
data_columns = [data[col] for col in columns]
507-
index.extend(data_columns)
508-
index = MultiIndex.from_arrays(index)
505+
data_columns = [data[col] for col in columns_listlike]
506+
index_list.extend(data_columns)
507+
multiindex = MultiIndex.from_arrays(index_list)
509508

510509
if is_list_like(values) and not isinstance(values, tuple):
511510
# Exclude tuple because it is seen as a single column name
512511
values = cast(Sequence[Hashable], values)
513512
indexed = data._constructor(
514-
data[values]._values, index=index, columns=values
513+
data[values]._values, index=multiindex, columns=values
515514
)
516515
else:
517-
indexed = data._constructor_sliced(data[values]._values, index=index)
518-
return indexed.unstack(columns)
516+
indexed = data._constructor_sliced(data[values]._values, index=multiindex)
517+
return indexed.unstack(columns_listlike)
519518

520519

521520
def crosstab(

pandas/core/series.py

-1
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,6 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
305305
hasnans = property( # type: ignore[assignment]
306306
base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__
307307
)
308-
__hash__ = generic.NDFrame.__hash__
309308
_mgr: SingleManager
310309
div: Callable[[Series, Any], Series]
311310
rdiv: Callable[[Series, Any], Series]

pandas/io/excel/_odswriter.py

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def __init__(
2929
storage_options: StorageOptions = None,
3030
if_sheet_exists: str | None = None,
3131
engine_kwargs: dict[str, Any] | None = None,
32+
**kwargs,
3233
):
3334
from odf.opendocument import OpenDocumentSpreadsheet
3435

pandas/io/excel/_openpyxl.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
BaseExcelReader,
2020
ExcelWriter,
2121
)
22-
from pandas.io.excel._util import validate_freeze_panes
22+
from pandas.io.excel._util import (
23+
combine_kwargs,
24+
validate_freeze_panes,
25+
)
2326

2427
if TYPE_CHECKING:
2528
from openpyxl.descriptors.serialisable import Serialisable
@@ -39,10 +42,13 @@ def __init__(
3942
storage_options: StorageOptions = None,
4043
if_sheet_exists: str | None = None,
4144
engine_kwargs: dict[str, Any] | None = None,
45+
**kwargs,
4246
):
4347
# Use the openpyxl module as the Excel writer.
4448
from openpyxl.workbook import Workbook
4549

50+
engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
51+
4652
super().__init__(
4753
path,
4854
mode=mode,

pandas/io/excel/_util.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from __future__ import annotations
22

3-
from typing import MutableMapping
3+
from typing import (
4+
Any,
5+
MutableMapping,
6+
)
47

58
from pandas.compat._optional import import_optional_dependency
69

@@ -246,3 +249,30 @@ def pop_header_name(row, index_col):
246249
header_name = None if header_name == "" else header_name
247250

248251
return header_name, row[:i] + [""] + row[i + 1 :]
252+
253+
254+
def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
255+
"""
256+
Used to combine two sources of kwargs for the backend engine.
257+
258+
Use of kwargs is deprecated, this function is solely for use in 1.3 and should
259+
be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
260+
or kwargs must be None or empty respectively.
261+
262+
Parameters
263+
----------
264+
engine_kwargs: dict
265+
kwargs to be passed through to the engine.
266+
kwargs: dict
267+
kwargs to be psased through to the engine (deprecated)
268+
269+
Returns
270+
-------
271+
engine_kwargs combined with kwargs
272+
"""
273+
if engine_kwargs is None:
274+
result = {}
275+
else:
276+
result = engine_kwargs.copy()
277+
result.update(kwargs)
278+
return result

pandas/io/excel/_xlsxwriter.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
from pandas._typing import StorageOptions
77

88
from pandas.io.excel._base import ExcelWriter
9-
from pandas.io.excel._util import validate_freeze_panes
9+
from pandas.io.excel._util import (
10+
combine_kwargs,
11+
validate_freeze_panes,
12+
)
1013

1114

1215
class _XlsxStyler:
@@ -175,11 +178,12 @@ def __init__(
175178
storage_options: StorageOptions = None,
176179
if_sheet_exists: str | None = None,
177180
engine_kwargs: dict[str, Any] | None = None,
181+
**kwargs,
178182
):
179183
# Use the xlsxwriter module as the Excel writer.
180184
from xlsxwriter import Workbook
181185

182-
engine_kwargs = engine_kwargs or {}
186+
engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
183187

184188
if mode == "a":
185189
raise ValueError("Append mode is not supported with xlsxwriter!")

pandas/io/excel/_xlwt.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
from pandas._typing import StorageOptions
1010

1111
from pandas.io.excel._base import ExcelWriter
12-
from pandas.io.excel._util import validate_freeze_panes
12+
from pandas.io.excel._util import (
13+
combine_kwargs,
14+
validate_freeze_panes,
15+
)
1316

1417
if TYPE_CHECKING:
1518
from xlwt import XFStyle
@@ -30,10 +33,13 @@ def __init__(
3033
storage_options: StorageOptions = None,
3134
if_sheet_exists: str | None = None,
3235
engine_kwargs: dict[str, Any] | None = None,
36+
**kwargs,
3337
):
3438
# Use the xlwt module as the Excel writer.
3539
import xlwt
3640

41+
engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
42+
3743
if mode == "a":
3844
raise ValueError("Append mode is not supported with xlwt!")
3945

pandas/tests/arrays/string_/test_string.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
This module tests the functionality of StringArray and ArrowStringArray.
33
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
44
"""
5-
6-
import re
7-
85
import numpy as np
96
import pytest
107

@@ -314,7 +311,7 @@ def test_astype_int(dtype):
314311
tm.assert_numpy_array_equal(result, expected)
315312

316313
arr = pd.array(["1", pd.NA, "3"], dtype=dtype)
317-
msg = re.escape("int() argument must be a string, a bytes-like object or a number")
314+
msg = r"int\(\) argument must be a string, a bytes-like object or a( real)? number"
318315
with pytest.raises(TypeError, match=msg):
319316
arr.astype("int64")
320317

pandas/tests/extension/test_sparse.py

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
be added to the array-specific tests in `pandas/tests/arrays/`.
1414
1515
"""
16+
1617
import numpy as np
1718
import pytest
1819

0 commit comments

Comments
 (0)