
Commit 3d5c167

Merge remote-tracking branch 'upstream/master' into compiled-regex-replace

2 parents a05f3b2 + a3f5c6a


50 files changed: +417 -466 lines changed

asv_bench/benchmarks/index_object.py (+16 -8)

@@ -57,8 +57,8 @@ def time_datetime_difference_disjoint(self):
 
 class Range:
     def setup(self):
-        self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)
-        self.idx_dec = RangeIndex(start=10 ** 7, stop=-1, step=-3)
+        self.idx_inc = RangeIndex(start=0, stop=10 ** 6, step=3)
+        self.idx_dec = RangeIndex(start=10 ** 6, stop=-1, step=-3)
 
     def time_max(self):
         self.idx_inc.max()
@@ -73,15 +73,23 @@ def time_min_trivial(self):
         self.idx_inc.min()
 
     def time_get_loc_inc(self):
-        self.idx_inc.get_loc(900000)
+        self.idx_inc.get_loc(900_000)
 
     def time_get_loc_dec(self):
-        self.idx_dec.get_loc(100000)
+        self.idx_dec.get_loc(100_000)
+
+    def time_iter_inc(self):
+        for _ in self.idx_inc:
+            pass
+
+    def time_iter_dec(self):
+        for _ in self.idx_dec:
+            pass
 
 
 class IndexEquals:
     def setup(self):
-        idx_large_fast = RangeIndex(100000)
+        idx_large_fast = RangeIndex(100_000)
         idx_small_slow = date_range(start="1/1/2012", periods=1)
         self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
 
@@ -94,7 +102,7 @@ def time_non_object_equals_multiindex(self):
 class IndexAppend:
     def setup(self):
 
-        N = 10000
+        N = 10_000
         self.range_idx = RangeIndex(0, 100)
         self.int_idx = self.range_idx.astype(int)
         self.obj_idx = self.int_idx.astype(str)
@@ -168,7 +176,7 @@ def time_get_loc_non_unique_sorted(self, dtype):
 class Float64IndexMethod:
     # GH 13166
     def setup(self):
-        N = 100000
+        N = 100_000
         a = np.arange(N)
         self.ind = Float64Index(a * 4.8000000418824129e-08)
 
@@ -212,7 +220,7 @@ class GC:
     params = [1, 2, 5]
 
     def create_use_drop(self):
-        idx = Index(list(range(1000 * 1000)))
+        idx = Index(list(range(1_000_000)))
         idx._engine
 
     def peakmem_gc_instances(self, N):
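Note: the two new time_iter benchmarks time plain Python iteration over a RangeIndex, and the smaller stop value (10 ** 6 instead of 10 ** 7) keeps the suite fast while still exercising the hot loop. A rough standalone equivalent of what they measure (illustrative snippet, not part of the commit):

from pandas import RangeIndex

idx = RangeIndex(start=0, stop=10 ** 6, step=3)
idx.get_loc(900_000)   # positional lookup, as timed by time_get_loc_inc
for _ in idx:          # pure-Python iteration, as timed by time_iter_inc
    pass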

ci/code_checks.sh (+1 -1)

@@ -121,7 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
 
     # Imports - Check formatting using isort see setup.cfg for settings
    MSG='Check import format using isort' ; echo $MSG
-    ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts"
+    ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts web"
    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
        eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
    else

ci/deps/azure-37-locale_slow.yaml (+1 -1)

@@ -24,7 +24,7 @@ dependencies:
   - pytz=2017.3
   - scipy
   - sqlalchemy=1.2.8
-  - xlrd=1.1.0
+  - xlrd=1.2.0
   - xlsxwriter=1.0.2
   - xlwt=1.3.0
   - html5lib=1.0.1

ci/deps/azure-37-minimum_versions.yaml (+1 -1)

@@ -25,7 +25,7 @@ dependencies:
   - pytz=2017.3
   - pyarrow=0.15
   - scipy=1.2
-  - xlrd=1.1.0
+  - xlrd=1.2.0
   - xlsxwriter=1.0.2
   - xlwt=1.3.0
   - html5lib=1.0.1

doc/source/getting_started/install.rst (+1 -1)

@@ -287,7 +287,7 @@ s3fs 0.4.0 Amazon S3 access
 tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)
 xarray 0.12.0 pandas-like API for N-dimensional data
 xclip Clipboard I/O on linux
-xlrd 1.1.0 Excel reading
+xlrd 1.2.0 Excel reading
 xlwt 1.3.0 Excel writing
 xsel Clipboard I/O on linux
 zlib Compression for HDF5

doc/source/user_guide/cookbook.rst (+1 -1)

@@ -765,7 +765,7 @@ Timeseries
 <https://stackoverflow.com/questions/13893227/vectorized-look-up-of-values-in-pandas-dataframe>`__
 
 `Aggregation and plotting time series
-<http://nipunbatra.github.io/2015/06/timeseries/>`__
+<https://nipunbatra.github.io/blog/visualisation/2013/05/01/aggregation-timeseries.html>`__
 
 Turn a matrix with hours in columns and days in rows into a continuous row sequence in the form of a time series.
 `How to rearrange a Python pandas DataFrame?

doc/source/user_guide/io.rst (+7 -4)

@@ -287,16 +287,19 @@ Quoting, compression, and file format
 
 compression : {``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``None``, ``dict``}, default ``'infer'``
     For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
-    bz2, zip, or xz if filepath_or_buffer is a string ending in '.gz', '.bz2',
+    bz2, zip, or xz if ``filepath_or_buffer`` is path-like ending in '.gz', '.bz2',
     '.zip', or '.xz', respectively, and no decompression otherwise. If using 'zip',
     the ZIP file must contain only one data file to be read in.
     Set to ``None`` for no decompression. Can also be a dict with key ``'method'``
-    set to one of {``'zip'``, ``'gzip'``, ``'bz2'``}, and other keys set to
-    compression settings. As an example, the following could be passed for
-    faster compression: ``compression={'method': 'gzip', 'compresslevel': 1}``.
+    set to one of {``'zip'``, ``'gzip'``, ``'bz2'``} and other key-value pairs are
+    forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``, or ``bz2.BZ2File``.
+    As an example, the following could be passed for faster compression and to
+    create a reproducible gzip archive:
+    ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
 
     .. versionchanged:: 0.24.0 'infer' option added and set to default.
     .. versionchanged:: 1.1.0 dict option extended to support ``gzip`` and ``bz2``.
+    .. versionchanged:: 1.2.0 Previous versions forwarded dict entries for 'gzip' to `gzip.open`.
 thousands : str, default ``None``
     Thousands separator.
 decimal : str, default ``'.'``
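Note: the documented dict form can be exercised directly; a minimal sketch of the 1.2.0 behavior described above (file name illustrative):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# keys other than 'method' are forwarded to gzip.GzipFile;
# mtime=1 pins the timestamp in the gzip header, making the archive reproducible
df.to_csv("data.csv.gz", compression={"method": "gzip", "compresslevel": 1, "mtime": 1})

# the default 'infer' picks gzip back up from the '.gz' suffix
df2 = pd.read_csv("data.csv.gz", index_col=0)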

doc/source/whatsnew/v1.1.1.rst (+6 -2)

@@ -16,14 +16,18 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 
 - Fixed regression where :meth:`DataFrame.to_numpy` would raise a ``RuntimeError`` for mixed dtypes when converting to ``str`` (:issue:`35455`)
-- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`).
+- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`)
 - Fixed regression where :func:`pandas.testing.assert_series_equal` would raise an error when non-numeric dtypes were passed with ``check_exact=True`` (:issue:`35446`)
 - Fixed regression in :class:`pandas.core.groupby.RollingGroupby` where column selection was ignored (:issue:`35486`)
 - Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`)
+- Fixed regression in :meth:`DataFrame.diff` with read-only data (:issue:`35559`)
 - Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`)
 - Fixed regression in :meth:`DataFrame.apply` where functions that altered the input in-place only operated on a single row (:issue:`35462`)
+- Fixed regression where :meth:`DataFrame.reset_index` would raise a ``ValueError`` on an empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`)
+- Fixed regression where :meth:`DataFrame.merge_asof` would raise an ``UnboundLocalError`` when ``left_index``, ``right_index`` and ``tolerance`` were set (:issue:`35558`)
 - Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)
 - Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` where compiled regular expressions would be ignored during replacement (:issue:`35680`)
+- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate (:issue:`35490`)
 
 .. ---------------------------------------------------------------------------
 
@@ -32,7 +36,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 
-- Bug in ``Styler`` whereby `cell_ids` argument had no effect due to other recent changes (:issue:`35588`).
+- Bug in ``Styler`` whereby the `cell_ids` argument had no effect due to other recent changes (:issue:`35588`, :issue:`35663`).
 
 Categorical
 ^^^^^^^^^^^
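Note: the GH 35680 entry is the one this branch (compiled-regex-replace) exists for; a minimal reproduction of the restored behavior (values illustrative):

import re
import pandas as pd

s = pd.Series(["foo123", "bar456"])
# pre-compiled patterns are honored again instead of being silently ignored
s.replace(re.compile(r"\d+"), "", regex=True)
# 0    foo
# 1    bar
# dtype: object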

doc/source/whatsnew/v1.2.0.rst (+3 -1)

@@ -122,7 +122,7 @@ Optional libraries below the lowest tested version may still work, but are not c
 +-----------------+-----------------+---------+
 | xarray          | 0.12.0          | X       |
 +-----------------+-----------------+---------+
-| xlrd            | 1.1.0           |         |
+| xlrd            | 1.2.0           | X       |
 +-----------------+-----------------+---------+
 | xlsxwriter      | 1.0.2           | X       |
 +-----------------+-----------------+---------+
@@ -172,6 +172,7 @@ Datetimelike
 ^^^^^^^^^^^^
 - Bug in :attr:`DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`)
 - Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`)
+- Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g. ``months=12``) (:issue:`34511`)
 -
 
 Timedelta
@@ -235,6 +236,7 @@ I/O
 - Bug in :meth:`to_csv` caused a ``ValueError`` when it was called with a filename in combination with ``mode`` containing a ``b`` (:issue:`35058`)
 - In :meth:`read_csv` `float_precision='round_trip'` now handles `decimal` and `thousands` parameters (:issue:`35365`)
 - :meth:`to_pickle` and :meth:`read_pickle` were closing user-provided file objects (:issue:`35679`)
+- :meth:`to_csv` now always passes compression arguments for `'gzip'` to `gzip.GzipFile` (:issue:`28103`)
 
 Plotting
 ^^^^^^^^

pandas/_config/localization.py (+8 -6)

@@ -88,12 +88,14 @@ def _valid_locales(locales, normalize):
     valid_locales : list
         A list of valid locales.
     """
-    if normalize:
-        normalizer = lambda x: locale.normalize(x.strip())
-    else:
-        normalizer = lambda x: x.strip()
-
-    return list(filter(can_set_locale, map(normalizer, locales)))
+    return [
+        loc
+        for loc in (
+            locale.normalize(loc.strip()) if normalize else loc.strip()
+            for loc in locales
+        )
+        if can_set_locale(loc)
+    ]
 
 
 def _default_locale_getter():
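Note: the rewrite is behavior-preserving: strip (and optionally normalize) each locale, then keep the ones that can be set. A quick equivalence check with a stand-in predicate (hypothetical; the real can_set_locale actually calls locale.setlocale):

locales = [" en_US.UTF-8 ", "C "]
can_set_locale = bool   # stand-in predicate, for illustration only
old = list(filter(can_set_locale, map(lambda x: x.strip(), locales)))
new = [loc for loc in (x.strip() for x in locales) if can_set_locale(loc)]
assert old == new == ["en_US.UTF-8", "C"]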

pandas/_libs/algos.pyx (+4 -3)

@@ -1200,14 +1200,15 @@ ctypedef fused out_t:
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def diff_2d(
-    diff_t[:, :] arr,
-    out_t[:, :] out,
+    ndarray[diff_t, ndim=2] arr,  # TODO(cython 3) update to "const diff_t[:, :] arr"
+    ndarray[out_t, ndim=2] out,
     Py_ssize_t periods,
     int axis,
 ):
     cdef:
         Py_ssize_t i, j, sx, sy, start, stop
-        bint f_contig = arr.is_f_contig()
+        bint f_contig = arr.flags.f_contiguous
+        # bint f_contig = arr.is_f_contig()  # TODO(cython 3)
 
     # Disable for unsupported dtype combinations,
     # see https://github.com/cython/cython/issues/2646
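Note: plain memoryview arguments reject read-only buffers in current Cython, while ndarray arguments accept them; that is the mechanism behind the GH 35559 DataFrame.diff fix listed in v1.1.1 above. A sketch of the user-visible effect:

import numpy as np
import pandas as pd

arr = np.arange(6, dtype="int64").reshape(3, 2)
arr.setflags(write=False)   # simulate read-only backing data
pd.DataFrame(arr).diff()    # previously: ValueError: buffer source array is read-only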

pandas/_libs/tslibs/offsets.pyx (-7)

@@ -989,13 +989,6 @@ cdef class RelativeDeltaOffset(BaseOffset):
             state["_offset"] = state.pop("offset")
             state["kwds"]["offset"] = state["_offset"]
 
-        if "_offset" in state and not isinstance(state["_offset"], timedelta):
-            # relativedelta, we need to populate using its kwds
-            offset = state["_offset"]
-            odict = offset.__dict__
-            kwds = {key: odict[key] for key in odict if odict[key]}
-            state.update(kwds)
-
         self.n = state.pop("n")
         self.normalize = state.pop("normalize")
         self._cache = state.pop("_cache", {})
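Note: the deleted block merged the normalized relativedelta __dict__ back into the unpickled state, which could overwrite out-of-range inputs (e.g. months=12 coming back normalized as years=1); removing it appears to be the fix behind the GH 34511 entry added to v1.2.0 above. A quick round-trip check (illustrative):

import pickle
from pandas.tseries.offsets import DateOffset

off = DateOffset(months=12)   # beyond relativedelta's normalized range
assert pickle.loads(pickle.dumps(off)) == off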

pandas/_typing.py (+5)

@@ -109,3 +109,8 @@
 
 # for arbitrary kwargs passed during reading/writing files
 StorageOptions = Optional[Dict[str, Any]]
+
+
+# compression keywords and compression
+CompressionDict = Mapping[str, Optional[Union[str, int, bool]]]
+CompressionOptions = Optional[Union[str, CompressionDict]]
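Note: these aliases centralize the compression annotation that pandas/core/generic.py switches to below; a minimal usage sketch (hypothetical function, not part of the commit):

from pandas._typing import CompressionOptions

def write_payload(path: str, compression: CompressionOptions = "infer") -> None:
    # accepts 'infer', 'gzip', ..., None, or a dict like {'method': 'gzip', 'mtime': 1}
    ...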

pandas/compat/_optional.py (+1 -1)

@@ -27,7 +27,7 @@
     "tables": "3.4.3",
     "tabulate": "0.8.3",
     "xarray": "0.8.2",
-    "xlrd": "1.1.0",
+    "xlrd": "1.2.0",
     "xlwt": "1.2.0",
     "xlsxwriter": "0.9.8",
     "numba": "0.46.0",

pandas/core/computation/expr.py (+3 -4)

@@ -167,10 +167,9 @@ def _is_type(t):
 
 # partition all AST nodes
 _all_nodes = frozenset(
-    filter(
-        lambda x: isinstance(x, type) and issubclass(x, ast.AST),
-        (getattr(ast, node) for node in dir(ast)),
-    )
+    node
+    for node in (getattr(ast, name) for name in dir(ast))
+    if isinstance(node, type) and issubclass(node, ast.AST)
 )
 
 
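Note: same filter/map-to-comprehension cleanup as in pandas/_config/localization.py above; the resulting set can be sanity-checked interactively (not part of the diff):

import ast

_all_nodes = frozenset(
    node
    for node in (getattr(ast, name) for name in dir(ast))
    if isinstance(node, type) and issubclass(node, ast.AST)
)
assert ast.BinOp in _all_nodes and ast.Name in _all_nodes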
pandas/core/frame.py (+1 -1)

@@ -4816,7 +4816,7 @@ def _maybe_casted_values(index, labels=None):
 
             # we can have situations where the whole mask is -1,
             # meaning there is nothing found in labels, so make all nan's
-            if mask.all():
+            if mask.size > 0 and mask.all():
                 dtype = index.dtype
                 fill_value = na_value_for_dtype(dtype)
                 values = construct_1d_arraylike_from_scalar(
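Note: the added mask.size > 0 guard is needed because NumPy's all() is vacuously true on empty arrays, so an empty mask used to be treated as "nothing found in labels"; this is the reset_index-on-empty-MultiIndex regression (GH 35606, GH 35657) recorded in v1.1.1 above. The edge case in one line:

import numpy as np

assert np.array([], dtype=bool).all()   # vacuously True, hence the explicit size check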

pandas/core/generic.py (+10 -3)

@@ -35,6 +35,7 @@
 from pandas._libs.tslibs import Tick, Timestamp, to_offset
 from pandas._typing import (
     Axis,
+    CompressionOptions,
     FilePathOrBuffer,
     FrameOrSeries,
     JSONSerializable,
@@ -2058,7 +2059,7 @@ def to_json(
         date_unit: str = "ms",
         default_handler: Optional[Callable[[Any], JSONSerializable]] = None,
         lines: bool_t = False,
-        compression: Optional[str] = "infer",
+        compression: CompressionOptions = "infer",
         index: bool_t = True,
         indent: Optional[int] = None,
         storage_options: StorageOptions = None,
@@ -2646,7 +2647,7 @@ def to_sql(
     def to_pickle(
         self,
         path,
-        compression: Optional[str] = "infer",
+        compression: CompressionOptions = "infer",
         protocol: int = pickle.HIGHEST_PROTOCOL,
         storage_options: StorageOptions = None,
     ) -> None:
@@ -3053,7 +3054,7 @@ def to_csv(
         index_label: Optional[Union[bool_t, str, Sequence[Label]]] = None,
         mode: str = "w",
         encoding: Optional[str] = None,
-        compression: Optional[Union[str, Mapping[str, str]]] = "infer",
+        compression: CompressionOptions = "infer",
         quoting: Optional[int] = None,
         quotechar: str = '"',
         line_terminator: Optional[str] = None,
@@ -3144,6 +3145,12 @@ def to_csv(
 
             Compression is supported for binary file objects.
 
+            .. versionchanged:: 1.2.0
+
+                Previous versions forwarded dict entries for 'gzip' to
+                `gzip.open` instead of `gzip.GzipFile` which prevented
+                setting `mtime`.
+
         quoting : optional constant from csv module
             Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
             then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
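Note: because dict entries now reach gzip.GzipFile rather than gzip.open, mtime can be pinned and repeated to_csv calls become byte-identical; a sketch (file names illustrative):

import pandas as pd

df = pd.DataFrame({"a": range(5)})
opts = {"method": "gzip", "compresslevel": 9, "mtime": 1}

df.to_csv("run1.csv.gz", compression=opts)
df.to_csv("run2.csv.gz", compression=opts)

with open("run1.csv.gz", "rb") as f1, open("run2.csv.gz", "rb") as f2:
    assert f1.read() == f2.read()   # fixed header timestamp -> reproducible archive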
