
Commit 3d5c167

Merge remote-tracking branch 'upstream/master' into compiled-regex-replace

2 parents a05f3b2 + a3f5c6a


50 files changed: +417 -466 lines changed

asv_bench/benchmarks/index_object.py (+16 -8)

@@ -57,8 +57,8 @@ def time_datetime_difference_disjoint(self):
 
 class Range:
     def setup(self):
-        self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)
-        self.idx_dec = RangeIndex(start=10 ** 7, stop=-1, step=-3)
+        self.idx_inc = RangeIndex(start=0, stop=10 ** 6, step=3)
+        self.idx_dec = RangeIndex(start=10 ** 6, stop=-1, step=-3)
 
     def time_max(self):
         self.idx_inc.max()
@@ -73,15 +73,23 @@ def time_min_trivial(self):
         self.idx_inc.min()
 
     def time_get_loc_inc(self):
-        self.idx_inc.get_loc(900000)
+        self.idx_inc.get_loc(900_000)
 
     def time_get_loc_dec(self):
-        self.idx_dec.get_loc(100000)
+        self.idx_dec.get_loc(100_000)
+
+    def time_iter_inc(self):
+        for _ in self.idx_inc:
+            pass
+
+    def time_iter_dec(self):
+        for _ in self.idx_dec:
+            pass
 
 
 class IndexEquals:
     def setup(self):
-        idx_large_fast = RangeIndex(100000)
+        idx_large_fast = RangeIndex(100_000)
         idx_small_slow = date_range(start="1/1/2012", periods=1)
         self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
 
@@ -94,7 +102,7 @@ def time_non_object_equals_multiindex(self):
 class IndexAppend:
     def setup(self):
 
-        N = 10000
+        N = 10_000
         self.range_idx = RangeIndex(0, 100)
         self.int_idx = self.range_idx.astype(int)
         self.obj_idx = self.int_idx.astype(str)
@@ -168,7 +176,7 @@ def time_get_loc_non_unique_sorted(self, dtype):
 class Float64IndexMethod:
     # GH 13166
     def setup(self):
-        N = 100000
+        N = 100_000
         a = np.arange(N)
         self.ind = Float64Index(a * 4.8000000418824129e-08)
 
@@ -212,7 +220,7 @@ class GC:
     params = [1, 2, 5]
 
     def create_use_drop(self):
-        idx = Index(list(range(1000 * 1000)))
+        idx = Index(list(range(1_000_000)))
         idx._engine
 
     def peakmem_gc_instances(self, N):
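Note: the two new time_iter benchmarks time plain Python iteration over a RangeIndex, and the smaller stop value (10 ** 6 instead of 10 ** 7) keeps the suite fast while still exercising the hot loop. A rough standalone equivalent of what they measure (illustrative snippet, not part of the commit):

from pandas import RangeIndex

idx = RangeIndex(start=0, stop=10 ** 6, step=3)
idx.get_loc(900_000)   # positional lookup, as timed by time_get_loc_inc
for _ in idx:          # pure-Python iteration, as timed by time_iter_inc
    pass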

ci/code_checks.sh (+1 -1)

@@ -121,7 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
 
     # Imports - Check formatting using isort see setup.cfg for settings
    MSG='Check import format using isort' ; echo $MSG
-    ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts"
+    ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts web"
    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
        eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
    else

ci/deps/azure-37-locale_slow.yaml (+1 -1)

@@ -24,7 +24,7 @@ dependencies:
   - pytz=2017.3
   - scipy
   - sqlalchemy=1.2.8
-  - xlrd=1.1.0
+  - xlrd=1.2.0
   - xlsxwriter=1.0.2
   - xlwt=1.3.0
   - html5lib=1.0.1

ci/deps/azure-37-minimum_versions.yaml (+1 -1)

@@ -25,7 +25,7 @@ dependencies:
   - pytz=2017.3
   - pyarrow=0.15
   - scipy=1.2
-  - xlrd=1.1.0
+  - xlrd=1.2.0
   - xlsxwriter=1.0.2
   - xlwt=1.3.0
   - html5lib=1.0.1

doc/source/getting_started/install.rst (+1 -1)

@@ -287,7 +287,7 @@ s3fs 0.4.0 Amazon S3 access
 tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)
 xarray 0.12.0 pandas-like API for N-dimensional data
 xclip Clipboard I/O on linux
-xlrd 1.1.0 Excel reading
+xlrd 1.2.0 Excel reading
 xlwt 1.3.0 Excel writing
 xsel Clipboard I/O on linux
 zlib Compression for HDF5

doc/source/user_guide/cookbook.rst (+1 -1)

@@ -765,7 +765,7 @@ Timeseries
 <https://stackoverflow.com/questions/13893227/vectorized-look-up-of-values-in-pandas-dataframe>`__
 
 `Aggregation and plotting time series
-<http://nipunbatra.github.io/2015/06/timeseries/>`__
+<https://nipunbatra.github.io/blog/visualisation/2013/05/01/aggregation-timeseries.html>`__
 
 Turn a matrix with hours in columns and days in rows into a continuous row sequence in the form of a time series.
 `How to rearrange a Python pandas DataFrame?

doc/source/user_guide/io.rst (+7 -4)

@@ -287,16 +287,19 @@ Quoting, compression, and file format
 
 compression : {``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``None``, ``dict``}, default ``'infer'``
     For on-the-fly decompression of on-disk data. If 'infer', then use gzip,
-    bz2, zip, or xz if filepath_or_buffer is a string ending in '.gz', '.bz2',
+    bz2, zip, or xz if ``filepath_or_buffer`` is path-like ending in '.gz', '.bz2',
     '.zip', or '.xz', respectively, and no decompression otherwise. If using 'zip',
     the ZIP file must contain only one data file to be read in.
     Set to ``None`` for no decompression. Can also be a dict with key ``'method'``
-    set to one of {``'zip'``, ``'gzip'``, ``'bz2'``}, and other keys set to
-    compression settings. As an example, the following could be passed for
-    faster compression: ``compression={'method': 'gzip', 'compresslevel': 1}``.
+    set to one of {``'zip'``, ``'gzip'``, ``'bz2'``} and other key-value pairs are
+    forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``, or ``bz2.BZ2File``.
+    As an example, the following could be passed for faster compression and to
+    create a reproducible gzip archive:
+    ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
 
     .. versionchanged:: 0.24.0 'infer' option added and set to default.
     .. versionchanged:: 1.1.0 dict option extended to support ``gzip`` and ``bz2``.
+    .. versionchanged:: 1.2.0 Previous versions forwarded dict entries for 'gzip' to `gzip.open`.
 thousands : str, default ``None``
     Thousands separator.
 decimal : str, default ``'.'``
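Note: the documented dict form can be exercised directly; a minimal sketch of the 1.2.0 behavior described above (file name illustrative):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# keys other than 'method' are forwarded to gzip.GzipFile;
# mtime=1 pins the timestamp in the gzip header, making the archive reproducible
df.to_csv("data.csv.gz", compression={"method": "gzip", "compresslevel": 1, "mtime": 1})

# the default 'infer' picks gzip back up from the '.gz' suffix
df2 = pd.read_csv("data.csv.gz", index_col=0)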

doc/source/whatsnew/v1.1.1.rst (+6 -2)

@@ -16,14 +16,18 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 
 - Fixed regression where :meth:`DataFrame.to_numpy` would raise a ``RuntimeError`` for mixed dtypes when converting to ``str`` (:issue:`35455`)
-- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`).
+- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`)
 - Fixed regression where :func:`pandas.testing.assert_series_equal` would raise an error when non-numeric dtypes were passed with ``check_exact=True`` (:issue:`35446`)
 - Fixed regression in :class:`pandas.core.groupby.RollingGroupby` where column selection was ignored (:issue:`35486`)
 - Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`)
+- Fixed regression in :meth:`DataFrame.diff` with read-only data (:issue:`35559`)
 - Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`)
 - Fixed regression in :meth:`DataFrame.apply` where functions that altered the input in-place only operated on a single row (:issue:`35462`)
+- Fixed regression where :meth:`DataFrame.reset_index` would raise a ``ValueError`` on an empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`)
+- Fixed regression where :meth:`DataFrame.merge_asof` would raise an ``UnboundLocalError`` when ``left_index``, ``right_index`` and ``tolerance`` were set (:issue:`35558`)
 - Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)
 - Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` where compiled regular expressions would be ignored during replacement (:issue:`35680`)
+- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate (:issue:`35490`)
 
 .. ---------------------------------------------------------------------------
 
@@ -32,7 +36,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 
-- Bug in ``Styler`` whereby `cell_ids` argument had no effect due to other recent changes (:issue:`35588`).
+- Bug in ``Styler`` whereby the `cell_ids` argument had no effect due to other recent changes (:issue:`35588`, :issue:`35663`).
 
 Categorical
 ^^^^^^^^^^^
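Note: the GH 35680 entry is the one this branch (compiled-regex-replace) exists for; a minimal reproduction of the restored behavior (values illustrative):

import re
import pandas as pd

s = pd.Series(["foo123", "bar456"])
# pre-compiled patterns are honored again instead of being silently ignored
s.replace(re.compile(r"\d+"), "", regex=True)
# 0    foo
# 1    bar
# dtype: object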

doc/source/whatsnew/v1.2.0.rst (+3 -1)

@@ -122,7 +122,7 @@ Optional libraries below the lowest tested version may still work, but are not c
 +-----------------+-----------------+---------+
 | xarray          | 0.12.0          | X       |
 +-----------------+-----------------+---------+
-| xlrd            | 1.1.0           |         |
+| xlrd            | 1.2.0           | X       |
 +-----------------+-----------------+---------+
 | xlsxwriter      | 1.0.2           | X       |
 +-----------------+-----------------+---------+
@@ -172,6 +172,7 @@ Datetimelike
 ^^^^^^^^^^^^
 - Bug in :attr:`DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`)
 - Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`)
+- Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g. ``months=12``) (:issue:`34511`)
 -
 
 Timedelta
@@ -235,6 +236,7 @@ I/O
 - Bug in :meth:`to_csv` caused a ``ValueError`` when it was called with a filename in combination with ``mode`` containing a ``b`` (:issue:`35058`)
 - In :meth:`read_csv` `float_precision='round_trip'` now handles `decimal` and `thousands` parameters (:issue:`35365`)
 - :meth:`to_pickle` and :meth:`read_pickle` were closing user-provided file objects (:issue:`35679`)
+- :meth:`to_csv` now always passes compression arguments for `'gzip'` to `gzip.GzipFile` (:issue:`28103`)
 
 Plotting
 ^^^^^^^^

pandas/_config/localization.py (+8 -6)

@@ -88,12 +88,14 @@ def _valid_locales(locales, normalize):
     valid_locales : list
         A list of valid locales.
     """
-    if normalize:
-        normalizer = lambda x: locale.normalize(x.strip())
-    else:
-        normalizer = lambda x: x.strip()
-
-    return list(filter(can_set_locale, map(normalizer, locales)))
+    return [
+        loc
+        for loc in (
+            locale.normalize(loc.strip()) if normalize else loc.strip()
+            for loc in locales
+        )
+        if can_set_locale(loc)
+    ]
 
 
 def _default_locale_getter():
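Note: the rewrite is behavior-preserving: strip (and optionally normalize) each locale, then keep the ones that can be set. A quick equivalence check with a stand-in predicate (hypothetical; the real can_set_locale actually calls locale.setlocale):

locales = [" en_US.UTF-8 ", "C "]
can_set_locale = bool   # stand-in predicate, for illustration only
old = list(filter(can_set_locale, map(lambda x: x.strip(), locales)))
new = [loc for loc in (x.strip() for x in locales) if can_set_locale(loc)]
assert old == new == ["en_US.UTF-8", "C"]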

pandas/_libs/algos.pyx (+4 -3)

@@ -1200,14 +1200,15 @@ ctypedef fused out_t:
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def diff_2d(
-    diff_t[:, :] arr,
-    out_t[:, :] out,
+    ndarray[diff_t, ndim=2] arr,  # TODO(cython 3) update to "const diff_t[:, :] arr"
+    ndarray[out_t, ndim=2] out,
     Py_ssize_t periods,
     int axis,
 ):
     cdef:
         Py_ssize_t i, j, sx, sy, start, stop
-        bint f_contig = arr.is_f_contig()
+        bint f_contig = arr.flags.f_contiguous
+        # bint f_contig = arr.is_f_contig()  # TODO(cython 3)
 
     # Disable for unsupported dtype combinations,
     # see https://github.com/cython/cython/issues/2646
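Note: plain memoryview arguments reject read-only buffers in current Cython, while ndarray arguments accept them; that is the mechanism behind the GH 35559 DataFrame.diff fix listed in v1.1.1 above. A sketch of the user-visible effect:

import numpy as np
import pandas as pd

arr = np.arange(6, dtype="int64").reshape(3, 2)
arr.setflags(write=False)   # simulate read-only backing data
pd.DataFrame(arr).diff()    # previously: ValueError: buffer source array is read-only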

pandas/_libs/tslibs/offsets.pyx (-7)

@@ -989,13 +989,6 @@ cdef class RelativeDeltaOffset(BaseOffset):
             state["_offset"] = state.pop("offset")
             state["kwds"]["offset"] = state["_offset"]
 
-        if "_offset" in state and not isinstance(state["_offset"], timedelta):
-            # relativedelta, we need to populate using its kwds
-            offset = state["_offset"]
-            odict = offset.__dict__
-            kwds = {key: odict[key] for key in odict if odict[key]}
-            state.update(kwds)
-
         self.n = state.pop("n")
         self.normalize = state.pop("normalize")
         self._cache = state.pop("_cache", {})
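Note: the deleted block merged the normalized relativedelta __dict__ back into the unpickled state, which could overwrite out-of-range inputs (e.g. months=12 coming back normalized as years=1); removing it appears to be the fix behind the GH 34511 entry added to v1.2.0 above. A quick round-trip check (illustrative):

import pickle
from pandas.tseries.offsets import DateOffset

off = DateOffset(months=12)   # beyond relativedelta's normalized range
assert pickle.loads(pickle.dumps(off)) == off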

pandas/_typing.py (+5)

@@ -109,3 +109,8 @@
 
 # for arbitrary kwargs passed during reading/writing files
 StorageOptions = Optional[Dict[str, Any]]
+
+
+# compression keywords and compression
+CompressionDict = Mapping[str, Optional[Union[str, int, bool]]]
+CompressionOptions = Optional[Union[str, CompressionDict]]
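Note: these aliases centralize the compression annotation that pandas/core/generic.py switches to below; a minimal usage sketch (hypothetical function, not part of the commit):

from pandas._typing import CompressionOptions

def write_payload(path: str, compression: CompressionOptions = "infer") -> None:
    # accepts 'infer', 'gzip', ..., None, or a dict like {'method': 'gzip', 'mtime': 1}
    ...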

pandas/compat/_optional.py (+1 -1)

@@ -27,7 +27,7 @@
     "tables": "3.4.3",
     "tabulate": "0.8.3",
     "xarray": "0.8.2",
-    "xlrd": "1.1.0",
+    "xlrd": "1.2.0",
     "xlwt": "1.2.0",
     "xlsxwriter": "0.9.8",
     "numba": "0.46.0",

pandas/core/computation/expr.py (+3 -4)

@@ -167,10 +167,9 @@ def _is_type(t):
 
 # partition all AST nodes
 _all_nodes = frozenset(
-    filter(
-        lambda x: isinstance(x, type) and issubclass(x, ast.AST),
-        (getattr(ast, node) for node in dir(ast)),
-    )
+    node
+    for node in (getattr(ast, name) for name in dir(ast))
+    if isinstance(node, type) and issubclass(node, ast.AST)
 )
 
 
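Note: same filter/map-to-comprehension cleanup as in pandas/_config/localization.py above; the resulting set can be sanity-checked interactively (not part of the diff):

import ast

_all_nodes = frozenset(
    node
    for node in (getattr(ast, name) for name in dir(ast))
    if isinstance(node, type) and issubclass(node, ast.AST)
)
assert ast.BinOp in _all_nodes and ast.Name in _all_nodes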
pandas/core/frame.py (+1 -1)

@@ -4816,7 +4816,7 @@ def _maybe_casted_values(index, labels=None):
 
             # we can have situations where the whole mask is -1,
             # meaning there is nothing found in labels, so make all nan's
-            if mask.all():
+            if mask.size > 0 and mask.all():
                 dtype = index.dtype
                 fill_value = na_value_for_dtype(dtype)
                 values = construct_1d_arraylike_from_scalar(
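Note: the added mask.size > 0 guard is needed because NumPy's all() is vacuously true on empty arrays, so an empty mask used to be treated as "nothing found in labels"; this is the reset_index-on-empty-MultiIndex regression (GH 35606, GH 35657) recorded in v1.1.1 above. The edge case in one line:

import numpy as np

assert np.array([], dtype=bool).all()   # vacuously True, hence the explicit size check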

pandas/core/generic.py (+10 -3)

@@ -35,6 +35,7 @@
 from pandas._libs.tslibs import Tick, Timestamp, to_offset
 from pandas._typing import (
     Axis,
+    CompressionOptions,
     FilePathOrBuffer,
     FrameOrSeries,
     JSONSerializable,
@@ -2058,7 +2059,7 @@ def to_json(
         date_unit: str = "ms",
         default_handler: Optional[Callable[[Any], JSONSerializable]] = None,
         lines: bool_t = False,
-        compression: Optional[str] = "infer",
+        compression: CompressionOptions = "infer",
         index: bool_t = True,
         indent: Optional[int] = None,
         storage_options: StorageOptions = None,
@@ -2646,7 +2647,7 @@ def to_sql(
     def to_pickle(
         self,
         path,
-        compression: Optional[str] = "infer",
+        compression: CompressionOptions = "infer",
         protocol: int = pickle.HIGHEST_PROTOCOL,
         storage_options: StorageOptions = None,
     ) -> None:
@@ -3053,7 +3054,7 @@ def to_csv(
         index_label: Optional[Union[bool_t, str, Sequence[Label]]] = None,
         mode: str = "w",
         encoding: Optional[str] = None,
-        compression: Optional[Union[str, Mapping[str, str]]] = "infer",
+        compression: CompressionOptions = "infer",
         quoting: Optional[int] = None,
         quotechar: str = '"',
         line_terminator: Optional[str] = None,
@@ -3144,6 +3145,12 @@ def to_csv(
 
             Compression is supported for binary file objects.
 
+            .. versionchanged:: 1.2.0
+
+                Previous versions forwarded dict entries for 'gzip' to
+                `gzip.open` instead of `gzip.GzipFile` which prevented
+                setting `mtime`.
+
         quoting : optional constant from csv module
             Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
             then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
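Note: because dict entries now reach gzip.GzipFile rather than gzip.open, mtime can be pinned and repeated to_csv calls become byte-identical; a sketch (file names illustrative):

import pandas as pd

df = pd.DataFrame({"a": range(5)})
opts = {"method": "gzip", "compresslevel": 9, "mtime": 1}

df.to_csv("run1.csv.gz", compression=opts)
df.to_csv("run2.csv.gz", compression=opts)

with open("run1.csv.gz", "rb") as f1, open("run2.csv.gz", "rb") as f2:
    assert f1.read() == f2.read()   # fixed header timestamp -> reproducible archive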
