Sync Fork from Upstream Repo #252

Merged
12 commits merged on Aug 5, 2021
3 changes: 3 additions & 0 deletions doc/source/user_guide/cookbook.rst
@@ -1211,6 +1211,9 @@ The :ref:`Excel <io.excel>` docs
`Modifying formatting in XlsxWriter output
<https://pbpython.com/improve-pandas-excel-output.html>`__

`Loading only visible sheets
<https://github.com/pandas-dev/pandas/issues/19842#issuecomment-892150745>`__
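
A minimal sketch of the approach in the linked comment, assuming ``openpyxl`` is installed and a hypothetical ``workbook.xlsx`` that mixes visible and hidden sheets:

    import openpyxl
    import pandas as pd

    path = "workbook.xlsx"  # hypothetical file name
    wb = openpyxl.load_workbook(path)
    visible = [ws.title for ws in wb.worksheets if ws.sheet_state == "visible"]
    # sheet_name accepts a list and returns a dict of DataFrames keyed by sheet name
    frames = pd.read_excel(path, sheet_name=visible)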

.. _cookbook.html:

HTML
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.2.rst
@@ -30,6 +30,7 @@ Fixed regressions

Bug fixes
~~~~~~~~~
- Bug in :meth:`pandas.read_excel` where the user-supplied ``dtype`` dictionary was modified when reading a file with duplicate columns (:issue:`42462`)
- 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`)
- :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`)
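
A small sketch of the :meth:`pandas.read_excel` fix above, assuming a hypothetical ``duplicate_columns.xlsx`` whose header repeats column ``"a"``:

    import pandas as pd

    dtypes = {"a": "float64"}
    # duplicate headers are mangled to "a" and "a.1" when reading
    df = pd.read_excel("duplicate_columns.xlsx", dtype=dtypes)
    # the caller's mapping is no longer extended with the mangled names
    assert dtypes == {"a": "float64"}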

4 changes: 4 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
@@ -160,6 +160,8 @@ Deprecations
- Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`)
- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
- Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
- Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`)
-
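
A minimal sketch of the ``kind`` deprecation listed above; the argument has no effect and can simply be dropped:

    import pandas as pd

    idx = pd.Index(list("abcd"))
    idx.get_slice_bound("b", side="left")              # 1, preferred form
    idx.get_slice_bound("b", side="left", kind="loc")  # same result, now emits FutureWarning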

.. ---------------------------------------------------------------------------

@@ -234,6 +236,8 @@ Indexing
- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
- Bug in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` failing to raise, or incorrectly raising, ``ValueError`` when passed an ``ascending`` value (:issue:`41634`)
- Bug in updating the values of a :class:`pandas.Series` via a boolean index, when the Series was created by :meth:`pandas.DataFrame.pop` (:issue:`42530`)
- Bug in :meth:`DataFrame.query` not handling the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a :class:`DataFrame` (:issue:`42826`)
-
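
A short sketch of the :meth:`DataFrame.query` fix above:

    import pandas as pd

    df = pd.DataFrame({"Temp(°C)": [20.5, 21.0, 19.8]})
    # backtick-quoted names containing a degree sign now parse correctly
    result = df.query("`Temp(°C)` > 20")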

Missing
^^^^^^^
1 change: 1 addition & 0 deletions pandas/core/computation/parsing.py
@@ -49,6 +49,7 @@ def create_valid_python_identifier(name: str) -> str:
"!": "_EXCLAMATIONMARK_",
"$": "_DOLLARSIGN_",
"€": "_EUROSIGN_",
"°": "_DEGREESIGN_",
# Including quotes works, but there are exceptions.
"'": "_SINGLEQUOTE_",
'"': "_DOUBLEQUOTE_",
2 changes: 2 additions & 0 deletions pandas/core/generic.py
@@ -4840,6 +4840,8 @@ def _reindex_axes(
copy=copy,
allow_dups=False,
)
# If we've made a copy once, no need to make another one
copy = False

return obj

18 changes: 11 additions & 7 deletions pandas/core/groupby/groupby.py
@@ -3031,15 +3031,19 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
if freq is not None or axis != 0:
return self.apply(lambda x: x.shift(periods, freq, axis, fill_value))

return self._get_cythonized_result(
"group_shift_indexer",
numeric_only=False,
cython_dtype=np.dtype(np.int64),
needs_ngroups=True,
result_is_index=True,
periods=periods,
ids, _, ngroups = self.grouper.group_info
res_indexer = np.zeros(len(ids), dtype=np.int64)

libgroupby.group_shift_indexer(res_indexer, ids, ngroups, periods)

obj = self._obj_with_exclusions

res = obj._reindex_with_indexers(
{self.axis: (obj.axes[self.axis], res_indexer)},
fill_value=fill_value,
allow_dups=True,
)
return res

@final
@Substitution(name="groupby")
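
The rewritten ``shift`` above calls ``group_shift_indexer`` and ``_reindex_with_indexers`` directly instead of going through ``_get_cythonized_result``; the user-facing result is unchanged. A small usage sketch:

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})
    shifted = df.groupby("key")["val"].shift(1, fill_value=0)
    # within each group the values move down one row: [0, 1, 0, 3]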
20 changes: 16 additions & 4 deletions pandas/core/indexes/base.py
@@ -5839,7 +5839,7 @@ def slice_indexer(
start: Hashable | None = None,
end: Hashable | None = None,
step: int | None = None,
kind: str_t | None = None,
kind=no_default,
) -> slice:
"""
Compute the slice indexer for input labels and step.
@@ -5855,6 +5855,8 @@
step : int, default None
kind : str, default None

.. deprecated:: 1.4.0

Returns
-------
indexer : slice
@@ -5880,6 +5882,8 @@
>>> idx.slice_indexer(start='b', end=('c', 'g'))
slice(1, 3, None)
"""
self._deprecated_arg(kind, "kind", "slice_indexer")

start_slice, end_slice = self.slice_locs(start, end, step=step)

# return a slice
@@ -5928,6 +5932,8 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default):
side : {'left', 'right'}
kind : {'loc', 'getitem'} or None

.. deprecated:: 1.3.0

Returns
-------
label : object
@@ -5962,7 +5968,7 @@ def _searchsorted_monotonic(self, label, side: str_t = "left"):

raise ValueError("index must be monotonic increasing or decreasing")

def get_slice_bound(self, label, side: str_t, kind=None) -> int:
def get_slice_bound(self, label, side: str_t, kind=no_default) -> int:
"""
Calculate slice bound that corresponds to given label.

@@ -5975,12 +5981,15 @@ def get_slice_bound(self, label, side: str_t, kind=None) -> int:
side : {'left', 'right'}
kind : {'loc', 'getitem'} or None

.. deprecated:: 1.4.0

Returns
-------
int
Index of label.
"""
assert kind in ["loc", "getitem", None]
assert kind in ["loc", "getitem", None, no_default]
self._deprecated_arg(kind, "kind", "get_slice_bound")

if side not in ("left", "right"):
raise ValueError(
@@ -6030,7 +6039,7 @@ def get_slice_bound(self, label, side: str_t, kind=None) -> int:
else:
return slc

def slice_locs(self, start=None, end=None, step=None, kind=None):
def slice_locs(self, start=None, end=None, step=None, kind=no_default):
"""
Compute slice locations for input labels.

@@ -6044,6 +6053,8 @@ def slice_locs(self, start=None, end=None, step=None, kind=no_default):
If None, defaults to 1.
kind : {'loc', 'getitem'} or None

.. deprecated:: 1.4.0

Returns
-------
start, end : int
@@ -6062,6 +6073,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=no_default):
>>> idx.slice_locs(start='b', end='c')
(1, 3)
"""
self._deprecated_arg(kind, "kind", "slice_locs")
inc = step is None or step >= 0

if not inc:
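
For reference, a condensed version of the docstring examples above, without the deprecated ``kind`` argument:

    import pandas as pd

    idx = pd.Index(list("abcd"))
    idx.slice_locs(start="b", end="c")      # (1, 3)
    idx.slice_indexer(start="b", end="c")   # slice(1, 3, None)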
13 changes: 11 additions & 2 deletions pandas/core/indexes/datetimes.py
@@ -722,14 +722,14 @@ def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
if self._is_strictly_monotonic_decreasing and len(self) > 1:
return upper if side == "left" else lower
return lower if side == "left" else upper
elif isinstance(label, (self._data._recognized_scalars, date)):
elif isinstance(label, self._data._recognized_scalars):
self._deprecate_mismatched_indexing(label)
else:
raise self._invalid_indexer("slice", label)

return self._maybe_cast_for_get_loc(label)

def slice_indexer(self, start=None, end=None, step=None, kind=None):
def slice_indexer(self, start=None, end=None, step=None, kind=lib.no_default):
"""
Return indexer for specified label slice.
Index.slice_indexer, customized to handle time slicing.
@@ -743,6 +743,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
value-based selection in non-monotonic cases.

"""
self._deprecated_arg(kind, "kind", "slice_indexer")

# For historical reasons DatetimeIndex supports slices between two
# instances of datetime.time as if it were applying a slice mask to
# an array of (self.hour, self.minute, self.seconds, self.microsecond).
@@ -800,6 +802,13 @@ def check_str_or_none(point):
else:
return indexer

@doc(Index.get_slice_bound)
def get_slice_bound(self, label, side: str, kind=None) -> int:
# GH#42855 handle date here instead of _maybe_cast_slice_bound
if isinstance(label, date) and not isinstance(label, datetime):
label = Timestamp(label).to_pydatetime()
return super().get_slice_bound(label, side=side, kind=kind)

# --------------------------------------------------------------------

@property
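
A minimal sketch of the GH#42855 case handled by the new ``get_slice_bound`` override above, assuming a daily index:

    from datetime import date
    import pandas as pd

    dti = pd.date_range("2021-01-01", periods=5, freq="D")
    # datetime.date labels are cast to Timestamp before the bound is computed
    dti.get_slice_bound(date(2021, 1, 3), side="left")   # 2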
11 changes: 9 additions & 2 deletions pandas/core/indexes/multi.py
@@ -2586,7 +2586,7 @@ def _get_indexer_level_0(self, target) -> np.ndarray:
return ci.get_indexer_for(target)

def get_slice_bound(
self, label: Hashable | Sequence[Hashable], side: str, kind: str | None = None
self, label: Hashable | Sequence[Hashable], side: str, kind=lib.no_default
) -> int:
"""
For an ordered MultiIndex, compute slice bound
@@ -2601,6 +2601,8 @@ def get_slice_bound(
side : {'left', 'right'}
kind : {'loc', 'getitem', None}

.. deprecated:: 1.4.0

Returns
-------
int
@@ -2632,11 +2634,13 @@
MultiIndex.get_locs : Get location for a label/slice/list/mask or a
sequence of such.
"""
self._deprecated_arg(kind, "kind", "get_slice_bound")

if not isinstance(label, tuple):
label = (label,)
return self._partial_tup_index(label, side=side)

def slice_locs(self, start=None, end=None, step=None, kind=None):
def slice_locs(self, start=None, end=None, step=None, kind=lib.no_default):
"""
For an ordered MultiIndex, compute the slice locations for input
labels.
@@ -2655,6 +2659,8 @@ def slice_locs(self, start=None, end=None, step=None, kind=lib.no_default):
Slice step
kind : string, optional, defaults None

.. deprecated:: 1.4.0

Returns
-------
(start, end) : (int, int)
Expand Down Expand Up @@ -2688,6 +2694,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
MultiIndex.get_locs : Get location for a label/slice/list/mask or a
sequence of such.
"""
self._deprecated_arg(kind, "kind", "slice_locs")
# This function adds nothing to its parent implementation (the magic
# happens in get_slice_bound method), but it adds meaningful doc.
return super().slice_locs(start, end, step)
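
A condensed version of the MultiIndex docstring examples above, again without ``kind``:

    import pandas as pd

    mi = pd.MultiIndex.from_arrays([list("abbd"), list("deff")])
    mi.get_slice_bound(("a", "d"), side="left")   # 0
    mi.slice_locs(start="b", end=("b", "f"))      # (1, 3)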
2 changes: 1 addition & 1 deletion pandas/core/indexes/numeric.py
@@ -244,7 +244,7 @@ def _convert_slice_indexer(self, key: slice, kind: str):

# We always treat __getitem__ slicing as label-based
# translate to locations
return self.slice_indexer(key.start, key.stop, key.step, kind=kind)
return self.slice_indexer(key.start, key.stop, key.step)

return super()._convert_slice_indexer(key, kind=kind)

2 changes: 2 additions & 0 deletions pandas/core/internals/array_manager.py
@@ -601,6 +601,8 @@ def _reindex_indexer(
)
else:
arr = self.arrays[i]
if copy:
arr = arr.copy()
new_arrays.append(arr)

else:
6 changes: 5 additions & 1 deletion pandas/core/sample.py
@@ -63,7 +63,11 @@ def preprocess_weights(obj: FrameOrSeries, weights, axis: int) -> np.ndarray:
if (weights < 0).any():
raise ValueError("weight vector may not include negative values")

weights[np.isnan(weights)] = 0
missing = np.isnan(weights)
if missing.any():
# Don't modify weights in place
weights = weights.copy()
weights[missing] = 0
return weights


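
The ``preprocess_weights`` change above stops ``sample`` from zeroing missing weights in the caller's array; a small sketch of the visible effect:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"x": range(3)})
    weights = np.array([0.5, np.nan, 0.5])
    df.sample(n=1, weights=weights, random_state=0)
    # the caller's array is no longer modified in place
    assert np.isnan(weights[1])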
56 changes: 53 additions & 3 deletions pandas/io/common.py
@@ -6,11 +6,13 @@
from collections import abc
import dataclasses
import gzip
import io
from io import (
BufferedIOBase,
BytesIO,
RawIOBase,
StringIO,
TextIOBase,
TextIOWrapper,
)
import mmap
@@ -50,7 +52,6 @@

lzma = import_lzma()


_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard("")

@@ -102,7 +103,7 @@ def close(self) -> None:
avoid closing the potentially user-created buffer.
"""
if self.is_wrapped:
assert isinstance(self.handle, TextIOWrapper)
assert isinstance(self.handle, (TextIOWrapper, BytesIOWrapper))
self.handle.flush()
self.handle.detach()
self.created_handles.remove(self.handle)
@@ -712,7 +713,16 @@ def get_handle(

# Convert BytesIO or file objects passed with an encoding
is_wrapped = False
if is_text and (compression or _is_binary_mode(handle, ioargs.mode)):
if not is_text and ioargs.mode == "rb" and isinstance(handle, TextIOBase):
handle = BytesIOWrapper(
handle,
encoding=ioargs.encoding,
)
handles.append(handle)
# the (text) handle is always provided by the caller
# since get_handle would have opened it in binary mode
is_wrapped = True
elif is_text and (compression or _is_binary_mode(handle, ioargs.mode)):
handle = TextIOWrapper(
# error: Argument 1 to "TextIOWrapper" has incompatible type
# "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]";
@@ -878,6 +888,46 @@ def __next__(self) -> str:
return newline.lstrip("\n")


# Wrapper that wraps a StringIO buffer and reads bytes from it
# Created for compat with pyarrow read_csv
class BytesIOWrapper(io.BytesIO):
buffer: StringIO | TextIOBase | None

def __init__(self, buffer: StringIO | TextIOBase, encoding: str = "utf-8"):
self.buffer = buffer
self.encoding = encoding
# Because a character can be represented by more than 1 byte,
# it is possible that reading will produce more bytes than n
# We store the extra bytes in this overflow variable, and append the
# overflow to the front of the bytestring the next time reading is performed
self.overflow = b""

def __getattr__(self, attr: str):
return getattr(self.buffer, attr)

def read(self, n: int | None = -1) -> bytes:
assert self.buffer is not None
bytestring = self.buffer.read(n).encode(self.encoding)
# When n=-1/n greater than remaining bytes: Read entire file/rest of file
combined_bytestring = self.overflow + bytestring
if n is None or n < 0 or n >= len(combined_bytestring):
self.overflow = b""
return combined_bytestring
else:
to_return = combined_bytestring[:n]
self.overflow = combined_bytestring[n:]
return to_return

def detach(self):
# Slightly modified from Python's TextIOWrapper detach method
if self.buffer is None:
raise ValueError("buffer is already detached")
self.flush()
buffer = self.buffer
self.buffer = None
return buffer


def _maybe_memory_map(
handle: FileOrBuffer,
memory_map: bool,
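
The new ``BytesIOWrapper`` above lets a text handle feed a reader that only accepts bytes. A hedged sketch of the use case named in the comment, assuming ``pyarrow`` is installed and a pandas build that ships ``engine="pyarrow"`` for ``read_csv`` (1.4 development at the time of this sync):

    from io import StringIO
    import pandas as pd

    data = StringIO("a,b\n1,2\n3,4\n")
    # the text buffer is wrapped into a bytes-producing handle internally
    df = pd.read_csv(data, engine="pyarrow")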