pandas-dev
diff --git a/‎.github/workflows/wheels.yml
+1-1 b/‎.github/workflows/wheels.yml
+1-1
diff --git a/‎.gitpod.yml
+1 b/‎.gitpod.yml
+1
diff --git a/‎doc/source/whatsnew/index.rst
+1 b/‎doc/source/whatsnew/index.rst
+1
diff --git a/‎doc/source/whatsnew/v2.0.2.rst
+40 b/‎doc/source/whatsnew/v2.0.2.rst
+40
diff --git a/‎doc/source/whatsnew/v2.1.0.rst
+3 b/‎doc/source/whatsnew/v2.1.0.rst
+3
diff --git a/‎pandas/_typing.py
+1 b/‎pandas/_typing.py
+1
diff --git a/‎pandas/core/generic.py
+5-4 b/‎pandas/core/generic.py
+5-4
diff --git a/‎pandas/core/groupby/groupby.py
+1-1 b/‎pandas/core/groupby/groupby.py
+1-1
diff --git a/‎pandas/core/interchange/from_dataframe.py
+34-70 b/‎pandas/core/interchange/from_dataframe.py
+34-70
diff --git a/‎pandas/core/nanops.py
+14-1 b/‎pandas/core/nanops.py
+14-1
diff --git a/‎pandas/io/formats/format.py
-32 b/‎pandas/io/formats/format.py
-32
@@ -71,7 +71,7 @@ jobs:
           fetch-depth: 0
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.12.1
+        uses: pypa/cibuildwheel@v2.12.3
         env:
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
 
 
@@ -15,6 +15,7 @@ tasks:
       git fetch --tags
       python setup.py build_ext --inplace -j 4
       echo "🛠 Completed rebuilding Pandas!! 🛠 "
+      pre-commit install
       echo "✨ Pre-build complete! You can close this terminal ✨ "
 
 # --------------------------------------------------------
 
@@ -24,6 +24,7 @@ Version 2.0
 .. toctree::
    :maxdepth: 2
 
+   v2.0.2
    v2.0.1
    v2.0.0
 
 
@@ -0,0 +1,40 @@
+.. _whatsnew_202:
+
+What's new in 2.0.2 (May ..., 2023)
+-----------------------------------
+
+These are the changes in pandas 2.0.2. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_202.regressions:
+
+Fixed regressions
+~~~~~~~~~~~~~~~~~
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_202.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame` objects of incorrect sizes when called on slices (:issue:`52824`)
+- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_202.other:
+
+Other
+~~~~~
+-
+
+.. ---------------------------------------------------------------------------
+.. _whatsnew_202.contributors:
+
+Contributors
+~~~~~~~~~~~~
+
+.. contributors:: v2.0.1..v2.0.2|HEAD
@@ -313,8 +313,11 @@ Timezones
 
 Numeric
 ^^^^^^^
+- Bug in :meth:`Series.corr` and :meth:`Series.cov` raising ``AttributeError`` for masked dtypes (:issue:`51422`)
+- Bug in :meth:`Series.mean` and :meth:`DataFrame.mean` with object-dtype values containing strings that can be converted to numbers (e.g. "2") returning incorrect numeric results; these now raise ``TypeError`` (:issue:`36703`, :issue:`44008`)
 - Bug in :meth:`DataFrame.corrwith` raising ``NotImplementedError`` for pyarrow-backed dtypes (:issue:`52314`)
 - Bug in :meth:`Series.corr` and :meth:`Series.cov` raising ``AttributeError`` for masked dtypes (:issue:`51422`)
+- Bug in :meth:`Series.median` and :meth:`DataFrame.median` with object-dtype values containing strings that can be converted to numbers (e.g. "2") returning incorrect numeric results; these now raise ``TypeError`` (:issue:`34671`)
 -
 
 Conversion
 
@@ -419,6 +419,7 @@ def closed(self) -> bool:
 AlignJoin = Literal["outer", "inner", "left", "right"]
 DtypeBackend = Literal["pyarrow", "numpy_nullable"]
 
+TimeUnit = Literal["s", "ms", "us", "ns"]
 OpenFileErrors = Literal[
     "strict",
     "ignore",
 
@@ -82,6 +82,7 @@
     TimedeltaConvertibleTypes,
     TimeNonexistent,
     TimestampConvertibleTypes,
+    TimeUnit,
     ValueKeyFunc,
     WriteBuffer,
     WriteExcelBuffer,
@@ -2284,7 +2285,7 @@ def to_json(
         date_format: str | None = None,
         double_precision: int = 10,
         force_ascii: bool_t = True,
-        date_unit: str = "ms",
+        date_unit: TimeUnit = "ms",
         default_handler: Callable[[Any], JSONSerializable] | None = None,
         lines: bool_t = False,
         compression: CompressionOptions = "infer",
@@ -2564,11 +2565,11 @@ def to_hdf(
         self,
         path_or_buf: FilePath | HDFStore,
         key: str,
-        mode: str = "a",
+        mode: Literal["a", "w", "r+"] = "a",
         complevel: int | None = None,
-        complib: str | None = None,
+        complib: Literal["zlib", "lzo", "bzip2", "blosc"] | None = None,
         append: bool_t = False,
-        format: str | None = None,
+        format: Literal["fixed", "table"] | None = None,
         index: bool_t = True,
         min_itemsize: int | dict[str, int] | None = None,
         nan_rep=None,
 
@@ -4349,7 +4349,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde
 _apply_groupings_depr = (
     "{}.apply operated on the grouping columns. This behavior is deprecated, "
     "and in a future version of pandas the grouping columns will be excluded "
-    "from the operation. Select the columns to operate on after groupby to"
+    "from the operation. Select the columns to operate on after groupby to "
     "either explicitly include or exclude the groupings and silence "
     "this warning."
 )
@@ -6,6 +6,8 @@
 
 import numpy as np
 
+from pandas.compat._optional import import_optional_dependency
+
 import pandas as pd
 from pandas.core.interchange.dataframe_protocol import (
     Buffer,
@@ -23,7 +25,7 @@
     DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
     DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},
     DtypeKind.FLOAT: {32: np.float32, 64: np.float64},
-    DtypeKind.BOOL: {8: bool},
+    DtypeKind.BOOL: {1: bool, 8: bool},
 }
 
 
@@ -154,7 +156,9 @@ def primitive_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
     buffers = col.get_buffers()
 
     data_buff, data_dtype = buffers["data"]
-    data = buffer_to_ndarray(data_buff, data_dtype, col.offset, col.size())
+    data = buffer_to_ndarray(
+        data_buff, data_dtype, offset=col.offset, length=col.size()
+    )
 
     data = set_nulls(data, col, buffers["validity"])
     return data, buffers
@@ -192,7 +196,9 @@ def categorical_column_to_series(col: Column) -> tuple[pd.Series, Any]:
     buffers = col.get_buffers()
 
     codes_buff, codes_dtype = buffers["data"]
-    codes = buffer_to_ndarray(codes_buff, codes_dtype, col.offset, col.size())
+    codes = buffer_to_ndarray(
+        codes_buff, codes_dtype, offset=col.offset, length=col.size()
+    )
 
     # Doing modulo in order to not get ``IndexError`` for
     # out-of-bounds sentinel values in `codes`
@@ -252,7 +258,7 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
         Endianness.NATIVE,
     )
     # Specify zero offset as we don't want to chunk the string data
-    data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=col.size())
+    data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=data_buff.bufsize)
 
     # Retrieve the offsets buffer containing the index offsets demarcating
     # the beginning and the ending of each string
@@ -261,14 +267,16 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
     # meaning that it has more elements than in the data buffer, do `col.size() + 1`
     # here to pass a proper offsets buffer size
     offsets = buffer_to_ndarray(
-        offset_buff, offset_dtype, col.offset, length=col.size() + 1
+        offset_buff, offset_dtype, offset=col.offset, length=col.size() + 1
     )
 
     null_pos = None
     if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
         assert buffers["validity"], "Validity buffers cannot be empty for masks"
         valid_buff, valid_dtype = buffers["validity"]
-        null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())
+        null_pos = buffer_to_ndarray(
+            valid_buff, valid_dtype, offset=col.offset, length=col.size()
+        )
         if sentinel_val == 0:
             null_pos = ~null_pos
 
@@ -356,8 +364,8 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
             getattr(ArrowCTypes, f"UINT{dtype[1]}"),
             Endianness.NATIVE,
         ),
-        col.offset,
-        col.size(),
+        offset=col.offset,
+        length=col.size(),
     )
 
     data = parse_datetime_format_str(format_str, data)
@@ -368,8 +376,9 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:
 def buffer_to_ndarray(
     buffer: Buffer,
     dtype: tuple[DtypeKind, int, str, str],
+    *,
+    length: int,
     offset: int = 0,
-    length: int | None = None,
 ) -> np.ndarray:
     """
     Build a NumPy array from the passed buffer.
@@ -406,74 +415,27 @@ def buffer_to_ndarray(
     # and size in the buffer plus the dtype on the column. Use DLPack as NumPy supports
     # it since https://github.com/numpy/numpy/pull/19083
     ctypes_type = np.ctypeslib.as_ctypes_type(column_dtype)
-    data_pointer = ctypes.cast(
-        buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type)
-    )
 
     if bit_width == 1:
         assert length is not None, "`length` must be specified for a bit-mask buffer."
-        arr = np.ctypeslib.as_array(data_pointer, shape=(buffer.bufsize,))
-        return bitmask_to_bool_ndarray(arr, length, first_byte_offset=offset % 8)
+        pa = import_optional_dependency("pyarrow")
+        arr = pa.BooleanArray.from_buffers(
+            pa.bool_(),
+            length,
+            [None, pa.foreign_buffer(buffer.ptr, length)],
+            offset=offset,
+        )
+        return np.asarray(arr)
     else:
+        data_pointer = ctypes.cast(
+            buffer.ptr + (offset * bit_width // 8), ctypes.POINTER(ctypes_type)
+        )
         return np.ctypeslib.as_array(
-            data_pointer, shape=(buffer.bufsize // (bit_width // 8),)
+            data_pointer,
+            shape=(length,),
         )
 
 
-def bitmask_to_bool_ndarray(
-    bitmask: np.ndarray, mask_length: int, first_byte_offset: int = 0
-) -> np.ndarray:
-    """
-    Convert bit-mask to a boolean NumPy array.
-
-    Parameters
-    ----------
-    bitmask : np.ndarray[uint8]
-        NumPy array of uint8 dtype representing the bitmask.
-    mask_length : int
-        Number of elements in the mask to interpret.
-    first_byte_offset : int, default: 0
-        Number of elements to offset from the start of the first byte.
-
-    Returns
-    -------
-    np.ndarray[bool]
-    """
-    bytes_to_skip = first_byte_offset // 8
-    bitmask = bitmask[bytes_to_skip:]
-    first_byte_offset %= 8
-
-    bool_mask = np.zeros(mask_length, dtype=bool)
-
-    # Processing the first byte separately as it has its own offset
-    val = bitmask[0]
-    mask_idx = 0
-    bits_in_first_byte = min(8 - first_byte_offset, mask_length)
-    for j in range(bits_in_first_byte):
-        if val & (1 << (j + first_byte_offset)):
-            bool_mask[mask_idx] = True
-        mask_idx += 1
-
-    # `mask_length // 8` describes how many full bytes to process
-    for i in range((mask_length - bits_in_first_byte) // 8):
-        # doing `+ 1` as we already processed the first byte
-        val = bitmask[i + 1]
-        for j in range(8):
-            if val & (1 << j):
-                bool_mask[mask_idx] = True
-            mask_idx += 1
-
-    if len(bitmask) > 1:
-        # Processing remainder of last byte
-        val = bitmask[-1]
-        for j in range(len(bool_mask) - mask_idx):
-            if val & (1 << j):
-                bool_mask[mask_idx] = True
-            mask_idx += 1
-
-    return bool_mask
-
-
 def set_nulls(
     data: np.ndarray | pd.Series,
     col: Column,
@@ -509,7 +471,9 @@ def set_nulls(
     elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
         assert validity, "Expected to have a validity buffer for the mask"
         valid_buff, valid_dtype = validity
-        null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size())
+        null_pos = buffer_to_ndarray(
+            valid_buff, valid_dtype, offset=col.offset, length=col.size()
+        )
         if sentinel_val == 0:
             null_pos = ~null_pos
     elif null_kind in (ColumnNullType.NON_NULLABLE, ColumnNullType.USE_NAN):
 
@@ -716,7 +716,8 @@ def nanmean(
         dtype_count = dtype
 
     count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
-    the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
+    the_sum = values.sum(axis, dtype=dtype_sum)
+    the_sum = _ensure_numeric(the_sum)
 
     if axis is not None and getattr(the_sum, "ndim", False):
         count = cast(np.ndarray, count)
@@ -775,6 +776,11 @@ def get_median(x, _mask=None):
     dtype = values.dtype
     values, mask = _get_values(values, skipna, mask=mask, fill_value=0)
     if values.dtype.kind != "f":
+        if values.dtype == object:
+            # GH#34671 avoid casting strings to numeric
+            inferred = lib.infer_dtype(values)
+            if inferred in ["string", "mixed"]:
+                raise TypeError(f"Cannot convert {values} to numeric")
         try:
             values = values.astype("f8")
         except ValueError as err:
@@ -1659,6 +1665,10 @@ def _ensure_numeric(x):
         if x.dtype.kind in "biu":
             x = x.astype(np.float64)
         elif x.dtype == object:
+            inferred = lib.infer_dtype(x)
+            if inferred in ["string", "mixed"]:
+                # GH#44008, GH#36703 avoid casting e.g. strings to numeric
+                raise TypeError(f"Could not convert {x} to numeric")
             try:
                 x = x.astype(np.complex128)
             except (TypeError, ValueError):
@@ -1671,6 +1681,9 @@ def _ensure_numeric(x):
                 if not np.any(np.imag(x)):
                     x = x.real
     elif not (is_float(x) or is_integer(x) or is_complex(x)):
+        if isinstance(x, str):
+            # GH#44008, GH#36703 avoid casting e.g. strings to numeric
+            raise TypeError(f"Could not convert string '{x}' to numeric")
         try:
             x = float(x)
         except (TypeError, ValueError):
 
@@ -1022,38 +1022,6 @@ class DataFrameRenderer:
     def __init__(self, fmt: DataFrameFormatter) -> None:
         self.fmt = fmt
 
-    def to_latex(
-        self,
-        buf: FilePath | WriteBuffer[str] | None = None,
-        column_format: str | None = None,
-        longtable: bool = False,
-        encoding: str | None = None,
-        multicolumn: bool = False,
-        multicolumn_format: str | None = None,
-        multirow: bool = False,
-        caption: str | tuple[str, str] | None = None,
-        label: str | None = None,
-        position: str | None = None,
-    ) -> str | None:
-        """
-        Render a DataFrame to a LaTeX tabular/longtable environment output.
-        """
-        from pandas.io.formats.latex import LatexFormatter
-
-        latex_formatter = LatexFormatter(
-            self.fmt,
-            longtable=longtable,
-            column_format=column_format,
-            multicolumn=multicolumn,
-            multicolumn_format=multicolumn_format,
-            multirow=multirow,
-            caption=caption,
-            label=label,
-            position=position,
-        )
-        string = latex_formatter.to_string()
-        return save_to_buffer(string, buf=buf, encoding=encoding)
-
     def to_html(
         self,
         buf: FilePath | WriteBuffer[str] | None = None,
Original file line number	Diff line number	Diff line change
`@@ -4349,7 +4349,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde`
`4349`	`4349`	`_apply_groupings_depr = (`
`4350`	`4350`	`"{}.apply operated on the grouping columns. This behavior is deprecated, "`
`4351`	`4351`	`"and in a future version of pandas the grouping columns will be excluded "`
`4352`		`- "from the operation. Select the columns to operate on after groupby to"`
	`4352`	`+ "from the operation. Select the columns to operate on after groupby to "`
`4353`	`4353`	`"either explicitly include or exclude the groupings and silence "`
`4354`	`4354`	`"this warning."`
`4355`	`4355`	`)`