Merge branch 'main' into bugfix/57539-fix-to-sql

shabab477 · web-flow · commit e88a9cbade52 · 2024-03-27T23:22:08.000+01:00
diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst
@@ -15,13 +15,14 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when a column's type was a pandas nullable one with missing values (:issue:`56702`)
 - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when a column's type was a pyarrow nullable one with missing values (:issue:`57664`)
--
+- Fixed regression in precision of :func:`to_datetime` with string and ``unit`` input (:issue:`57051`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_222.bug_fixes:
 
 Bug fixes
 ~~~~~~~~~
+- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`)
 - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`)
 - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) for ``'string[pyarrow]'`` without missing values (:issue:`57761`)
 - :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`)
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -206,6 +206,7 @@ Removal of prior version deprecations/changes
 - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
 - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
 - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
+- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`)
 - Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
 - Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
 - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -275,7 +275,7 @@ def array_with_unit_to_datetime(
         bint is_raise = errors == "raise"
         ndarray[int64_t] iresult
         tzinfo tz = None
-        float fval
+        double fval
 
     assert is_coerce or is_raise
 
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -54,7 +54,6 @@
     cache_readonly,
     doc,
 )
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     ensure_object,
@@ -135,11 +134,6 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):  # type: ignore[misc]
     dtype : PeriodDtype, optional
         A PeriodDtype instance from which to extract a `freq`. If both
         `freq` and `dtype` are specified, then the frequencies must match.
-    freq : str or DateOffset
-        The `freq` to use for the array. Mostly applicable when `values`
-        is an ndarray of integers, when `freq` is required. When `values`
-        is a PeriodArray (or box around), it's checked that ``values.freq``
-        matches `freq`.
     copy : bool, default False
         Whether to copy the ordinals before storing.
 
@@ -224,20 +218,7 @@ def _scalar_type(self) -> type[Period]:
     # --------------------------------------------------------------------
     # Constructors
 
-    def __init__(
-        self, values, dtype: Dtype | None = None, freq=None, copy: bool = False
-    ) -> None:
-        if freq is not None:
-            # GH#52462
-            warnings.warn(
-                "The 'freq' keyword in the PeriodArray constructor is deprecated "
-                "and will be removed in a future version. Pass 'dtype' instead",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            freq = validate_dtype_freq(dtype, freq)
-            dtype = PeriodDtype(freq)
-
+    def __init__(self, values, dtype: Dtype | None = None, copy: bool = False) -> None:
         if dtype is not None:
             dtype = pandas_dtype(dtype)
             if not isinstance(dtype, PeriodDtype):
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9660,13 +9660,7 @@ def _where(
 
         # make sure we are boolean
         fill_value = bool(inplace)
-        with warnings.catch_warnings():
-            warnings.filterwarnings(
-                "ignore",
-                "Downcasting object dtype arrays",
-                category=FutureWarning,
-            )
-            cond = cond.fillna(fill_value)
+        cond = cond.fillna(fill_value)
         cond = cond.infer_objects()
 
         msg = "Boolean array expected for the condition, not {dtype}"
diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py
@@ -144,6 +144,9 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
     elif isinstance(dtype, DatetimeTZDtype):
         return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz)
 
+    elif isinstance(dtype, pd.BooleanDtype):
+        return ArrowCTypes.BOOL
+
     raise NotImplementedError(
         f"Conversion of {dtype} to Arrow C format string is not implemented."
     )
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -932,14 +932,18 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
         if len(frame.columns) == 1:
             data = frame.copy()
         else:
-            # Take the data from frame corresponding to this idx value
-            if len(level) == 1:
-                idx = (idx,)
-            gen = iter(idx)
-            column_indexer = tuple(
-                next(gen) if k in set_levels else slice(None)
-                for k in range(frame.columns.nlevels)
-            )
+            if not isinstance(frame.columns, MultiIndex) and not isinstance(idx, tuple):
+                # GH#57750 - if columns is an Index with tuples, .loc below will fail
+                column_indexer = idx
+            else:
+                # Take the data from frame corresponding to this idx value
+                if len(level) == 1:
+                    idx = (idx,)
+                gen = iter(idx)
+                column_indexer = tuple(
+                    next(gen) if k in set_levels else slice(None)
+                    for k in range(frame.columns.nlevels)
+                )
             data = frame.loc[:, column_indexer]
 
         if len(level) < frame.columns.nlevels:
diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py
@@ -11,7 +11,6 @@
     Any,
     final,
 )
-import warnings
 
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import (
@@ -208,13 +207,7 @@ def _process_dataframe(self) -> dict[int | str, dict[str, Any]]:
             df = df.reset_index()
 
         if self.na_rep is not None:
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore",
-                    "Downcasting object dtype arrays",
-                    category=FutureWarning,
-                )
-                df = df.fillna(self.na_rep)
+            df = df.fillna(self.na_rep)
 
         return df.to_dict(orient="index")
 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -16,7 +16,6 @@
     final,
     overload,
 )
-import warnings
 
 import numpy as np
 
@@ -1173,13 +1172,7 @@ def _try_convert_data(
                 if all(notna(data)):
                     return data, False
 
-                with warnings.catch_warnings():
-                    warnings.filterwarnings(
-                        "ignore",
-                        "Downcasting object dtype arrays",
-                        category=FutureWarning,
-                    )
-                    filled = data.fillna(np.nan)
+                filled = data.fillna(np.nan)
 
                 return filled, True
 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -2887,13 +2887,7 @@ def _prepare_data(self) -> np.rec.recarray:
         for i, col in enumerate(data):
             typ = typlist[i]
             if typ <= self._max_string_length:
-                with warnings.catch_warnings():
-                    warnings.filterwarnings(
-                        "ignore",
-                        "Downcasting object dtype arrays",
-                        category=FutureWarning,
-                    )
-                    dc = data[col].fillna("")
+                dc = data[col].fillna("")
                 data[col] = dc.apply(_pad_bytes, args=(typ,))
                 stype = f"S{typ}"
                 dtypes[col] = stype
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
@@ -1725,13 +1725,7 @@ def _kind(self) -> Literal["area"]:
 
     def __init__(self, data, **kwargs) -> None:
         kwargs.setdefault("stacked", True)
-        with warnings.catch_warnings():
-            warnings.filterwarnings(
-                "ignore",
-                "Downcasting object dtype arrays",
-                category=FutureWarning,
-            )
-            data = data.fillna(value=0)
+        data = data.fillna(value=0)
         LinePlot.__init__(self, data, **kwargs)
 
         if not self.stacked:
diff --git a/pandas/tests/arrays/period/test_constructors.py b/pandas/tests/arrays/period/test_constructors.py
@@ -135,17 +135,6 @@ def test_from_td64nat_sequence_raises():
         pd.DataFrame(arr, dtype=dtype)
 
 
-def test_freq_deprecated():
-    # GH#52462
-    data = np.arange(5).astype(np.int64)
-    msg = "The 'freq' keyword in the PeriodArray constructor is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        res = PeriodArray(data, freq="M")
-
-    expected = PeriodArray(data, dtype="period[M]")
-    tm.assert_equal(res, expected)
-
-
 def test_period_array_from_datetime64():
     arr = np.array(
         ["2020-01-01T00:00:00", "2020-02-02T00:00:00"], dtype="datetime64[ns]"
diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py
@@ -14,8 +14,6 @@
 
 """
 
-import warnings
-
 import numpy as np
 import pytest
 
@@ -215,13 +213,7 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
 
         if sdtype.kind in "iu":
             if op_name in ("__rtruediv__", "__truediv__", "__div__"):
-                with warnings.catch_warnings():
-                    warnings.filterwarnings(
-                        "ignore",
-                        "Downcasting object dtype arrays",
-                        category=FutureWarning,
-                    )
-                    filled = expected.fillna(np.nan)
+                filled = expected.fillna(np.nan)
                 expected = filled.astype("Float64")
             else:
                 # combine method result in 'biggest' (int64) dtype
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -96,7 +96,6 @@ def test_where_upcasting(self):
 
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
     def test_where_alignment(self, where_frame, float_string_frame):
         # aligning
         def _check_align(df, cond, other, check_dtypes=True):
@@ -171,7 +170,6 @@ def test_where_invalid(self):
         with pytest.raises(ValueError, match=msg):
             df.mask(0)
 
-    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
     def test_where_set(self, where_frame, float_string_frame, mixed_int_frame):
         # where inplace
 
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -1272,7 +1272,6 @@ def test_any_all_bool_with_na(
     ):
         getattr(bool_frame_with_na, all_boolean_reductions)(axis=axis, bool_only=False)
 
-    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
     def test_any_all_bool_frame(self, all_boolean_reductions, bool_frame_with_na):
         # GH#12863: numpy gives back non-boolean data for object type
         # so fill NaNs to compare with pandas behavior
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -1223,7 +1223,6 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack):
     @pytest.mark.filterwarnings(
         "ignore:The previous implementation of stack is deprecated"
     )
-    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
     @pytest.mark.parametrize(
         "index",
         [
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
@@ -310,7 +310,6 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
             method(*args, **kwargs)
 
 
-@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
 @pytest.mark.parametrize("dtype", [bool, int, float, object])
 def test_deprecate_numeric_only_series(dtype, groupby_func, request):
     # GH#46560
diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py
@@ -459,6 +459,7 @@ def test_non_str_names_w_duplicates():
         ),
         ([1.0, 2.25, None], "Float32", "float32"),
         ([1.0, 2.25, None], "Float32[pyarrow]", "float32"),
+        ([True, False, None], "boolean", "bool"),
         ([True, False, None], "boolean[pyarrow]", "bool"),
         (["much ado", "about", None], "string[pyarrow_numpy]", "large_string"),
         (["much ado", "about", None], "string[pyarrow]", "large_string"),
@@ -521,6 +522,7 @@ def test_pandas_nullable_with_missing_values(
         ),
         ([1.0, 2.25, 5.0], "Float32", "float32"),
         ([1.0, 2.25, 5.0], "Float32[pyarrow]", "float32"),
+        ([True, False, False], "boolean", "bool"),
         ([True, False, False], "boolean[pyarrow]", "bool"),
         (["much ado", "about", "nothing"], "string[pyarrow_numpy]", "large_string"),
         (["much ado", "about", "nothing"], "string[pyarrow]", "large_string"),
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -195,7 +195,6 @@ def test_series_datetimelike_attribute_access_invalid(self):
         with pytest.raises(AttributeError, match=msg):
             ser.weekday
 
-    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
     @pytest.mark.parametrize(
         "kernel, has_numeric_only",
         [
diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py
@@ -15,7 +15,6 @@
 
 
 class TestSeriesLogicalOps:
-    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
     @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
     def test_bool_operators_with_nas(self, bool_op):
         # boolean &, |, ^ should work with object arrays and propagate NAs
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
@@ -1735,6 +1735,14 @@ def test_unit(self, cache):
         with pytest.raises(ValueError, match=msg):
             to_datetime([1], unit="D", format="%Y%m%d", cache=cache)
 
+    def test_unit_str(self, cache):
+        # GH 57051
+        # Test that strs aren't dropping precision to 32-bit accidentally.
+        with tm.assert_produces_warning(FutureWarning):
+            res = to_datetime(["1704660000"], unit="s", origin="unix")
+        expected = to_datetime([1704660000], unit="s", origin="unix")
+        tm.assert_index_equal(res, expected)
+
     def test_unit_array_mixed_nans(self, cache):
         values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""]
 
diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md
@@ -82,6 +82,20 @@ pd.set_option("plotting.backend", "pandas_bokeh")
 It is very similar to the matplotlib plotting backend, but provides
 interactive web-based charts and maps.
 
+### [pygwalker](https://github.com/Kanaries/pygwalker)
+
+PyGWalker is an interactive data visualization and
+exploratory data analysis tool built upon Graphic Walker
+with support for visualization, cleaning, and annotation workflows.
+
+PyGWalker can save interactively created charts
+to Graphic-Walker and Vega-Lite JSON.
+
+```
+import pygwalker as pyg
+pyg.walk(df)
+```
+
 ### [seaborn](https://seaborn.pydata.org)
 
 Seaborn is a Python visualization library based on
@@ -94,6 +108,11 @@ pandas with the option to perform statistical estimation while plotting,
 aggregating across observations and visualizing the fit of statistical
 models to emphasize patterns in a dataset.
 
+```
+import seaborn as sns
+sns.set_theme()
+```
+
 ### [plotnine](https://github.com/has2k1/plotnine/)
 
 Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a

Original file line number	Diff line number	Diff line change
`@@ -144,6 +144,9 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:`
`144`	`144`	`elif isinstance(dtype, DatetimeTZDtype):`
`145`	`145`	`return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz)`
`146`	`146`
	`147`	`+ elif isinstance(dtype, pd.BooleanDtype):`
	`148`	`+ return ArrowCTypes.BOOL`
	`149`	`+`
`147`	`150`	`raise NotImplementedError(`
`148`	`151`	`f"Conversion of {dtype} to Arrow C format string is not implemented."`
`149`	`152`	`)`
Original file line number	Diff line number	Diff line change
`@@ -1223,7 +1223,6 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack):`
`1223`	`1223`	`@pytest.mark.filterwarnings(`
`1224`	`1224`	`"ignore:The previous implementation of stack is deprecated"`
`1225`	`1225`	`)`
`1226`		`- @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")`
`1227`	`1226`	`@pytest.mark.parametrize(`
`1228`	`1227`	`"index",`
`1229`	`1228`	`[`
Original file line number	Diff line number	Diff line change
`@@ -195,7 +195,6 @@ def test_series_datetimelike_attribute_access_invalid(self):`
`195`	`195`	`with pytest.raises(AttributeError, match=msg):`
`196`	`196`	`ser.weekday`
`197`	`197`
`198`		`- @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")`
`199`	`198`	`@pytest.mark.parametrize(`
`200`	`199`	`"kernel, has_numeric_only",`
`201`	`200`	`[`