From 9d40d0572426a1c13416972e9fd192b57941b682 Mon Sep 17 00:00:00 2001
From: Rob <124158982+rob-sil@users.noreply.github.com>
Date: Sun, 4 Aug 2024 09:30:29 -0700
Subject: [PATCH 1/2] Handle floating point boundaries

---
 doc/source/whatsnew/v3.0.0.rst      |  1 +
 pandas/core/array_algos/quantile.py | 35 +++++++++--------------------
 pandas/core/reshape/tile.py         | 11 ++++++++-
 pandas/tests/reshape/test_qcut.py   | 12 ++++++++++
 4 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index e3c4e69db7cbd..df6aa3232ad1c 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -615,6 +615,7 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
+- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py
index 5c933294fb944..9be478d6baed8 100644
--- a/pandas/core/array_algos/quantile.py
+++ b/pandas/core/array_algos/quantile.py
@@ -94,9 +94,9 @@ def quantile_with_mask(
         flat = np.array([fill_value] * len(qs))
         result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
     else:
-        result = _nanpercentile(
+        result = _nanquantile(
             values,
-            qs * 100.0,
+            qs,
             na_value=fill_value,
             mask=mask,
             interpolation=interpolation,
@@ -108,7 +108,7 @@ def quantile_with_mask(
     return result
 
 
-def _nanpercentile_1d(
+def _nanquantile_1d(
     values: np.ndarray,
     mask: npt.NDArray[np.bool_],
     qs: npt.NDArray[np.float64],
@@ -116,7 +116,7 @@ def _nanpercentile_1d(
     interpolation: str,
 ) -> Scalar | np.ndarray:
     """
-    Wrapper for np.percentile that skips missing values, specialized to
+    Wrapper for np.quantile that skips missing values, specialized to
     1-dimensional case.
 
     Parameters
@@ -142,17 +142,10 @@ def _nanpercentile_1d(
         # equiv: 'np.array([na_value] * len(qs))' but much faster
         return np.full(len(qs), na_value)
 
-    return np.percentile(
-        values,
-        qs,
-        # error: No overload variant of "percentile" matches argument
-        # types "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]"
-        # , "Dict[str, str]"  [call-overload]
-        method=interpolation,  # type: ignore[call-overload]
-    )
+    return np.quantile(values, qs, method=interpolation)
 
 
-def _nanpercentile(
+def _nanquantile(
     values: np.ndarray,
     qs: npt.NDArray[np.float64],
     *,
@@ -161,7 +154,7 @@ def _nanpercentile(
     interpolation: str,
 ):
     """
-    Wrapper for np.percentile that skips missing values.
+    Wrapper for np.quantile that skips missing values.
 
     Parameters
     ----------
@@ -180,7 +173,7 @@ def _nanpercentile(
 
     if values.dtype.kind in "mM":
         # need to cast to integer to avoid rounding errors in numpy
-        result = _nanpercentile(
+        result = _nanquantile(
             values.view("i8"),
             qs=qs,
             na_value=na_value.view("i8"),
@@ -196,7 +189,7 @@ def _nanpercentile(
         # Caller is responsible for ensuring mask shape match
         assert mask.shape == values.shape
         result = [
-            _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
+            _nanquantile_1d(val, m, qs, na_value, interpolation=interpolation)
             for (val, m) in zip(list(values), list(mask))
         ]
         if values.dtype.kind == "f":
@@ -215,12 +208,4 @@ def _nanpercentile(
                 result = result.astype(values.dtype, copy=False)
         return result
     else:
-        return np.percentile(
-            values,
-            qs,
-            axis=1,
-            # error: No overload variant of "percentile" matches argument types
-            # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
-            # "int", "Dict[str, str]"  [call-overload]
-            method=interpolation,  # type: ignore[call-overload]
-        )
+        return np.quantile(values, qs, axis=1, method=interpolation)
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index 18517199f073c..b3f946f289891 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -358,7 +358,16 @@ def qcut(
     x_idx = _preprocess_for_cut(x)
     x_idx, _ = _coerce_to_type(x_idx)
 
-    quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q
+    if is_integer(q):
+        quantiles = np.linspace(0, 1, q + 1)
+        # Round up rather than to nearest if not representable in base 2
+        np.putmask(
+            quantiles,
+            q * quantiles != np.arange(q + 1),
+            np.nextafter(quantiles, 1),
+        )
+    else:
+        quantiles = q
 
     bins = x_idx.to_series().dropna().quantile(quantiles)
 
diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py
index 5f769db7f8acf..b2e9f26e1c407 100644
--- a/pandas/tests/reshape/test_qcut.py
+++ b/pandas/tests/reshape/test_qcut.py
@@ -307,3 +307,15 @@ def test_qcut_nullable_integer(q, any_numeric_ea_dtype):
     expected = qcut(arr.astype(float), q)
 
     tm.assert_categorical_equal(result, expected)
+
+
+@pytest.mark.parametrize("scale", [1.0, 1 / 3, 17.0])
+@pytest.mark.parametrize("q", [3, 7, 9])
+@pytest.mark.parametrize("precision", [1, 3, 16])
+def test_qcut_contains(scale, q, precision):
+    # GH-59355: each value must fall inside the interval qcut assigns to it
+    arr = (scale * np.arange(q + 1)).round(precision)
+    result = qcut(arr, q, precision=precision)
+
+    for value, bucket in zip(arr, result):
+        assert value in bucket

From eef3366aebd82e58971df34b613f6e3ea57bed1e Mon Sep 17 00:00:00 2001
From: Rob <124158982+rob-sil@users.noreply.github.com>
Date: Thu, 8 Aug 2024 19:49:50 -0700
Subject: [PATCH 2/2] Mypy errors don't go away just because I didn't check
 them

---
 pandas/core/array_algos/quantile.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py
index 9be478d6baed8..b2f78182b9bf0 100644
--- a/pandas/core/array_algos/quantile.py
+++ b/pandas/core/array_algos/quantile.py
@@ -142,7 +142,14 @@ def _nanquantile_1d(
         # equiv: 'np.array([na_value] * len(qs))' but much faster
         return np.full(len(qs), na_value)
 
-    return np.quantile(values, qs, method=interpolation)
+    return np.quantile(
+        values,
+        qs,
+        # error: No overload variant of "quantile" matches argument
+        # types "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]"
+        # , "Dict[str, str]"  [call-overload]
+        method=interpolation,  # type: ignore[call-overload]
+    )
 
 
 def _nanquantile(
@@ -208,4 +215,12 @@ def _nanquantile(
                 result = result.astype(values.dtype, copy=False)
         return result
     else:
-        return np.quantile(values, qs, axis=1, method=interpolation)
+        return np.quantile(
+            values,
+            qs,
+            axis=1,
+            # error: No overload variant of "quantile" matches argument types
+            # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
+            # "int", "Dict[str, str]"  [call-overload]
+            method=interpolation,  # type: ignore[call-overload]
+        )