pandas-dev · jreback · Jul 28, 2021 · Jul 8, 2021 · Jul 13, 2021 · Jul 16, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -265,6 +265,7 @@ Groupby/resample/rolling
 Reshaping
 ^^^^^^^^^
 - :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
+- Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`)
 -
 
 Sparse

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -384,7 +384,7 @@ def qcut(
 
 def _bins_to_cuts(
     x,
-    bins,
+    bins: np.ndarray,
     right: bool = True,
     labels=None,
     precision: int = 3,
@@ -421,7 +421,7 @@ def _bins_to_cuts(
     ids = ensure_platform_int(bins.searchsorted(x, side=side))
 
     if include_lowest:
-        ids[x == bins[0]] = 1
+        ids[np.asarray(x) == bins[0]] = 1
 
     na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
     has_nas = na_mask.any()

diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py
@@ -691,3 +691,48 @@ def test_cut_no_warnings():
     labels = [f"{i} - {i + 9}" for i in range(0, 100, 10)]
     with tm.assert_produces_warning(False):
         df["group"] = cut(df.value, range(0, 105, 10), right=False, labels=labels)
+
+
+def test_cut_with_duplicated_index_lowest_included():
+    # GH 42185
+    expected = Series(
+        [Interval(-0.001, 2, closed="right")] * 3
+        + [Interval(2, 4, closed="right"), Interval(-0.001, 2, closed="right")],
+        index=[0, 1, 2, 3, 0],
+        dtype="category",
+    ).cat.as_ordered()
+
+    s = Series([0, 1, 2, 3, 0], index=[0, 1, 2, 3, 0])
+    result = cut(s, bins=[0, 2, 4], include_lowest=True)
+    tm.assert_series_equal(result, expected)
+
+
+def test_cut_with_nonexact_categorical_indices():
+    # GH 42424
+
+    ser = Series(range(0, 100))
+    ser1 = cut(ser, 10).value_counts().head(5)
+    ser2 = cut(ser, 10).value_counts().tail(5)
+    result = DataFrame({"1": ser1, "2": ser2})
+
+    index = pd.CategoricalIndex(
+        [
+            Interval(-0.099, 9.9, closed="right"),
+            Interval(9.9, 19.8, closed="right"),
+            Interval(19.8, 29.7, closed="right"),
+            Interval(29.7, 39.6, closed="right"),
+            Interval(39.6, 49.5, closed="right"),
+            Interval(49.5, 59.4, closed="right"),
+            Interval(59.4, 69.3, closed="right"),
+            Interval(69.3, 79.2, closed="right"),
+            Interval(79.2, 89.1, closed="right"),
+            Interval(89.1, 99, closed="right"),
+        ],
+        ordered=True,
+    )
+
+    expected = DataFrame(
+        {"1": [10] * 5 + [np.nan] * 5, "2": [np.nan] * 5 + [10] * 5}, index=index
+    )
+
+    tm.assert_frame_equal(expected, result)