From 905327fcac09debe5b3f2e2b800f767f3c6f6dd8 Mon Sep 17 00:00:00 2001 From: AshaHolla Date: Thu, 19 Oct 2023 00:32:45 +0530 Subject: [PATCH 1/2] include_lowest modifying the dtype of the bins and the changing the lower bound of the range is appropriately documented --- pandas/core/reshape/tile.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 980e8aa41669f..8c935cddfa414 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -125,6 +125,22 @@ def cut( Categorical for all other inputs. The values stored within are whatever the type in the sequence is. + Note that due to the .1% extension on the range x, the lower bound + shifts by 0.001 which makes the dtype of bin intervals to change from + int64 to float64 + + Examples + ------------------------- + In: + pd.cut(np.array([0, 1, 7]), bins=[0, 3, 6, 8], include_lowest=True) + + Out: + [(-0.001, 3.0], (-0.001, 3.0], (6.0, 8.0]] + Categories (3, interval[float64]): [(-0.001, 3.0] < (3.0, 6.0] < (6.0, 8.0]] + + The lowermost interval changes from 0 to -0.001 after .1% adjustment + which results in change of dtype from int64 to float64 + * False : returns an ndarray of integers. bins : numpy.ndarray or IntervalIndex. From 8fbf43327ec2a451ba14964f43b90b46fecd72b2 Mon Sep 17 00:00:00 2001 From: AshaHolla Date: Thu, 19 Oct 2023 00:45:02 +0530 Subject: [PATCH 2/2] include_lowest modifying the dtype of the bins and the changing the lower bound of the range is appropriately documented --- pandas/core/reshape/tile.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8c935cddfa414..fa7a1feccc9c9 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -102,7 +102,7 @@ def cut( precision : int, default 3 The precision at which to store and display the bins labels. 
include_lowest : bool, default False - Whether the first interval should be left-inclusive or not. + Whether the first interval should be left-inclusive or not. duplicates : {default 'raise', 'drop'}, optional If bin edges are not unique, raise ValueError or drop non-uniques. ordered : bool, default True @@ -111,6 +111,24 @@ def cut( the resulting categorical will be ordered. If False, the resulting categorical will be unordered (labels must be provided). + Notes + ----- + Using include_lowest shifts the lower bound of x by -0.001, + due to the .1% extension on the range x, which causes the + dtype of the bin intervals to change from int64 to float64. + + Examples + -------- + In: + >>> pd.cut(np.array([0, 1, 7]), bins=[0, 3, 6, 8], include_lowest=True) + ... # doctest: +ELLIPSIS + Out: + [(-0.001, 3.0], (-0.001, 3.0], (6.0, 8.0]] + Categories (3, interval[float64]): [(-0.001, 3.0] < (3.0, 6.0] < (6.0, 8.0]] + + The lowermost interval changes from 0 to -0.001 after the .1% adjustment, + which results in a change of dtype from int64 to float64. + Returns ------- out : Categorical, Series, or ndarray @@ -125,22 +143,6 @@ def cut( Categorical for all other inputs. The values stored within are whatever the type in the sequence is. - Note that due to the .1% extension on the range x, the lower bound - shifts by 0.001 which makes the dtype of bin intervals to change from - int64 to float64 - - Examples - ------------------------- - In: - pd.cut(np.array([0, 1, 7]), bins=[0, 3, 6, 8], include_lowest=True) - - Out: - [(-0.001, 3.0], (-0.001, 3.0], (6.0, 8.0]] - Categories (3, interval[float64]): [(-0.001, 3.0] < (3.0, 6.0] < (6.0, 8.0]] - - The lowermost interval changes from 0 to -0.001 after .1% adjustment - which results in change of dtype from int64 to float64 - * False : returns an ndarray of integers. bins : numpy.ndarray or IntervalIndex.