From 2448809c9b542b43454b2f3f5981573edec30c6f Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 10:52:01 -0500 Subject: [PATCH 01/25] docstring for clip --- pandas/core/generic.py | 64 +++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f2112729a503..70472aacbc45b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5618,36 +5618,42 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Examples -------- >>> df - 0 1 - 0 0.335232 -1.256177 - 1 -1.367855 0.746646 - 2 0.027753 -1.176076 - 3 0.230930 -0.679613 - 4 1.261967 0.570967 - - >>> df.clip(-1.0, 0.5) - 0 1 - 0 0.335232 -1.000000 - 1 -1.000000 0.500000 - 2 0.027753 -1.000000 - 3 0.230930 -0.679613 - 4 0.500000 0.500000 - - >>> t - 0 -0.3 - 1 -0.2 - 2 -0.1 - 3 0.0 - 4 0.1 - dtype: float64 + a b c + 0 1 4 7 + 1 2 5 8 + 2 3 6 9001 + + + >>> df.clip( lower = 1 , upper = 9 ) + a b c + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + + You can clip each column with different thresholds by passing a Series to the lower/upper argument. + + >>> df + A B + foo -19 1 + bar 12 100 + bizz -5 -5 + + >>> df.clip( lower = pd.Series({'A': -10, 'B': 10}) , axis = 1 ) + + A B + foo -10 10 + bar 12 100 + bizz -5 10 + + Use the axis argument to clip by column or rows. 
+ + >>> df.clip( lower = pd.Series({'foo':-10,'bar':0, 'bizz':10}) , axis = 0 ) + A B + foo -10 1 + bar 12 100 + bizz 10 10 - >>> df.clip(t, t + 1, axis=0) - 0 1 - 0 0.335232 -0.300000 - 1 -0.200000 0.746646 - 2 0.027753 -0.100000 - 3 0.230930 0.000000 - 4 1.100000 0.570967 """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From aaeb2dcfd35beb5a5b01caff8fece74797116845 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 10:56:09 -0500 Subject: [PATCH 02/25] clip docstring --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 70472aacbc45b..26ba8f57b6d6f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5655,6 +5655,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, bizz 10 10 """ + if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From 62afcabf3bee4e813f73d106d0384e7c1e45dc72 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 12:19:31 -0500 Subject: [PATCH 03/25] Edit Docstring --- pandas/core/generic.py | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 26ba8f57b6d6f..0f75f0b0e89fc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5601,6 +5601,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, """ Trim values at input threshold(s). + Elements above the upper threshold will be changed to upper threshold. + Elements below the lower threshold will be changed to lower threshold. + Parameters ---------- lower : float or array_like, default None @@ -5609,53 +5612,69 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Align object with lower and upper along the given axis. inplace : boolean, default False Whether to perform the operation in place on the data - .. 
versionadded:: 0.21.0 + .. versionadded:: 0.21.0. Returns ------- clipped : Series + Notes + ----- + Clipping data is a method for dealing with dubious elements. + If some elements are too large or too small, clipping is one way to + transform the data into a reasonable range. + + See Also + -------- + pandas.DataFrame.clip_upper : Return copy of input with values + above given value(s) truncated. + pandas.DataFrame.clip_lower : Return copy of input with values + below given value(s) truncated. + Examples -------- + >>> df = pd.DataFrame({'a':[1, 2, 3], 'b':[4, 5, 6], 'c':[7, 8, 9001]}) >>> df a b c 0 1 4 7 1 2 5 8 2 3 6 9001 - - >>> df.clip( lower = 1 , upper = 9 ) + >>> df.clip(lower = 1, upper = 9) a b c 0 1 4 7 1 2 5 8 2 3 6 9 - - You can clip each column with different thresholds by passing a Series to the lower/upper argument. + You can clip each column with different thresholds by passing + a Series to the lower/upper argument. + >>> some_data = {'A':[-19, 12, -5],'B':[1, 100, -5]} + >>> df = pd.DataFrame(data = some_data, index = ['foo', 'bar', 'bizz']) >>> df A B foo -19 1 bar 12 100 bizz -5 -5 - - >>> df.clip( lower = pd.Series({'A': -10, 'B': 10}) , axis = 1 ) + Use the axis argument to clip by column or rows. Clip column A with + lower threshold of -10 and column B has lower threshold of 10. + + >>> df.clip(lower = pd.Series({'A':-10, 'B':10}), axis = 1) A B foo -10 10 bar 12 100 bizz -5 10 - Use the axis argument to clip by column or rows. + Clip the foo, bar, and bizz rows with lower thresholds -10, 0, and 10. 
- >>> df.clip( lower = pd.Series({'foo':-10,'bar':0, 'bizz':10}) , axis = 0 ) + >>> row_thresh = pd.Series({'foo':-10, 'bar':0, 'bizz':10}) + >>> df.clip(lower = row_thresh ,axis = 0) A B foo -10 1 bar 12 100 bizz 10 10 - """ - if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From 33af7ce175d59cb43af3f051919f78b64b171284 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 12:37:30 -0500 Subject: [PATCH 04/25] docstrings clip --- pandas/core/generic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0f75f0b0e89fc..b6e5c6f2e83a4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5616,7 +5616,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Returns ------- - clipped : Series + clipped : DataFrame/Series Notes ----- @@ -5640,17 +5640,17 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, 1 2 5 8 2 3 6 9001 - >>> df.clip(lower = 1, upper = 9) + >>> df.clip(lower=1, upper=9) a b c 0 1 4 7 1 2 5 8 2 3 6 9 You can clip each column with different thresholds by passing - a Series to the lower/upper argument. + a ``Series`` to the lower/upper argument. >>> some_data = {'A':[-19, 12, -5],'B':[1, 100, -5]} - >>> df = pd.DataFrame(data = some_data, index = ['foo', 'bar', 'bizz']) + >>> df = pd.DataFrame(data=some_data, index=['foo', 'bar', 'bizz']) >>> df A B foo -19 1 @@ -5660,7 +5660,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Use the axis argument to clip by column or rows. Clip column A with lower threshold of -10 and column B has lower threshold of 10. 
 - >>> df.clip(lower = pd.Series({'A':-10, 'B':10}), axis = 1) + >>> df.clip(lower=pd.Series({'A':-10, 'B':10}), axis=1) A B foo -10 10 bar 12 100 @@ -5668,8 +5668,8 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Clip the foo, bar, and bizz rows with lower thresholds -10, 0, and 10. - >>> row_thresh = pd.Series({'foo':-10, 'bar':0, 'bizz':10}) - >>> df.clip(lower = row_thresh ,axis = 0) + >>> row_thresh=pd.Series({'foo':-10, 'bar':0, 'bizz':10}) + >>> df.clip(lower=row_thresh, axis=0) A B foo -10 1 bar 12 100 From cb8d7ab2c24bc01e44c88d11e5866eb91e4e54f8 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 13:08:43 -0500 Subject: [PATCH 05/25] clip docstring Added winsorization example. --- pandas/core/generic.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b6e5c6f2e83a4..06da176cb4195 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5633,7 +5633,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Examples -------- - >>> df = pd.DataFrame({'a':[1, 2, 3], 'b':[4, 5, 6], 'c':[7, 8, 9001]}) + >>> df=pd.DataFrame({'a':[1, 2, 3], 'b':[4, 5, 6], 'c':[7, 8, 9001]}) >>> df a b c 0 1 4 7 @@ -5646,11 +5646,11 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, 1 2 5 8 2 3 6 9 - You can clip each column with different thresholds by passing + You can clip each column or row with different thresholds by passing a ``Series`` to the lower/upper argument. >>> some_data = {'A':[-19, 12, -5],'B':[1, 100, -5]} - >>> df = pd.DataFrame(data=some_data, index=['foo', 'bar', 'bizz']) + >>> df=pd.DataFrame(data=some_data, index=['foo', 'bar', 'bizz']) >>> df A B foo -19 1 @@ -5674,6 +5674,18 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, foo -10 1 bar 12 100 bizz 10 10 + + `Winsorizing `__ is a way + of removing outliers from data. 
Columns of a DataFrame can be + winsorized by using clip. + + >>> import numpy as np + >>> x=np.random.normal(size=(1000,3)) + >>> df=pd.DataFrame(x, columns=['a','b','c']) + >>> #Winsorize columns at 5% and 95% + >>> U=df.quantile(0.95) + >>> L=df.quantile(0.5) + >>> winsorized_df=df.clip(lower=L, upper=U, axis = 1) """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From e062a88e3f81c868664ce62cc59c3dcacb9d19a3 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 13:11:15 -0500 Subject: [PATCH 06/25] clip docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 06da176cb4195..680b953e9d6a2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5649,7 +5649,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, You can clip each column or row with different thresholds by passing a ``Series`` to the lower/upper argument. - >>> some_data = {'A':[-19, 12, -5],'B':[1, 100, -5]} + >>> some_data={'A':[-19, 12, -5],'B':[1, 100, -5]} >>> df=pd.DataFrame(data=some_data, index=['foo', 'bar', 'bizz']) >>> df A B From 87f8b5b83d95cd5394c84e83cfee5d4cd31c7435 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 13:20:01 -0500 Subject: [PATCH 07/25] clip docstring --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 680b953e9d6a2..628eccb0d7009 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5617,6 +5617,8 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Returns ------- clipped : DataFrame/Series + Elements above or below the upper and lower thresholds converted to + threshold values. 
Notes ----- From 1371a237c247983e1d3a8875d79dbb9677e9a69f Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 13:28:17 -0500 Subject: [PATCH 08/25] docstring for pandas.DataFrame.clip --- pandas/core/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 628eccb0d7009..633732930ec79 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5607,7 +5607,11 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Parameters ---------- lower : float or array_like, default None + Lower threshold for clipping. Values smaller than upper will be + converted to lower. upper : float or array_like, default None + Upper threshold for clipping. Values larger than upper will be + converted to upper. axis : int or string axis name, optional Align object with lower and upper along the given axis. inplace : boolean, default False From 674ffb6adf80a0e2edcb50d77a609c8e57a45cf4 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 13:36:30 -0500 Subject: [PATCH 09/25] clip docstring --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 633732930ec79..17faa62725910 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5617,6 +5617,8 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, inplace : boolean, default False Whether to perform the operation in place on the data .. versionadded:: 0.21.0. 
+ args : dictionary of arguments arguments passed to pandas.compat.numpy + kwargs : dictionary of keyword arguments passed to pandas.compat.numpy Returns ------- From 495847d13a5f831e00336d7e33d9c542bbd8666f Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 15:49:30 -0500 Subject: [PATCH 10/25] docstring clip --- pandas/core/generic.py | 53 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 17faa62725910..e41cc5820bc3b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5601,34 +5601,34 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, """ Trim values at input threshold(s). - Elements above the upper threshold will be changed to upper threshold. - Elements below the lower threshold will be changed to lower threshold. + Elements above/below the upper'lower thresholds will be changed to + upper/lower thresholds. Parameters ---------- - lower : float or array_like, default None - Lower threshold for clipping. Values smaller than upper will be - converted to lower. - upper : float or array_like, default None - Upper threshold for clipping. Values larger than upper will be - converted to upper. - axis : int or string axis name, optional + lower : float, array-like or None, default None + Lower threshold for clipping. Values smaller than `lower` will be + converted to `lower`. + upper : float, array-like or None, default None + Upper threshold for clipping. Values larger than `upper` will be + converted to `upper`. + axis : {0 or 'index', 1 or 'columns', None}, default None Align object with lower and upper along the given axis. inplace : boolean, default False Whether to perform the operation in place on the data .. versionadded:: 0.21.0. 
- args : dictionary of arguments arguments passed to pandas.compat.numpy - kwargs : dictionary of keyword arguments passed to pandas.compat.numpy + *args : arguments passed to pandas.compat.numpy + **kwargs : keyword arguments passed to pandas.compat.numpy Returns ------- - clipped : DataFrame/Series + clipped : `Series` or `DataFrame`. Elements above or below the upper and lower thresholds converted to threshold values. Notes ----- - Clipping data is a method for dealing with dubious elements. + Clipping data is a method for dealing with out-of-range elements. If some elements are too large or too small, clipping is one way to transform the data into a reasonable range. @@ -5638,10 +5638,12 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, above given value(s) truncated. pandas.DataFrame.clip_lower : Return copy of input with values below given value(s) truncated. + pandas.Series.clip : Trim values at input threshold(s). Examples -------- - >>> df=pd.DataFrame({'a':[1, 2, 3], 'b':[4, 5, 6], 'c':[7, 8, 9001]}) + >>> some_data = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9001]} + >>> df = pd.DataFrame(some_data) >>> df a b c 0 1 4 7 @@ -5657,8 +5659,8 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, You can clip each column or row with different thresholds by passing a ``Series`` to the lower/upper argument. - >>> some_data={'A':[-19, 12, -5],'B':[1, 100, -5]} - >>> df=pd.DataFrame(data=some_data, index=['foo', 'bar', 'bizz']) + >>> some_data = {'A': [-19, 12, -5], 'B': [1, 100, -5]} + >>> df = pd.DataFrame(data=some_data, index=['foo', 'bar', 'bizz']) >>> df A B foo -19 1 @@ -5668,7 +5670,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Use the axis argument to clip by column or rows. Clip column A with lower threshold of -10 and column B has lower threshold of 10. 
- >>> df.clip(lower=pd.Series({'A':-10, 'B':10}), axis=1) + >>> df.clip(lower=pd.Series({'A': -10, 'B': 10}), axis=1) A B foo -10 10 bar 12 100 @@ -5676,7 +5678,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Clip the foo, bar, and bizz rows with lower thresholds -10, 0, and 10. - >>> row_thresh=pd.Series({'foo':-10, 'bar':0, 'bizz':10}) + >>> row_thresh = pd.Series({'foo': -10, 'bar': 0, 'bizz': 10}) >>> df.clip(lower=row_thresh, axis=0) A B foo -10 1 @@ -5685,15 +5687,12 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, `Winsorizing `__ is a way of removing outliers from data. Columns of a DataFrame can be - winsorized by using clip. - - >>> import numpy as np - >>> x=np.random.normal(size=(1000,3)) - >>> df=pd.DataFrame(x, columns=['a','b','c']) - >>> #Winsorize columns at 5% and 95% - >>> U=df.quantile(0.95) - >>> L=df.quantile(0.5) - >>> winsorized_df=df.clip(lower=L, upper=U, axis = 1) + winsorized at 5th and 95th percentile by using clip. + + >>> x = np.random.normal(size=(1000,3)) + >>> U = df.quantile(0.95) + >>> L = df.quantile(0.5) + >>> winsorized_df = df.clip(lower=L, upper=U, axis = 1) """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From 89a7690048d9cdddba93fe82268a310e37240c29 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 15:52:30 -0500 Subject: [PATCH 11/25] docstring clip --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e41cc5820bc3b..8197d71f1990b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5601,7 +5601,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, """ Trim values at input threshold(s). - Elements above/below the upper'lower thresholds will be changed to + Elements above/below the upper/lower thresholds will be changed to upper/lower thresholds. 
Parameters From 131c052b56588ca313d6c7e1b8c2bccf1416da86 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 17:16:40 -0500 Subject: [PATCH 12/25] clip docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8197d71f1990b..48dab1ed92961 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5692,7 +5692,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, >>> x = np.random.normal(size=(1000,3)) >>> U = df.quantile(0.95) >>> L = df.quantile(0.5) - >>> winsorized_df = df.clip(lower=L, upper=U, axis = 1) + >>> winsorized_df = df.clip(lower=L, upper=U, axis=1) """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From 089e882d3594df756d49e428a8bba88f97ed95f2 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 17:40:39 -0500 Subject: [PATCH 13/25] docstring clip Possible documentation for args and kwargs --- pandas/core/generic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 48dab1ed92961..2bb1d9aac7e45 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5617,9 +5617,10 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, inplace : boolean, default False Whether to perform the operation in place on the data .. versionadded:: 0.21.0. - *args : arguments passed to pandas.compat.numpy - **kwargs : keyword arguments passed to pandas.compat.numpy - + *args : Additional keywords have no effect but might be accepted + for compatibility with numpy. + **kwargs : Additional keywords have no effect but might be accepted + for compatibility with numpy. Returns ------- clipped : `Series` or `DataFrame`. 
From 1089460dc8562c247b697007c0c461dd444d56d7 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 23:26:50 -0500 Subject: [PATCH 14/25] clip docstring --- pandas/core/generic.py | 95 +++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 56 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2bb1d9aac7e45..f703fc02f69be 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5619,81 +5619,64 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, .. versionadded:: 0.21.0. *args : Additional keywords have no effect but might be accepted for compatibility with numpy. - **kwargs : Additional keywords have no effect but might be accepted + **kwargs : Additional keywords have no effect but might be accepted for compatibility with numpy. + Returns ------- - clipped : `Series` or `DataFrame`. - Elements above or below the upper and lower thresholds converted to - threshold values. - - Notes - ----- - Clipping data is a method for dealing with out-of-range elements. - If some elements are too large or too small, clipping is one way to - transform the data into a reasonable range. + `Series` or `DataFrame`. + DataFrame is returned with those values above/below the + `upper`/`'lower` thresholds set to the threshold values. See Also -------- - pandas.DataFrame.clip_upper : Return copy of input with values - above given value(s) truncated. - pandas.DataFrame.clip_lower : Return copy of input with values - below given value(s) truncated. pandas.Series.clip : Trim values at input threshold(s). 
Examples -------- >>> some_data = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9001]} - >>> df = pd.DataFrame(some_data) + >>> df = pd.DataFrame(some_data, index = ['foo','bar','foobar']) >>> df a b c - 0 1 4 7 - 1 2 5 8 - 2 3 6 9001 + foo 1 4 7 + bar 2 5 8 + foobar 3 6 9001 >>> df.clip(lower=1, upper=9) a b c - 0 1 4 7 - 1 2 5 8 - 2 3 6 9 + foo 1 4 7 + bar 2 5 8 + foobar 3 6 9 You can clip each column or row with different thresholds by passing - a ``Series`` to the lower/upper argument. + a ``Series`` to the lower/upper argument. Use the axis argument to clip + by column or rows. - >>> some_data = {'A': [-19, 12, -5], 'B': [1, 100, -5]} - >>> df = pd.DataFrame(data=some_data, index=['foo', 'bar', 'bizz']) - >>> df - A B - foo -19 1 - bar 12 100 - bizz -5 -5 - - Use the axis argument to clip by column or rows. Clip column A with - lower threshold of -10 and column B has lower threshold of 10. - - >>> df.clip(lower=pd.Series({'A': -10, 'B': 10}), axis=1) - A B - foo -10 10 - bar 12 100 - bizz -5 10 - - Clip the foo, bar, and bizz rows with lower thresholds -10, 0, and 10. - - >>> row_thresh = pd.Series({'foo': -10, 'bar': 0, 'bizz': 10}) - >>> df.clip(lower=row_thresh, axis=0) - A B - foo -10 1 - bar 12 100 - bizz 10 10 - - `Winsorizing `__ is a way - of removing outliers from data. Columns of a DataFrame can be - winsorized at 5th and 95th percentile by using clip. - - >>> x = np.random.normal(size=(1000,3)) - >>> U = df.quantile(0.95) - >>> L = df.quantile(0.5) - >>> winsorized_df = df.clip(lower=L, upper=U, axis=1) + >>> col_thresh = pd.Series({'a':4, 'b':5, 'c':6}) + >>> df.clip(lower=col_thresh, axis='columns') + a b c + foo 4 5 7 + bar 4 5 8 + foobar 4 6 9001 + + Clip the foo, bar, and foobar rows with lower thresholds 5, 7, and 10. 
+ + >>> row_thresh = pd.Series({'foo': 5, 'bar': 7, 'foobar': 10}) + >>> df.clip(lower=row_thresh, axis='index') + a b c + foo 5 5 7 + bar 7 7 8 + foobar 10 10 9001 + + Clipping data is a method for dealing with out-of-range elements. + If some elements are too large or too small, clipping is one way to + transform the data into a reasonable range. + `Winsorizing `__ is a related + method, whereby the data are clipped at the 5th and 95th percentiles. + + >>> lwr_thresh = df.quantile(0.05) + >>> upr_thresh = df.quantile(0.95) + >>> df_win = df.clip(lower=lwr_thresh, upper=upr_thresh, axis='columns') """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From 60c34db3386ff00ab41b52a8b2945cd664df034d Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 23:30:13 -0500 Subject: [PATCH 15/25] docstring clip --- pandas/core/generic.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f703fc02f69be..346d47b409759 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5625,7 +5625,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Returns ------- `Series` or `DataFrame`. - DataFrame is returned with those values above/below the + DataFrame is returned with those values above/below the `upper`/`'lower` thresholds set to the threshold values. See Also @@ -5649,7 +5649,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, foobar 3 6 9 You can clip each column or row with different thresholds by passing - a ``Series`` to the lower/upper argument. Use the axis argument to clip + a ``Series`` to the lower/upper argument. Use the axis argument to clip by column or rows. >>> col_thresh = pd.Series({'a':4, 'b':5, 'c':6}) @@ -5671,12 +5671,13 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Clipping data is a method for dealing with out-of-range elements. 
If some elements are too large or too small, clipping is one way to transform the data into a reasonable range. - `Winsorizing `__ is a related - method, whereby the data are clipped at the 5th and 95th percentiles. + `Winsorizing `__ is a + related method, whereby the data are clipped at + the 5th and 95th percentiles. >>> lwr_thresh = df.quantile(0.05) >>> upr_thresh = df.quantile(0.95) - >>> df_win = df.clip(lower=lwr_thresh, upper=upr_thresh, axis='columns') + >>> dfw = df.clip(lower=lwr_thresh, upper=upr_thresh, axis='columns') """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From 9aeda8e909ab7279a3fe4dbb395a782f3927ea6f Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 23:31:39 -0500 Subject: [PATCH 16/25] clip docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 346d47b409759..b3ef20871a31c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5613,7 +5613,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Upper threshold for clipping. Values larger than `upper` will be converted to `upper`. axis : {0 or 'index', 1 or 'columns', None}, default None - Align object with lower and upper along the given axis. + Apply clip by index (i.e. by rows) or columns. inplace : boolean, default False Whether to perform the operation in place on the data .. versionadded:: 0.21.0. 
From 75eecd18229d75af60b5a8bbaf7f499c7cd168d5 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sat, 10 Mar 2018 23:39:01 -0500 Subject: [PATCH 17/25] clip docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b3ef20871a31c..c45117470ea42 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5635,7 +5635,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Examples -------- >>> some_data = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9001]} - >>> df = pd.DataFrame(some_data, index = ['foo','bar','foobar']) + >>> df = pd.DataFrame(some_data, index = ['foo', 'bar', 'foobar']) >>> df a b c foo 1 4 7 From 504389fa617cacc6cad9724c20dedee8c4466f65 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sun, 11 Mar 2018 17:29:11 -0400 Subject: [PATCH 18/25] clip docstring --- pandas/core/generic.py | 73 ++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c45117470ea42..74d0f7f106555 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5602,7 +5602,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Trim values at input threshold(s). Elements above/below the upper/lower thresholds will be changed to - upper/lower thresholds. + upper/lower thresholds. Clipping data is a method for dealing with + out-of-range elements. If some elements are too large or too small, + clipping is one way to transform the data into a reasonable range. Parameters ---------- @@ -5617,67 +5619,70 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, inplace : boolean, default False Whether to perform the operation in place on the data .. versionadded:: 0.21.0. - *args : Additional keywords have no effect but might be accepted - for compatibility with numpy. 
- **kwargs : Additional keywords have no effect but might be accepted + *args, **kwargs + Additional keywords have no effect but might be accepted for compatibility with numpy. Returns ------- `Series` or `DataFrame`. - DataFrame is returned with those values above/below the + Original input with those values above/below the `upper`/`'lower` thresholds set to the threshold values. See Also -------- - pandas.Series.clip : Trim values at input threshold(s). + Series.clip : Trim values at input threshold(s). + DataFrame.clip_upper : Return copy of input with values above given + value(s) truncated. + DataFrame.clip_lower : Return copy of the input with values below given + value(s) truncated. Examples -------- - >>> some_data = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9001]} - >>> df = pd.DataFrame(some_data, index = ['foo', 'bar', 'foobar']) + >>> some_data = {'a': [-1, -2, -100], 'b': [1, 2, 100]} + >>> df=pd.DataFrame(some_data, index = ['foo', 'bar', 'foobar']) >>> df - a b c - foo 1 4 7 - bar 2 5 8 - foobar 3 6 9001 + a b + foo -1 1 + bar -2 2 + foobar -100 100 - >>> df.clip(lower=1, upper=9) - a b c - foo 1 4 7 - bar 2 5 8 - foobar 3 6 9 + >>> df.clip(lower=-10, upper=10) + a b + foo -1 1 + bar -2 2 + foobar -10 10 You can clip each column or row with different thresholds by passing a ``Series`` to the lower/upper argument. Use the axis argument to clip by column or rows. - >>> col_thresh = pd.Series({'a':4, 'b':5, 'c':6}) + >>> col_thresh=pd.Series({'a':-5, 'b':5}) >>> df.clip(lower=col_thresh, axis='columns') - a b c - foo 4 5 7 - bar 4 5 8 - foobar 4 6 9001 + a b + foo -1 5 + bar -2 5 + foobar -5 100 Clip the foo, bar, and foobar rows with lower thresholds 5, 7, and 10. 
- >>> row_thresh = pd.Series({'foo': 5, 'bar': 7, 'foobar': 10}) + >>> row_thresh=pd.Series({'foo': 0, 'bar': 1, 'foobar': 10}) >>> df.clip(lower=row_thresh, axis='index') - a b c - foo 5 5 7 - bar 7 7 8 - foobar 10 10 9001 - - Clipping data is a method for dealing with out-of-range elements. - If some elements are too large or too small, clipping is one way to - transform the data into a reasonable range. + a b + foo 0 1 + bar 1 2 + foobar 10 100 + `Winsorizing `__ is a related method, whereby the data are clipped at the 5th and 95th percentiles. - >>> lwr_thresh = df.quantile(0.05) - >>> upr_thresh = df.quantile(0.95) - >>> dfw = df.clip(lower=lwr_thresh, upper=upr_thresh, axis='columns') + >>> lower, upper = df.quantile(0.05), df.quantile(0.95) + >>> df.clip(lower=lower, upper=upper, axis='columns') + a b + foo -1.1 1.1 + bar -2.0 2.0 + foobar -90.2 90.2 """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") From 694f9fbc48d9cc5e7e4768d72ceda29f0de56c87 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sun, 11 Mar 2018 17:32:12 -0400 Subject: [PATCH 19/25] clip docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 74d0f7f106555..095067e706353 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5639,7 +5639,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Examples -------- - >>> some_data = {'a': [-1, -2, -100], 'b': [1, 2, 100]} + >>> some_data={'a': [-1, -2, -100], 'b': [1, 2, 100]} >>> df=pd.DataFrame(some_data, index = ['foo', 'bar', 'foobar']) >>> df a b From af991cc6932efe158ef0776a846d8ef37b62724e Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sun, 11 Mar 2018 17:51:04 -0400 Subject: [PATCH 20/25] clip docstring PEP8 --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py 
index 095067e706353..2460e7a9a696c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5639,8 +5639,8 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Examples -------- - >>> some_data={'a': [-1, -2, -100], 'b': [1, 2, 100]} - >>> df=pd.DataFrame(some_data, index = ['foo', 'bar', 'foobar']) + >>> some_data = {'a': [-1, -2, -100], 'b': [1, 2, 100]} + >>> df = pd.DataFrame(some_data, index = ['foo', 'bar', 'foobar']) >>> df a b foo -1 1 @@ -5657,7 +5657,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, a ``Series`` to the lower/upper argument. Use the axis argument to clip by column or rows. - >>> col_thresh=pd.Series({'a':-5, 'b':5}) + >>> col_thresh = pd.Series({'a': -5, 'b': 5}) >>> df.clip(lower=col_thresh, axis='columns') a b foo -1 5 @@ -5666,7 +5666,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Clip the foo, bar, and foobar rows with lower thresholds 5, 7, and 10. - >>> row_thresh=pd.Series({'foo': 0, 'bar': 1, 'foobar': 10}) + >>> row_thresh = pd.Series({'foo': 0, 'bar': 1, 'foobar': 10}) >>> df.clip(lower=row_thresh, axis='index') a b foo 0 1 From 141e8c92dbd83dabce655d3dbc96fc969d60cf3d Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sun, 11 Mar 2018 18:01:40 -0400 Subject: [PATCH 21/25] clip docstring --- pandas/core/generic.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2460e7a9a696c..cd23e1183b161 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5631,11 +5631,18 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, See Also -------- + DataFrame.clip : Trim values at input threshold(s). Series.clip : Trim values at input threshold(s). - DataFrame.clip_upper : Return copy of input with values above given + Series.clip_lower : Return copy of the input with values below given + value(s) truncated. 
+ Series.clip_upper : Return copy of input with values above given value(s) truncated. DataFrame.clip_lower : Return copy of the input with values below given value(s) truncated. + DataFrame.clip_upper : Return copy of input with values above given + value(s) truncated. + DataFrame.quantile : Return values at the given quantile over requested + axis, a la numpy.percentile. Examples -------- From 383c56d0ee750ec4d9131e680785415a246aa70c Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sun, 11 Mar 2018 18:43:57 -0400 Subject: [PATCH 22/25] docstring clip --- pandas/core/generic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cd23e1183b161..f463c1e60f13d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5646,8 +5646,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Examples -------- - >>> some_data = {'a': [-1, -2, -100], 'b': [1, 2, 100]} - >>> df = pd.DataFrame(some_data, index = ['foo', 'bar', 'foobar']) + >>> df = pd.DataFrame({'a': [-1, -2, -100], + ... 'b': [1, 2, 100]}, + ... index=['foo', 'bar', 'foobar']) >>> df a b foo -1 1 @@ -5682,7 +5683,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, `Winsorizing `__ is a related method, whereby the data are clipped at - the 5th and 95th percentiles. + the 5th and 95th percentiles. The ``DataFrame.quantile`` method returns + a ``Series`` with column names as index and the quantiles as values. + Use ``axis='columns'`` to apply clipping to columns. 
>>> lower, upper = df.quantile(0.05), df.quantile(0.95) >>> df.clip(lower=lower, upper=upper, axis='columns') From 3f1c7854ede5300def99e061373b46a78630b489 Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Sun, 11 Mar 2018 18:44:54 -0400 Subject: [PATCH 23/25] docstring clip --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f463c1e60f13d..58fdddd51ed11 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5627,7 +5627,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, ------- `Series` or `DataFrame`. Original input with those values above/below the - `upper`/`'lower` thresholds set to the threshold values. + `upper`/`lower` thresholds set to the threshold values. See Also -------- From 90d8268c8b0d1886bb955b5c20917c7000c25a6e Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Tue, 13 Mar 2018 11:14:21 -0400 Subject: [PATCH 24/25] docstring clip --- pandas/core/generic.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 58fdddd51ed11..aa41ebb779106 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5629,6 +5629,12 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Original input with those values above/below the `upper`/`lower` thresholds set to the threshold values. + Notes + ----- + + .. [1] Tukey, John W. "The future of data analysis." The annals of + mathematical statistics 33.1 (1962): 1-67. + See Also -------- DataFrame.clip : Trim values at input threshold(s). @@ -5650,13 +5656,13 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, ... 'b': [1, 2, 100]}, ... 
index=['foo', 'bar', 'foobar']) >>> df - a b + a b foo -1 1 bar -2 2 foobar -100 100 >>> df.clip(lower=-10, upper=10) - a b + a b foo -1 1 bar -2 2 foobar -10 10 @@ -5667,7 +5673,7 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, >>> col_thresh = pd.Series({'a': -5, 'b': 5}) >>> df.clip(lower=col_thresh, axis='columns') - a b + a b foo -1 5 bar -2 5 foobar -5 100 @@ -5681,15 +5687,14 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, bar 1 2 foobar 10 100 - `Winsorizing `__ is a - related method, whereby the data are clipped at + Winsorizing [1]_ is a related method, whereby the data are clipped at the 5th and 95th percentiles. The ``DataFrame.quantile`` method returns a ``Series`` with column names as index and the quantiles as values. Use ``axis='columns'`` to apply clipping to columns. >>> lower, upper = df.quantile(0.05), df.quantile(0.95) >>> df.clip(lower=lower, upper=upper, axis='columns') - a b + a b foo -1.1 1.1 bar -2.0 2.0 foobar -90.2 90.2 From bd26d0f8efe2732774701865ae6b3e85a378ba8c Mon Sep 17 00:00:00 2001 From: Demetri Pananos Date: Tue, 13 Mar 2018 11:28:53 -0400 Subject: [PATCH 25/25] docstring clip --- pandas/core/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aa41ebb779106..6b79ed8028d24 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5629,9 +5629,8 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, Original input with those values above/below the `upper`/`lower` thresholds set to the threshold values. - Notes + References ----- - .. [1] Tukey, John W. "The future of data analysis." The annals of mathematical statistics 33.1 (1962): 1-67.