From 83a7c88eafdb1215faa8072e22022dd2cf01ee9c Mon Sep 17 00:00:00 2001 From: Jonathan Enders Date: Sat, 10 Mar 2018 13:43:30 -0500 Subject: [PATCH 1/5] DataFrame.count docstring reworked, examples added --- pandas/core/frame.py | 52 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..5f32726864acf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5592,22 +5592,58 @@ def corrwith(self, other, axis=0, drop=False): def count(self, axis=0, level=None, numeric_only=False): """ - Return Series with number of non-NA/null observations over requested - axis. Works with non-floating point data as well (detects NaN and None) + Count non-NA cells for each column or row. + + Return Series with number of non-NA observations over requested + axis. Works with non-floating point data as well (detects `NaN` and + `None`) Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 - 0 or 'index' for row-wise, 1 or 'columns' for column-wise - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a DataFrame + If equal 0 or 'index' counts are generated for each column. + If equal 1 or 'columns' counts are generated for each row. + level : int or str, optional + If the axis is a `MultiIndex` (hierarchical), count along a + particular level, collapsing into a `DataFrame`. + A `str` specifies the level name. numeric_only : boolean, default False - Include only float, int, boolean data + Include only `float`, `int` or `boolean` data. Returns ------- - count : Series (or DataFrame if level specified) + Series or DataFrame + For each column/row the number of non-NA/null entries. + If level is specified returns a `DataFrame`. 
+ + See Also + -------- + Series.count: number of non-NA elements in a Series + DataFrame.shape: number of DataFrame rows and columns (including NA + elements) + DataFrame.isnull: boolean same-sized DataFrame showing places of NA + elements + + Examples + -------- + >>> df=pd.DataFrame({ "Person":["John","Myla",None], + ... "Age":[24.,np.nan,21.], + ... "Single":[False,True,True] }) + >>> df + Person Age Single + 0 John 24.0 False + 1 Myla NaN True + 2 None 21.0 True + >>> df.count() + Person 2 + Age 2 + Single 3 + dtype: int64 + >>> df.count(axis=1) + 0 3 + 1 2 + 2 2 + dtype: int64 """ axis = self._get_axis_number(axis) if level is not None: From 8d76f60adc4af740eb6fc44c59753ab84ca93f5a Mon Sep 17 00:00:00 2001 From: Jonathan Enders Date: Sat, 10 Mar 2018 16:52:02 -0500 Subject: [PATCH 2/5] review corrections --- pandas/core/frame.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5f32726864acf..c83e67796a2b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5595,14 +5595,14 @@ def count(self, axis=0, level=None, numeric_only=False): Count non-NA cells for each column or row. Return Series with number of non-NA observations over requested - axis. Works with non-floating point data as well (detects `NaN` and - `None`) + axis. Works with non-floating point data as well (detects `None`, + `NaN` and `NaT`) Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 - If equal 0 or 'index' counts are generated for each column. - If equal 1 or 'columns' counts are generated for each row. + If 0 or 'index' counts are generated for each column. + If 1 or 'columns' counts are generated for each row. level : int or str, optional If the axis is a `MultiIndex` (hierarchical), count along a particular level, collapsing into a `DataFrame`. 
@@ -5626,24 +5626,34 @@ def count(self, axis=0, level=None, numeric_only=False): Examples -------- - >>> df=pd.DataFrame({ "Person":["John","Myla",None], - ... "Age":[24.,np.nan,21.], - ... "Single":[False,True,True] }) + >>> df = pd.DataFrame({"Person": + ... ["John", "Myla", None, "John", "Myla"], + ... "Age": [24., np.nan, 21., 33, 26], + ... "Single": [False, True, True, True, False]}) >>> df Person Age Single 0 John 24.0 False 1 Myla NaN True 2 None 21.0 True + 3 John 33.0 True + 4 Myla 26.0 False >>> df.count() - Person 2 - Age 2 - Single 3 + Person 4 + Age 4 + Single 5 dtype: int64 >>> df.count(axis=1) 0 3 1 2 2 2 + 3 3 + 4 3 dtype: int64 + >>> df.set_index(["Person", "Single"]).count(level="Person") + Age + Person + John 2 + Myla 1 """ axis = self._get_axis_number(axis) if level is not None: From fd1116732a1475051addadfac495cd41a7d73602 Mon Sep 17 00:00:00 2001 From: Jonathan Enders Date: Sun, 11 Mar 2018 12:29:01 -0400 Subject: [PATCH 3/5] review fixes + better description --- pandas/core/frame.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c83e67796a2b6..9b572050f5d8d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5602,7 +5602,7 @@ def count(self, axis=0, level=None, numeric_only=False): ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 If 0 or 'index' counts are generated for each column. - If 1 or 'columns' counts are generated for each row. + If 1 or 'columns' counts are generated for each **row**. level : int or str, optional If the axis is a `MultiIndex` (hierarchical), count along a particular level, collapsing into a `DataFrame`. 
@@ -5621,11 +5621,13 @@ def count(self, axis=0, level=None, numeric_only=False): Series.count: number of non-NA elements in a Series DataFrame.shape: number of DataFrame rows and columns (including NA elements) - DataFrame.isnull: boolean same-sized DataFrame showing places of NA + DataFrame.isna: boolean same-sized DataFrame showing places of NA elements Examples -------- + Constructing DataFrame from a dictionary: + >>> df = pd.DataFrame({"Person": ... ["John", "Myla", None, "John", "Myla"], ... "Age": [24., np.nan, 21., 33, 26], @@ -5637,18 +5639,27 @@ def count(self, axis=0, level=None, numeric_only=False): 2 None 21.0 True 3 John 33.0 True 4 Myla 26.0 False + + Notice the uncounted NA values: + >>> df.count() Person 4 Age 4 Single 5 dtype: int64 - >>> df.count(axis=1) + + Counts for each **row**: + + >>> df.count(axis='columns') 0 3 1 2 2 2 3 3 4 3 dtype: int64 + + Counts for one level of a `MultiIndex`: + >>> df.set_index(["Person", "Single"]).count(level="Person") Age Person From bbe96aa2668911ba19ea1d1e1dcd49255e091ba9 Mon Sep 17 00:00:00 2001 From: Jonathan Enders Date: Sun, 11 Mar 2018 13:10:57 -0400 Subject: [PATCH 4/5] review fixes, full stop and backticks --- pandas/core/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9b572050f5d8d..d9683ac957b4c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5596,7 +5596,7 @@ def count(self, axis=0, level=None, numeric_only=False): Return Series with number of non-NA observations over requested axis. Works with non-floating point data as well (detects `None`, - `NaN` and `NaT`) + `NaN` and `NaT`). Parameters ---------- @@ -5605,7 +5605,7 @@ def count(self, axis=0, level=None, numeric_only=False): If 1 or 'columns' counts are generated for each **row**. level : int or str, optional If the axis is a `MultiIndex` (hierarchical), count along a - particular level, collapsing into a `DataFrame`. 
+ particular `level`, collapsing into a `DataFrame`. A `str` specifies the level name. numeric_only : boolean, default False Include only `float`, `int` or `boolean` data. @@ -5614,7 +5614,7 @@ def count(self, axis=0, level=None, numeric_only=False): ------- Series or DataFrame For each column/row the number of non-NA/null entries. - If level is specified returns a `DataFrame`. + If `level` is specified, returns a `DataFrame`. See Also -------- From dbb84ebcb95df87be72d59e443e7cf81ec138192 Mon Sep 17 00:00:00 2001 From: Jonathan Enders Date: Mon, 12 Mar 2018 09:13:36 -0400 Subject: [PATCH 5/5] review fix: changed summary, added quoting --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d9683ac957b4c..33156bcd381d3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5594,9 +5594,8 @@ def count(self, axis=0, level=None, numeric_only=False): """ Count non-NA cells for each column or row. - Return Series with number of non-NA observations over requested - axis. Works with non-floating point data as well (detects `None`, - `NaN` and `NaT`). + The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending + on `pandas.options.mode.use_inf_as_na`) are considered NA. Parameters ----------