From 87972a5a645ec78695a2eee4e33c8c24cbcbecbb Mon Sep 17 00:00:00 2001 From: William Entriken Date: Fri, 15 Sep 2017 18:28:52 +0100 Subject: [PATCH] Implement mode(dropna), for #17534 --- pandas/core/algorithms.py | 7 +++++-- pandas/core/frame.py | 6 ++++-- pandas/core/series.py | 7 ++++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9f712a1cf039b..bcb5f1dff551c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -634,14 +634,16 @@ def duplicated(values, keep='first'): return f(values, keep=keep) -def mode(values): +def mode(values, dropna=True): """ Returns the mode(s) of an array. Parameters ---------- values : array-like - Array over which to check for duplicate values. + Array over which to check for duplicate values + dropna : boolean, default True + Don't include NaN values. Returns ------- @@ -666,6 +668,7 @@ def mode(values): ndtype = 'object' values = _ensure_object(values) +### TODO: IN HERE IMPLEMENT THE DROPNA PARAMETER f = getattr(htable, "mode_{dtype}".format(dtype=ndtype)) result = f(values) try: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c6f7f9ea2eb37..4e3fd32c71615 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5840,7 +5840,7 @@ def _get_agg_axis(self, axis_num): else: raise ValueError('Axis must be 0 or 1 (got %r)' % axis_num) - def mode(self, axis=0, numeric_only=False): + def mode(self, axis=0, numeric_only=False, dropna=True): """ Gets the mode(s) of each element along the axis selected. Adds a row for each mode per label, fills in gaps with nan. @@ -5858,6 +5858,8 @@ def mode(self, axis=0, numeric_only=False): * 1 or 'columns' : get mode of each row numeric_only : boolean, default False if True, only apply to numeric columns + dropna : boolean, default True + Don't include NaN values. Returns ------- @@ -5874,7 +5876,7 @@ def mode(self, axis=0, numeric_only=False): data = self if not numeric_only else self._get_numeric_data() def f(s): - return s.mode() + return s.mode(dropna=dropna) return data.apply(f, axis=axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1c92c4b8850ee..15d5f9e61ef16 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1266,11 +1266,16 @@ def count(self, level=None): return self._constructor(out, index=lev, dtype='int64').__finalize__(self) - def mode(self): + def mode(self, dropna=True): """Return the mode(s) of the dataset. Always returns Series even if only one value is returned. + Parameters + ---------- + dropna : boolean, default True + Don't include NaN values. + Returns ------- modes : Series (sorted)