|
15 | 15 | from pandas.compat import lrange, product, PY35
|
16 | 16 | from pandas import (compat, isna, notna, DataFrame, Series,
|
17 | 17 | MultiIndex, date_range, Timestamp, Categorical,
|
18 |
| - _np_version_under1p12, _np_version_under1p15) |
| 18 | + _np_version_under1p12, _np_version_under1p15, |
| 19 | + to_datetime, to_timedelta) |
19 | 20 | import pandas as pd
|
20 | 21 | import pandas.core.nanops as nanops
|
21 | 22 | import pandas.core.algorithms as algorithms
|
22 |
| -import pandas.io.formats.printing as printing |
23 | 23 |
|
24 | 24 | import pandas.util.testing as tm
|
25 | 25 | import pandas.util._test_decorators as td
|
@@ -840,54 +840,74 @@ def wrapper(x):
|
840 | 840 | expected = pd.Series(unit, index=r1.index, dtype=r1.dtype)
|
841 | 841 | tm.assert_series_equal(r1, expected)
|
842 | 842 |
|
843 |
| - def test_mode(self): |
844 |
| - df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11], |
845 |
| - "B": [10, 10, 10, np.nan, 3, 4], |
846 |
| - "C": [8, 8, 8, 9, 9, 9], |
847 |
| - "D": np.arange(6, dtype='int64'), |
848 |
| - "E": [8, 8, 1, 1, 3, 3]}) |
849 |
| - tm.assert_frame_equal(df[["A"]].mode(), |
850 |
| - pd.DataFrame({"A": [12]})) |
851 |
| - expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ |
852 |
| - to_frame() |
853 |
| - tm.assert_frame_equal(df[["D"]].mode(), expected) |
854 |
| - expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() |
855 |
| - tm.assert_frame_equal(df[["E"]].mode(), expected) |
856 |
| - tm.assert_frame_equal(df[["A", "B"]].mode(), |
857 |
| - pd.DataFrame({"A": [12], "B": [10.]})) |
858 |
| - tm.assert_frame_equal(df.mode(), |
859 |
| - pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, |
860 |
| - np.nan, np.nan], |
861 |
| - "B": [10, np.nan, np.nan, np.nan, |
862 |
| - np.nan, np.nan], |
863 |
| - "C": [8, 9, np.nan, np.nan, np.nan, |
864 |
| - np.nan], |
865 |
| - "D": [0, 1, 2, 3, 4, 5], |
866 |
| - "E": [1, 3, 8, np.nan, np.nan, |
867 |
| - np.nan]})) |
868 |
| - |
869 |
| - # outputs in sorted order |
870 |
| - df["C"] = list(reversed(df["C"])) |
871 |
| - printing.pprint_thing(df["C"]) |
872 |
| - printing.pprint_thing(df["C"].mode()) |
873 |
| - a, b = (df[["A", "B", "C"]].mode(), |
874 |
| - pd.DataFrame({"A": [12, np.nan], |
875 |
| - "B": [10, np.nan], |
876 |
| - "C": [8, 9]})) |
877 |
| - printing.pprint_thing(a) |
878 |
| - printing.pprint_thing(b) |
879 |
| - tm.assert_frame_equal(a, b) |
880 |
| - # should work with heterogeneous types |
881 |
| - df = pd.DataFrame({"A": np.arange(6, dtype='int64'), |
882 |
| - "B": pd.date_range('2011', periods=6), |
883 |
| - "C": list('abcdef')}) |
884 |
| - exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'), |
885 |
| - dtype=df["A"].dtype), |
886 |
| - "B": pd.Series(pd.date_range('2011', periods=6), |
887 |
| - dtype=df["B"].dtype), |
888 |
| - "C": pd.Series(list('abcdef'), |
889 |
| - dtype=df["C"].dtype)}) |
890 |
| - tm.assert_frame_equal(df.mode(), exp) |
| 843 | + @pytest.mark.parametrize("dropna, expected", [ |
| 844 | + (True, {'A': [12], |
| 845 | + 'B': [10.0], |
| 846 | + 'C': [1.0], |
| 847 | + 'D': ['a'], |
| 848 | + 'E': Categorical(['a'], categories=['a']), |
| 849 | + 'F': to_datetime(['2000-1-2']), |
| 850 | + 'G': to_timedelta(['1 days'])}), |
| 851 | + (False, {'A': [12], |
| 852 | + 'B': [10.0], |
| 853 | + 'C': [np.nan], |
| 854 | + 'D': np.array([np.nan], dtype=object), |
| 855 | + 'E': Categorical([np.nan], categories=['a']), |
| 856 | + 'F': [pd.NaT], |
| 857 | + 'G': to_timedelta([pd.NaT])}), |
| 858 | + (True, {'H': [8, 9, np.nan, np.nan], |
| 859 | + 'I': [8, 9, np.nan, np.nan], |
| 860 | + 'J': [1, np.nan, np.nan, np.nan], |
| 861 | + 'K': Categorical(['a', np.nan, np.nan, np.nan], |
| 862 | + categories=['a']), |
| 863 | + 'L': to_datetime(['2000-1-2', 'NaT', 'NaT', 'NaT']), |
| 864 | + 'M': to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
| 865 | + 'N': [0, 1, 2, 3]}), |
| 866 | + (False, {'H': [8, 9, np.nan, np.nan], |
| 867 | + 'I': [8, 9, np.nan, np.nan], |
| 868 | + 'J': [1, np.nan, np.nan, np.nan], |
| 869 | + 'K': Categorical([np.nan, 'a', np.nan, np.nan], |
| 870 | + categories=['a']), |
| 871 | + 'L': to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
| 872 | + 'M': to_timedelta(['nan', '1 days', 'nan', 'nan']), |
| 873 | + 'N': [0, 1, 2, 3]}) |
| 874 | + ]) |
| 875 | + def test_mode_dropna(self, dropna, expected): |
| 876 | + |
| 877 | + df = DataFrame({"A": [12, 12, 19, 11], |
| 878 | + "B": [10, 10, np.nan, 3], |
| 879 | + "C": [1, np.nan, np.nan, np.nan], |
| 880 | + "D": [np.nan, np.nan, 'a', np.nan], |
| 881 | + "E": Categorical([np.nan, np.nan, 'a', np.nan]), |
| 882 | + "F": to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
| 883 | + "G": to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
| 884 | + "H": [8, 8, 9, 9], |
| 885 | + "I": [9, 9, 8, 8], |
| 886 | + "J": [1, 1, np.nan, np.nan], |
| 887 | + "K": Categorical(['a', np.nan, 'a', np.nan]), |
| 888 | + "L": to_datetime(['2000-1-2', '2000-1-2', |
| 889 | + 'NaT', 'NaT']), |
| 890 | + "M": to_timedelta(['1 days', 'nan', |
| 891 | + '1 days', 'nan']), |
| 892 | + "N": np.arange(4, dtype='int64')}) |
| 893 | + |
| 894 | + result = df[sorted(list(expected.keys()))].mode(dropna=dropna) |
| 895 | + expected = DataFrame(expected) |
| 896 | + tm.assert_frame_equal(result, expected) |
| 897 | + |
| 898 | + @pytest.mark.skipif(not compat.PY3, reason="only PY3") |
| 899 | + def test_mode_sortwarning(self): |
| 900 | + # Check for the warning that is raised when the mode |
| 901 | + # results cannot be sorted |
| 902 | + |
| 903 | + df = DataFrame({"A": [np.nan, np.nan, 'a', 'a']}) |
| 904 | + expected = DataFrame({'A': ['a', np.nan]}) |
| 905 | + |
| 906 | + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): |
| 907 | + result = df.mode(dropna=False) |
| 908 | + result = result.sort_values(by='A').reset_index(drop=True) |
| 909 | + |
| 910 | + tm.assert_frame_equal(result, expected) |
891 | 911 |
|
892 | 912 | def test_operators_timedelta64(self):
|
893 | 913 | from datetime import timedelta
|
|
0 commit comments