|
20 | 20 | import pandas as pd
|
21 | 21 | import pandas.core.nanops as nanops
|
22 | 22 | import pandas.core.algorithms as algorithms
|
23 |
| -import pandas.io.formats.printing as printing |
24 | 23 |
|
25 | 24 | import pandas.util.testing as tm
|
26 | 25 | import pandas.util._test_decorators as td
|
@@ -841,87 +840,62 @@ def wrapper(x):
|
841 | 840 | expected = pd.Series(unit, index=r1.index, dtype=r1.dtype)
|
842 | 841 | tm.assert_series_equal(r1, expected)
|
843 | 842 |
|
844 |
| - def test_mode(self): |
845 |
| - df = pd.DataFrame({"A": [12, 12, 11, 12, 19, 11], |
846 |
| - "B": [10, 10, 10, np.nan, 3, 4], |
847 |
| - "C": [8, 8, 8, 9, 9, 9], |
848 |
| - "D": np.arange(6, dtype='int64'), |
849 |
| - "E": [8, 8, 1, 1, 3, 3]}) |
850 |
| - tm.assert_frame_equal(df[["A"]].mode(), |
851 |
| - pd.DataFrame({"A": [12]})) |
852 |
| - expected = pd.Series([0, 1, 2, 3, 4, 5], dtype='int64', name='D').\ |
853 |
| - to_frame() |
854 |
| - tm.assert_frame_equal(df[["D"]].mode(), expected) |
855 |
| - expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame() |
856 |
| - tm.assert_frame_equal(df[["E"]].mode(), expected) |
857 |
| - tm.assert_frame_equal(df[["A", "B"]].mode(), |
858 |
| - pd.DataFrame({"A": [12], "B": [10.]})) |
859 |
| - tm.assert_frame_equal(df.mode(), |
860 |
| - pd.DataFrame({"A": [12, np.nan, np.nan, np.nan, |
861 |
| - np.nan, np.nan], |
862 |
| - "B": [10, np.nan, np.nan, np.nan, |
863 |
| - np.nan, np.nan], |
864 |
| - "C": [8, 9, np.nan, np.nan, np.nan, |
865 |
| - np.nan], |
866 |
| - "D": [0, 1, 2, 3, 4, 5], |
867 |
| - "E": [1, 3, 8, np.nan, np.nan, |
868 |
| - np.nan]})) |
869 |
| - |
870 |
| - # outputs in sorted order |
871 |
| - df["C"] = list(reversed(df["C"])) |
872 |
| - printing.pprint_thing(df["C"]) |
873 |
| - printing.pprint_thing(df["C"].mode()) |
874 |
| - a, b = (df[["A", "B", "C"]].mode(), |
875 |
| - pd.DataFrame({"A": [12, np.nan], |
876 |
| - "B": [10, np.nan], |
877 |
| - "C": [8, 9]})) |
878 |
| - printing.pprint_thing(a) |
879 |
| - printing.pprint_thing(b) |
880 |
| - tm.assert_frame_equal(a, b) |
881 |
| - # should work with heterogeneous types |
882 |
| - df = pd.DataFrame({"A": np.arange(6, dtype='int64'), |
883 |
| - "B": pd.date_range('2011', periods=6), |
884 |
| - "C": list('abcdef')}) |
885 |
| - exp = pd.DataFrame({"A": pd.Series(np.arange(6, dtype='int64'), |
886 |
| - dtype=df["A"].dtype), |
887 |
| - "B": pd.Series(pd.date_range('2011', periods=6), |
888 |
| - dtype=df["B"].dtype), |
889 |
| - "C": pd.Series(list('abcdef'), |
890 |
| - dtype=df["C"].dtype)}) |
891 |
| - tm.assert_frame_equal(df.mode(), exp) |
892 |
| - |
893 |
| - def test_mode_dropna(self): |
894 |
| - # GH 17534 |
895 |
| - # Test the dropna=False parameter for mode |
896 |
| - |
897 |
| - df = pd.DataFrame({"A": [1, np.nan, np.nan, np.nan], |
898 |
| - "B": [np.nan, np.nan, 'a', np.nan], |
899 |
| - "C": Categorical([np.nan, np.nan, 'a', np.nan]), |
900 |
| - "D": to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
901 |
| - "E": to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
902 |
| - "F": [1, 1, np.nan, np.nan], |
903 |
| - "G": [np.nan, np.nan, 'a', 'a'], |
904 |
| - "H": Categorical(['a', np.nan, 'a', np.nan]), |
905 |
| - "I": to_datetime(['2000-1-2', '2000-1-2', |
| 843 | + @pytest.mark.parametrize("dropna, expected", [ |
| 844 | + (True, {'A': [12], |
| 845 | + 'B': [10.0], |
| 846 | + 'C': [1.0], |
| 847 | + 'D': ['a'], |
| 848 | + 'E': Categorical(['a'], categories=['a']), |
| 849 | + 'F': to_datetime(['2000-1-2']), |
| 850 | + 'G': to_timedelta(['1 days'])}), |
| 851 | + (False, {'A': [12], |
| 852 | + 'B': [10.0], |
| 853 | + 'C': [np.nan], |
| 854 | + 'D': np.array([np.nan], dtype=object), |
| 855 | + 'E': Categorical([np.nan], categories=['a']), |
| 856 | + 'F': [pd.NaT], |
| 857 | + 'G': to_timedelta([pd.NaT])}), |
| 858 | + (True, {'H': [8, 9, np.nan, np.nan], |
| 859 | + 'I': [8, 9, np.nan, np.nan], |
| 860 | + 'J': [1, np.nan, np.nan, np.nan], |
| 861 | + 'K': ['a', np.nan, np.nan, np.nan], |
| 862 | + 'L': Categorical(['a', np.nan, np.nan, np.nan], |
| 863 | + categories=['a']), |
| 864 | + 'M': to_datetime(['2000-1-2', 'NaT', 'NaT', 'NaT']), |
| 865 | + 'N': to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
| 866 | + 'O': [0, 1, 2, 3]}), |
| 867 | + (False, {'H': [8, 9, np.nan, np.nan], |
| 868 | + 'I': [8, 9, np.nan, np.nan], |
| 869 | + 'J': [1, np.nan, np.nan, np.nan], |
| 870 | + 'K': [np.nan, 'a', np.nan, np.nan], |
| 871 | + 'L': Categorical([np.nan, 'a', np.nan, np.nan], |
| 872 | + categories=['a']), |
| 873 | + 'M': to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
| 874 | + 'N': to_timedelta(['nan', '1 days', 'nan', 'nan']), |
| 875 | + 'O': [0, 1, 2, 3]}) |
| 876 | + ]) |
| 877 | + def test_mode_dropna(self, dropna, expected): |
| 878 | + |
| 879 | + df = pd.DataFrame({"A": [12, 12, 19, 11], |
| 880 | + "B": [10, 10, np.nan, 3], |
| 881 | + "C": [1, np.nan, np.nan, np.nan], |
| 882 | + "D": [np.nan, np.nan, 'a', np.nan], |
| 883 | + "E": Categorical([np.nan, np.nan, 'a', np.nan]), |
| 884 | + "F": to_datetime(['NaT', '2000-1-2', 'NaT', 'NaT']), |
| 885 | + "G": to_timedelta(['1 days', 'nan', 'nan', 'nan']), |
| 886 | + "H": [8, 8, 9, 9], |
| 887 | + "I": [9, 9, 8, 8], |
| 888 | + "J": [1, 1, np.nan, np.nan], |
| 889 | + "K": [np.nan, np.nan, 'a', 'a'], |
| 890 | + "L": Categorical(['a', np.nan, 'a', np.nan]), |
| 891 | + "M": to_datetime(['2000-1-2', '2000-1-2', |
906 | 892 | 'NaT', 'NaT']),
|
907 |
| - "J": to_timedelta(['1 days', 'nan', |
908 |
| - '1 days', 'nan'])}) |
909 |
| - |
910 |
| - result = df.loc[:, 'A':'E'].mode(dropna=False) |
911 |
| - expected = pd.DataFrame({'A': [np.nan], |
912 |
| - 'B': np.array([np.nan], dtype=object), |
913 |
| - 'C': Categorical([np.nan], categories=['a']), |
914 |
| - 'D': [pd.NaT], |
915 |
| - 'E': to_timedelta([pd.NaT])}) |
916 |
| - tm.assert_frame_equal(result, expected) |
917 |
| - |
918 |
| - result = df.loc[:, 'F':'J'].mode(dropna=False) |
919 |
| - expected = pd.DataFrame({'F': [1, np.nan], |
920 |
| - 'G': [np.nan, 'a'], |
921 |
| - 'H': Categorical([np.nan, 'a'], |
922 |
| - categories=['a']), |
923 |
| - 'I': to_datetime(['NaT', '2000-1-2']), |
924 |
| - 'J': to_timedelta(['nan', '1 days'])}) |
| 893 | + "N": to_timedelta(['1 days', 'nan', |
| 894 | + '1 days', 'nan']), |
| 895 | + "O": np.arange(4, dtype='int64')}) |
| 896 | + |
| 897 | + result = df[sorted(list(expected.keys()))].mode(dropna=dropna) |
| 898 | + expected = pd.DataFrame(expected) |
925 | 899 | tm.assert_frame_equal(result, expected)
|
926 | 900 |
|
927 | 901 | def test_operators_timedelta64(self):
|
|
0 commit comments