From 64b341670eaf64c5a5fe25ae7bb2dd0ddaca67aa Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 3 Jan 2020 15:16:37 +0800 Subject: [PATCH 1/3] Add tests for solved issues --- pandas/tests/frame/test_constructors.py | 10 +++++- pandas/tests/frame/test_missing.py | 13 +++++++ pandas/tests/groupby/test_apply.py | 38 ++++++++++++++++++++ pandas/tests/groupby/test_categorical.py | 12 +++++++ pandas/tests/groupby/test_groupby.py | 7 ++++ pandas/tests/indexing/multiindex/test_loc.py | 31 ++++++++++++++++ pandas/tests/indexing/test_loc.py | 19 ++++++++++ pandas/tests/reshape/test_concat.py | 9 +++++ pandas/tests/reshape/test_pivot.py | 25 +++++++++++++ 9 files changed, 163 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f3cc11cb7027d..febb28273987b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1,5 +1,5 @@ from collections import OrderedDict, abc -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta import functools import itertools @@ -2425,6 +2425,14 @@ def test_constructor_with_extension_array(self, extension_arr): result = DataFrame(extension_arr) tm.assert_frame_equal(result, expected) + def test_datetime_date_tuple_columns_from_dict(self): + # GH 10863 + v = date.today() + tup = v, v + result = DataFrame({tup: Series(range(3), index=range(3))}, columns=[tup]) + expected = DataFrame([0, 1, 2], columns=pd.Index(pd.Series([tup]))) + tm.assert_frame_equal(result, expected) + class TestDataFrameConstructorWithDatetimeTZ: def test_from_dict(self): diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index f9a2061aa1ff4..594bbe02bd749 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -970,3 +970,16 @@ def test_interp_ignore_all_good(self): # all good result = df[["B", "D"]].interpolate(downcast=None) 
tm.assert_frame_equal(result, df[["B", "D"]]) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_interp_time_inplace_axis(self, axis): + # GH 9687: inplace "time" interpolation must match the copy on each axis + periods = 5 + idx = pd.date_range(start="2014-01-01", periods=periods) + data = np.random.rand(periods, periods) + data[data < 0.5] = np.nan + expected = pd.DataFrame(index=idx, columns=idx, data=data) + + result = expected.interpolate(axis=axis, method="time") + expected.interpolate(axis=axis, method="time", inplace=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0e62569fffeb6..d1c9eb96bb0dd 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -703,3 +703,41 @@ def test_apply_datetime_issue(group_column_dtlike): ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] ) tm.assert_frame_equal(result, expected) + + +def test_apply_series_return_dataframe_groups(): + # GH 10078 + tdf = DataFrame( + { + "day": { + 0: pd.Timestamp("2015-02-24 00:00:00"), + 1: pd.Timestamp("2015-02-24 00:00:00"), + 2: pd.Timestamp("2015-02-24 00:00:00"), + 3: pd.Timestamp("2015-02-24 00:00:00"), + 4: pd.Timestamp("2015-02-24 00:00:00"), + }, + "userAgent": { + 0: "some UA string", + 1: "some UA string", + 2: "some UA string", + 3: "another UA string", + 4: "some UA string", + }, + "userId": { + 0: "17661101", + 1: "17661101", + 2: "17661101", + 3: "17661101", + 4: "17661101", + }, + } + ) + + def most_common_values(df): + return Series({c: s.value_counts().index[0] for c, s in df.items()}) + + result = tdf.groupby("day").apply(most_common_values)["userId"] + expected = pd.Series( + ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId" + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 89ffcd9ee313e..debb8eb619228 100644 --- a/pandas/tests/groupby/test_categorical.py +++
b/pandas/tests/groupby/test_categorical.py @@ -1330,3 +1330,15 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(func, zero_o # If we expect unobserved values to be zero, we also expect the dtype to be int if zero_or_nan == 0: assert np.issubdtype(result.dtype, np.integer) + + +def test_series_groupby_categorical_aggregation_getitem(): + # GH 8870 + d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} + df = pd.DataFrame(d) + cat = pd.cut(df["foo"], np.linspace(0, 20, 5)) + df["range"] = cat + groups = df.groupby(["range", "baz"], as_index=True, sort=True) + result = groups["foo"].agg("mean") + expected = groups.agg("mean")["foo"] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8f88f68c69f2b..a220856310c3f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2011,3 +2011,10 @@ def test_groupby_crash_on_nunique(axis): expected = expected.T tm.assert_frame_equal(result, expected) + + +def test_groupby_list_level(): + # GH 9790 + expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3)) + result = expected.groupby(level=[0]).mean() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index da7d89a15125b..bb4ca5395871e 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -437,3 +437,34 @@ def test_loc_nan_multiindex(): columns=Index(["d1", "d2", "d3", "d4"], dtype="object"), ) tm.assert_frame_equal(result, expected) + + +def test_loc_period_string_indexing(): + # GH 9892 + a = pd.period_range("2013Q1", "2013Q4", freq="Q") + i = (1111, 2222, 3333) + idx = pd.MultiIndex.from_product((a, i), names=("Periode", "CVR")) + df = pd.DataFrame( + index=idx, + columns=( + "OMS", + "OMK", + "RES", + "DRIFT_IND", + "OEVRIG_IND", + "FIN_IND", + "VARE_UD", 
+ "LOEN_UD", + "FIN_UD", + ), + ) + result = df.loc[("2013Q1", 1111), "OMS"] + expected = pd.Series( + [np.nan], + dtype=object, + name="OMS", + index=pd.MultiIndex.from_tuples( + [(pd.Period("2013Q1"), 1111)], names=["Periode", "CVR"] + ), + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8b3620e8cd843..fe34a9986fa26 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -966,3 +966,22 @@ def test_loc_getitem_label_list_integer_labels( expected = df.iloc[:, expected_columns] result = df.loc[["A", "B", "C"], column_key] tm.assert_frame_equal(result, expected, check_column_type=check_column_type) + + +def test_loc_axis_1_slice(): + # GH 10586 + cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]] + df = pd.DataFrame( + np.ones((10, 8)), + index=tuple("ABCDEFGHIJ"), + columns=pd.MultiIndex.from_tuples(cols), + ) + result = df.loc(axis=1)[(2014, 9):(2015, 8)] + expected = pd.DataFrame( + np.ones((10, 4)), + index=tuple("ABCDEFGHIJ"), + columns=pd.MultiIndex.from_tuples( + [(2014, 9), (2014, 10), (2015, 7), (2015, 8)] + ), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 05193c00f0649..9fa69a4e081d1 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2730,3 +2730,12 @@ def test_concat_datetimeindex_freq(): expected = pd.DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) expected.index._data.freq = None tm.assert_frame_equal(result, expected) + + +def test_concat_empty_df_object_dtype(): + # GH 9149 + df_1 = pd.DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) + df_2 = pd.DataFrame(columns=df_1.columns) + result = pd.concat([df_1, df_2], axis=0) + expected = df_1.astype(object) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py 
b/pandas/tests/reshape/test_pivot.py index bd1d3d2d5bb63..fa542f1d10280 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1966,6 +1966,31 @@ def test_pivot_table_aggfunc_scalar_dropna(self, dropna): tm.assert_frame_equal(result, expected) + def test_pivot_table_empty_aggfunc(self): + # GH 9186 + df = pd.DataFrame( + { + "A": [2, 2, 3, 3, 2], + "id": [5, 6, 7, 8, 9], + "C": ["p", "q", "q", "p", "q"], + "D": [None, None, None, None, None], + } + ) + result = df.pivot_table(index="A", columns="D", values="id", aggfunc=np.size) + expected = pd.DataFrame() + tm.assert_frame_equal(result, expected) + + def test_pivot_table_no_column(self): + # GH 10326 + def agg(l): + return np.mean(l) + + foo = pd.DataFrame( + {"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]} + ) + with pytest.raises(KeyError, match="notpresent"): + foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) + class TestCrosstab: def setup_method(self, method): From a531d9e2277ebce4ae6c38b79b9571643b0c84e2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 3 Jan 2020 15:32:38 +0800 Subject: [PATCH 2/3] add another test --- pandas/tests/io/parser/test_index_col.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 66e00f4eb6c1c..cce7b827ad0e5 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -5,6 +5,7 @@ """ from io import StringIO +import numpy as np import pytest from pandas import DataFrame, Index, MultiIndex @@ -172,3 +173,14 @@ def test_multi_index_naming_not_all_at_beginning(all_parsers): ), ) tm.assert_frame_equal(result, expected) + + +def test_no_multi_index_level_names_empty(all_parsers): + # GH 10984 + parser = all_parsers + midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) + expected = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) + with 
tm.ensure_clean() as path: + expected.to_csv(path) + result = parser.read_csv(path, index_col=[0, 1, 2]) + tm.assert_frame_equal(result, expected) From 3ee7386173c5de2a7e468b3b910309250b22b9e2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 3 Jan 2020 14:06:30 -0800 Subject: [PATCH 3/3] Clarify test name --- pandas/tests/reshape/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3a88d2e11e708..743fc50c87e96 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1979,7 +1979,7 @@ def test_pivot_table_empty_aggfunc(self): expected = pd.DataFrame() tm.assert_frame_equal(result, expected) - def test_pivot_table_no_column(self): + def test_pivot_table_no_column_raises(self): # GH 10326 def agg(l): return np.mean(l)