TST: Regression testing for fixed issues (#30646)

mroeschke · web-flow · commit 0ce0bb54d452 · 2020-01-03T15:23:42.000-08:00
* Add tests for solved issues

* add another test

* Clarify test name
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -1,5 +1,5 @@
 from collections import OrderedDict, abc
-from datetime import datetime, timedelta
+from datetime import date, datetime, timedelta
 import functools
 import itertools
 
@@ -2425,6 +2425,14 @@ def test_constructor_with_extension_array(self, extension_arr):
         result = DataFrame(extension_arr)
         tm.assert_frame_equal(result, expected)
 
+    def test_datetime_date_tuple_columns_from_dict(self):
+        # GH 10863: a tuple of datetime.date objects must work as a column label
+        today = date.today()
+        key = (today, today)
+        result = DataFrame({key: Series(range(3), index=range(3))}, columns=[key])
+        expected = DataFrame([0, 1, 2], columns=pd.Index(pd.Series([key])))
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameConstructorWithDatetimeTZ:
     def test_from_dict(self):
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
@@ -970,3 +970,16 @@ def test_interp_ignore_all_good(self):
         # all good
         result = df[["B", "D"]].interpolate(downcast=None)
         tm.assert_frame_equal(result, df[["B", "D"]])
+
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_interp_time_inplace_axis(self, axis):
+        # GH 9687: inplace time interpolation must match the copy, on either axis
+        periods = 5
+        idx = pd.date_range(start="2014-01-01", periods=periods)
+        data = np.random.rand(periods, periods)
+        data[data < 0.5] = np.nan
+        expected = pd.DataFrame(index=idx, columns=idx, data=data)
+
+        result = expected.interpolate(axis=axis, method="time")
+        expected.interpolate(axis=axis, method="time", inplace=True)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
@@ -714,3 +714,41 @@ def test_apply_datetime_issue(group_column_dtlike):
         ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42]
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_apply_series_return_dataframe_groups():
+    # GH 10078: apply returning a Series per group keeps the group key as the index
+    tdf = DataFrame(
+        {
+            "day": {
+                0: pd.Timestamp("2015-02-24 00:00:00"),
+                1: pd.Timestamp("2015-02-24 00:00:00"),
+                2: pd.Timestamp("2015-02-24 00:00:00"),
+                3: pd.Timestamp("2015-02-24 00:00:00"),
+                4: pd.Timestamp("2015-02-24 00:00:00"),
+            },
+            "userAgent": {
+                0: "some UA string",
+                1: "some UA string",
+                2: "some UA string",
+                3: "another UA string",
+                4: "some UA string",
+            },
+            "userId": {
+                0: "17661101",
+                1: "17661101",
+                2: "17661101",
+                3: "17661101",
+                4: "17661101",
+            },
+        }
+    )
+
+    def most_common_values(df):
+        return Series({c: s.value_counts().index[0] for c, s in df.items()})
+
+    result = tdf.groupby("day").apply(most_common_values)["userId"]
+    expected = pd.Series(
+        ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId"
+    )
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -1330,3 +1330,15 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(func, zero_o
     # If we expect unobserved values to be zero, we also expect the dtype to be int
     if zero_or_nan == 0:
         assert np.issubdtype(result.dtype, np.integer)
+
+
+def test_series_groupby_categorical_aggregation_getitem():
+    # GH 8870: selecting before or after aggregating must give the same Series
+    raw = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]}
+    df = pd.DataFrame(raw)
+    bins = np.linspace(0, 20, 5)
+    df["range"] = pd.cut(df["foo"], bins)
+    grouped = df.groupby(["range", "baz"], as_index=True, sort=True)
+    result = grouped["foo"].agg("mean")
+    expected = grouped.agg("mean")["foo"]
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2023,3 +2023,10 @@ def test_groupby_crash_on_nunique(axis):
         expected = expected.T
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_list_level():
+    # GH 9790: the mean grouped by level=[0] is expected to equal the input frame
+    frame = pd.DataFrame(np.arange(0, 9).reshape(3, 3))
+    result = frame.groupby(level=[0]).mean()
+    tm.assert_frame_equal(result, frame)
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
@@ -437,3 +437,34 @@ def test_loc_nan_multiindex():
         columns=Index(["d1", "d2", "d3", "d4"], dtype="object"),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_loc_period_string_indexing():
+    # GH 9892: a quarter string must match the Period level of a MultiIndex
+    quarters = pd.period_range("2013Q1", "2013Q4", freq="Q")
+    cvr_ids = (1111, 2222, 3333)
+    idx = pd.MultiIndex.from_product((quarters, cvr_ids), names=("Periode", "CVR"))
+    columns = (
+        "OMS",
+        "OMK",
+        "RES",
+        "DRIFT_IND",
+        "OEVRIG_IND",
+        "FIN_IND",
+        "VARE_UD",
+        "LOEN_UD",
+        "FIN_UD",
+    )
+    df = pd.DataFrame(index=idx, columns=columns)
+
+    result = df.loc[("2013Q1", 1111), "OMS"]
+    expected_index = pd.MultiIndex.from_tuples(
+        [(pd.Period("2013Q1"), 1111)], names=["Periode", "CVR"]
+    )
+    expected = pd.Series(
+        [np.nan],
+        dtype=object,
+        name="OMS",
+        index=expected_index,
+    )
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -983,3 +983,22 @@ def test_loc_setitem_float_intindex():
     result = pd.DataFrame(rand_data)
     result.loc[:, 0.5] = np.nan
     tm.assert_frame_equal(result, expected)
+
+
+def test_loc_axis_1_slice():
+    # GH 10586: slicing columns between two MultiIndex tuples via .loc(axis=1)
+    rows = tuple("ABCDEFGHIJ")
+    cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]]
+    df = pd.DataFrame(
+        np.ones((10, 8)),
+        index=rows,
+        columns=pd.MultiIndex.from_tuples(cols),
+    )
+    result = df.loc(axis=1)[(2014, 9):(2015, 8)]
+
+    # the expected columns include both endpoint tuples of the slice
+    expected_cols = pd.MultiIndex.from_tuples(
+        [(2014, 9), (2014, 10), (2015, 7), (2015, 8)]
+    )
+    expected = pd.DataFrame(np.ones((10, 4)), index=rows, columns=expected_cols)
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
@@ -5,6 +5,7 @@
 """
 from io import StringIO
 
+import numpy as np
 import pytest
 
 from pandas import DataFrame, Index, MultiIndex
@@ -172,3 +173,14 @@ def test_multi_index_naming_not_all_at_beginning(all_parsers):
         ),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_no_multi_index_level_names_empty(all_parsers):
+    # GH 10984: unnamed MultiIndex levels must survive a CSV round trip
+    unnamed_index = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)])
+    values = np.random.randn(3, 3)
+    expected = DataFrame(values, index=unnamed_index, columns=["x", "y", "z"])
+    with tm.ensure_clean() as path:
+        expected.to_csv(path)
+        result = all_parsers.read_csv(path, index_col=[0, 1, 2])
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
@@ -2730,3 +2730,12 @@ def test_concat_datetimeindex_freq():
     expected = pd.DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50]))
     expected.index._data.freq = None
     tm.assert_frame_equal(result, expected)
+
+
+def test_concat_empty_df_object_dtype():
+    # GH 9149: concatenating with an empty frame is expected to give object dtype
+    data = pd.DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
+    empty = pd.DataFrame(columns=data.columns)
+    result = pd.concat([data, empty], axis=0)
+    expected = data.astype(object)
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -1965,6 +1965,31 @@ def test_pivot_table_aggfunc_scalar_dropna(self, dropna):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_pivot_table_empty_aggfunc(self):
+        # GH 9186: pivoting on an all-None column is expected to give an empty frame
+        data = {
+            "A": [2, 2, 3, 3, 2],
+            "id": [5, 6, 7, 8, 9],
+            "C": ["p", "q", "q", "p", "q"],
+            "D": [None] * 5,
+        }
+        df = pd.DataFrame(data)
+
+        result = df.pivot_table(index="A", columns="D", values="id", aggfunc=np.size)
+        expected = pd.DataFrame()
+        tm.assert_frame_equal(result, expected)
+
+    def test_pivot_table_no_column_raises(self):
+        # GH 10326: a missing values column must raise KeyError naming the column
+        def agg(values):
+            return np.mean(values)
+
+        frame = pd.DataFrame(
+            {"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]}
+        )
+        with pytest.raises(KeyError, match="notpresent"):
+            frame.pivot_table(values="notpresent", index="X", columns="Y", aggfunc=agg)
+
 
 class TestCrosstab:
     def setup_method(self, method):