Merge remote-tracking branch 'upstream/master' into docfix-multiindex-set_levels

hweecat · hweecat · commit a76adb3f5f51 · 2020-01-02T07:18:07.000+08:00
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -586,6 +586,20 @@ def test_groupby_multiple_columns(df, op):
     tm.assert_series_equal(result, expected)
 
 
+def test_as_index_select_column():
+    # GH 5764
+    df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"])
+    result = df.groupby("A", as_index=False)["B"].get_group(1)
+    expected = pd.Series([2, 4], name="B")
+    tm.assert_series_equal(result, expected)
+
+    result = df.groupby("A", as_index=False)["B"].apply(lambda x: x.cumsum())
+    expected = pd.Series(
+        [2, 6, 6], name="B", index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)])
+    )
+    tm.assert_series_equal(result, expected)
+
+
 def test_groupby_as_index_agg(df):
     grouped = df.groupby("A", as_index=False)
 
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
@@ -1103,3 +1103,70 @@ def test_transform_lambda_with_datetimetz():
         name="time",
     )
     tm.assert_series_equal(result, expected)
+
+
+def test_transform_fastpath_raises():
+    # GH#29631 case where fastpath defined in groupby.generic _choose_path
+    #  raises, but slow_path does not
+
+    df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]})
+    gb = df.groupby("A")
+
+    def func(grp):
+        # we want a function such that func(frame) fails but frame.apply(func)
+        #  works
+        if grp.ndim == 2:
+            # Ensure that fast_path fails
+            raise NotImplementedError("Don't cross the streams")
+        return grp * 2
+
+    # Check that the fastpath raises, see _transform_general
+    obj = gb._obj_with_exclusions
+    gen = gb.grouper.get_iterator(obj, axis=gb.axis)
+    fast_path, slow_path = gb._define_paths(func)
+    _, group = next(gen)
+
+    with pytest.raises(NotImplementedError, match="Don't cross the streams"):
+        fast_path(group)
+
+    result = gb.transform(func)
+
+    expected = pd.DataFrame([2, -2, 2, 4], columns=["B"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_transform_lambda_indexing():
+    # GH 7883
+    df = pd.DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"],
+            "B": ["one", "one", "two", "three", "two", "six", "five", "three"],
+            "C": range(8),
+            "D": range(8),
+            "E": range(8),
+        }
+    )
+    df = df.set_index(["A", "B"])
+    df = df.sort_index()
+    result = df.groupby(level="A").transform(lambda x: x.iloc[-1])
+    expected = DataFrame(
+        {
+            "C": [3, 3, 7, 7, 4, 4, 4, 4],
+            "D": [3, 3, 7, 7, 4, 4, 4, 4],
+            "E": [3, 3, 7, 7, 4, 4, 4, 4],
+        },
+        index=MultiIndex.from_tuples(
+            [
+                ("bar", "one"),
+                ("bar", "three"),
+                ("flux", "six"),
+                ("flux", "three"),
+                ("foo", "five"),
+                ("foo", "one"),
+                ("foo", "two"),
+                ("foo", "two"),
+            ],
+            names=["A", "B"],
+        ),
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -736,6 +736,12 @@ def test_get_indexer(self):
         expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected)
 
+    def test_get_indexer_nan(self):
+        # GH 7820
+        result = Index([1, 2, np.nan]).get_indexer([np.nan])
+        expected = np.array([2], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
     def test_intersection(self):
         index = self.create_index()
         other = Index([1, 2, 3, 4, 5])
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -966,3 +966,17 @@ def test_loc_getitem_label_list_integer_labels(
     expected = df.iloc[:, expected_columns]
     result = df.loc[["A", "B", "C"], column_key]
     tm.assert_frame_equal(result, expected, check_column_type=check_column_type)
+
+
+def test_loc_setitem_float_intindex():
+    # GH 8720
+    rand_data = np.random.randn(8, 4)
+    result = pd.DataFrame(rand_data)
+    result.loc[:, 0.5] = np.nan
+    expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1)))
+    expected = pd.DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5])
+    tm.assert_frame_equal(result, expected)
+
+    result = pd.DataFrame(rand_data)
+    result.loc[:, 0.5] = np.nan
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -1,3 +1,4 @@
+import io
 import os
 import sys
 
@@ -571,3 +572,17 @@ def test_to_csv_na_rep_long_string(self, df_new_type):
         result = df.to_csv(index=False, na_rep="mynull", encoding="ascii")
 
         assert expected == result
+
+    def test_to_csv_timedelta_precision(self):
+        # GH 6783
+        s = pd.Series([1, 1]).astype("timedelta64[ns]")
+        buf = io.StringIO()
+        s.to_csv(buf)
+        result = buf.getvalue()
+        expected_rows = [
+            ",0",
+            "0,0 days 00:00:00.000000001",
+            "1,0 days 00:00:00.000000001",
+        ]
+        expected = tm.convert_rows_list_to_csv_str(expected_rows)
+        assert result == expected
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
@@ -2207,3 +2207,13 @@ def test_first_row_bom(all_parsers):
     result = parser.read_csv(StringIO(data), delimiter="\t")
     expected = DataFrame(columns=["Head1", "Head2", "Head3"])
     tm.assert_frame_equal(result, expected)
+
+
+def test_integer_precision(all_parsers):
+    # GH 7072
+    s = """1,1;0;0;0;1;1;3844;3844;3844;1;1;1;1;1;1;0;0;1;1;0;0,,,4321583677327450765
+5,1;0;0;0;1;1;843;843;843;1;1;1;1;1;1;0;0;1;1;0;0,64.0,;,4321113141090630389"""
+    parser = all_parsers
+    result = parser.read_csv(StringIO(s), header=None)[4]
+    expected = Series([4321583677327450765, 4321113141090630389], name=4)
+    tm.assert_series_equal(result, expected)