pandas-dev · jreback · Oct 7, 2020 · Sep 28, 2020 · Sep 28, 2020 · Sep 28, 2020
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -100,6 +100,7 @@
     is_dict_like,
     is_dtype_equal,
     is_extension_array_dtype,
+    is_float,
     is_float_dtype,
     is_hashable,
     is_integer,
@@ -4458,7 +4459,34 @@ def _replace_columnwise(
         return res.__finalize__(self)
 
     @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])
-    def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> DataFrame:
+    def shift(
+        self, periods=1, freq=None, axis=0, fill_value=lib.no_default
+    ) -> DataFrame:
+        axis = self._get_axis_number(axis)
+        ncols = len(self.columns)
+        # The column-wise path never reads freq; leave freq shifts to NDFrame.shift.
+        infer_fill = freq is None and fill_value is lib.no_default
+        if axis == 1 and periods != 0 and infer_fill and ncols > 0:
+            # We will infer fill_value to match the closest column
+            # Placeholder label valid for self.columns; real labels are reset below
+            label = self.columns[0]
+            nfill = min(ncols, abs(periods))
+            if periods > 0:
+                result = self.iloc[:, :-periods]
+                for _ in range(nfill):
+                    # TODO(EA2D): doing this in a loop unnecessary with 2D EAs
+                    # Define filler inside loop so we get a copy
+                    filler = self.iloc[:, 0].shift(len(self))
+                    result.insert(0, label, filler, allow_duplicates=True)
+            else:
+                result = self.iloc[:, -periods:]
+                for _ in range(nfill):
+                    filler = self.iloc[:, -1].shift(len(self))
+                    result.insert(result.shape[1], label, filler, allow_duplicates=True)
+
+            result.columns = self.columns.copy()
+            return result
+
         return super().shift(
             periods=periods, freq=freq, axis=axis, fill_value=fill_value
         )
@@ -7208,13 +7236,13 @@ def melt(
         Difference with previous column
 
         >>> df.diff(axis=1)
-            a    b     c
-        0 NaN  0.0   0.0
-        1 NaN -1.0   3.0
-        2 NaN -1.0   7.0
-        3 NaN -1.0  13.0
-        4 NaN  0.0  20.0
-        5 NaN  2.0  28.0
+            a  b   c
+        0 NaN  0   0
+        1 NaN -1   3
+        2 NaN -1   7
+        3 NaN -1  13
+        4 NaN  0  20
+        5 NaN  2  28
 
         Difference with 3rd previous row
 
@@ -7248,12 +7276,15 @@ def melt(
         ),
     )
     def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame:
+        if not (isinstance(periods, int) or is_integer(periods)):  # incl. NumPy ints
+            if not (is_float(periods) and periods.is_integer()):
+                raise ValueError("periods must be an integer")
+            periods = int(periods)
 
         bm_axis = self._get_block_manager_axis(axis)
-        self._consolidate_inplace()
 
         if bm_axis == 0 and periods != 0:
-            return self.T.diff(periods, axis=0).T
+            return self - self.shift(periods, axis=axis)  # type: ignore[operator]
 
         new_data = self._mgr.diff(n=periods, axis=bm_axis)
         return self._constructor(new_data)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9244,11 +9244,11 @@ def shift(
 
         >>> df.shift(periods=1, axis="columns")
                     Col1  Col2  Col3
-        2020-01-01   NaN  10.0  13.0
-        2020-01-02   NaN  20.0  23.0
-        2020-01-03   NaN  15.0  18.0
-        2020-01-04   NaN  30.0  33.0
-        2020-01-05   NaN  45.0  48.0
+        2020-01-01   NaN    10    13
+        2020-01-02   NaN    20    23
+        2020-01-03   NaN    15    18
+        2020-01-04   NaN    30    33
+        2020-01-05   NaN    45    48
 
         >>> df.shift(periods=3, fill_value=0)
                     Col1  Col2  Col3

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -557,8 +557,12 @@ def interpolate(self, **kwargs) -> "BlockManager":
         return self.apply("interpolate", **kwargs)
 
     def shift(self, periods: int, axis: int, fill_value) -> "BlockManager":
+        if fill_value is lib.no_default:
+            fill_value = None
+
         if axis == 0 and self.ndim == 2 and self.nblocks > 1:
             # GH#35488 we need to watch out for multi-block cases
+            # We only get here with fill_value not-lib.no_default
             ncols = self.shape[0]
             if periods > 0:
                 indexer = [-1] * periods + list(range(ncols - periods))

diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
@@ -444,7 +444,7 @@ def wide_to_long(
     8      3      3     2.1     2.9
 
     >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age',
-    ...                     sep='_', suffix='\w+')
+    ...                     sep='_', suffix=r'\w+')
     >>> l
     ... # doctest: +NORMALIZE_WHITESPACE
                       ht

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -291,7 +291,7 @@ def str_count(arr, pat, flags=0):
     Escape ``'$'`` to find the literal dollar sign.
 
     >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat'])
-    >>> s.str.count('\\$')
+    >>> s.str.count(r'\\$')
     0    1
     1    0
     2    1
@@ -410,7 +410,7 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
 
     Returning any digit using regular expression.
 
-    >>> s1.str.contains('\\d', regex=True)
+    >>> s1.str.contains(r'\\d', regex=True)
     0    False
     1    False
     2    False

diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py
@@ -7,6 +7,11 @@
 
 
 class TestDataFrameDiff:
+    def test_diff_requires_integer(self):
+        df = pd.DataFrame(np.random.randn(2, 2))
+        with pytest.raises(ValueError, match="periods must be an integer"):
+            df.diff(1.5)
+
     def test_diff(self, datetime_frame):
         the_diff = datetime_frame.diff(1)
 
@@ -31,9 +36,7 @@ def test_diff(self, datetime_frame):
         df = pd.DataFrame({"y": pd.Series([2]), "z": pd.Series([3])})
         df.insert(0, "x", 1)
         result = df.diff(axis=1)
-        expected = pd.DataFrame(
-            {"x": np.nan, "y": pd.Series(1), "z": pd.Series(1)}
-        ).astype("float64")
+        expected = pd.DataFrame({"x": np.nan, "y": pd.Series(1), "z": pd.Series(1)})
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("tz", [None, "UTC"])
@@ -116,19 +119,13 @@ def test_diff_axis(self):
             df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])
         )
 
-    @pytest.mark.xfail(
-        reason="GH#32995 needs to operate column-wise or do inference",
-        raises=AssertionError,
-    )
     def test_diff_period(self):
         # GH#32995 Don't pass an incorrect axis
-        #  TODO(EA2D): this bug wouldn't have happened with 2D EA
         pi = pd.date_range("2016-01-01", periods=3).to_period("D")
         df = pd.DataFrame({"A": pi})
 
         result = df.diff(1, axis=1)
 
-        # TODO: should we make Block.diff do type inference?  or maybe algos.diff?
         expected = (df - pd.NaT).astype(object)
         tm.assert_frame_equal(result, expected)
 
@@ -141,6 +138,14 @@ def test_diff_axis1_mixed_dtypes(self):
         result = df.diff(axis=1)
         tm.assert_frame_equal(result, expected)
 
+        # GH#21437 mixed-float-dtypes
+        df = pd.DataFrame(
+            {"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")}
+        )
+        result = df.diff(axis=1)
+        expected = pd.DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0})
+        tm.assert_frame_equal(result, expected)
+
     def test_diff_axis1_mixed_dtypes_large_periods(self):
         # GH#32995 operate column-wise when we have mixed dtypes and axis=1
         df = pd.DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})