Modified test cases and added detailed explanation in v1.1.0.rst

Santhosh18 · Santhosh18 · commit bea26f788c07 · 2020-06-30T12:30:50.000+05:30
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -661,6 +661,38 @@ apply and applymap on ``DataFrame`` evaluates first row/column only once
 
     df.apply(func, axis=1)
 
+.. _whatsnew_110.api_breaking.explode_infer_dtype:
+
+Infer dtypes in explode method for DataFrame and Series
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, :meth:`DataFrame.explode` and :meth:`Series.explode` always returned ``object`` dtype for the column being exploded. Now the dtype of the exploded column is inferred from its values. (:issue:`34923`)
+
+.. ipython:: python
+
+    s = pd.Series([1,2,3])
+    df = pd.DataFrame({'A': [s, s, s, s], 'B': 1})
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [3]: df.explode("A").dtypes
+    Out[3]:
+    A    object
+    B     int64
+    dtype: object
+
+*New behavior*:
+
+.. code-block:: ipython
+
+    In [3]: df.explode("A").dtypes
+    Out[3]:
+    A    int64
+    B    int64
+    dtype: object
+
 .. _whatsnew_110.api.other:
 
 Other API changes
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3838,7 +3838,7 @@ def explode(self, ignore_index: bool = False) -> "Series":
         else:
             index = self.index.repeat(counts)
 
-        result = self._constructor(values, index=index, name=self.name)
+        result = self._constructor(values, index=index, name=self.name).infer_objects()
 
         return result
 
diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
@@ -25,7 +25,7 @@ def test_basic():
     expected = pd.DataFrame(
         {
             "A": pd.Series(
-                [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
+                [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=np.float64
             ),
             "B": 1,
         }
@@ -55,7 +55,7 @@ def test_multi_index_rows():
                         ("b", 2),
                     ]
                 ),
-                dtype=object,
+                dtype=np.float64,
             ),
             "B": 1,
         }
@@ -74,7 +74,7 @@ def test_multi_index_columns():
             ("A", 1): pd.Series(
                 [0, 1, 2, np.nan, np.nan, 3, 4],
                 index=pd.Index([0, 0, 0, 1, 2, 3, 3]),
-                dtype=object,
+                dtype=np.float64,
             ),
             ("A", 2): 1,
         }
@@ -93,7 +93,7 @@ def test_usecase():
     expected = pd.DataFrame(
         {
             "A": [11, 11, 11, 11, 11, 22, 22, 22],
-            "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object),
+            "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=np.int64),
             "C": [10, 10, 10, 10, 10, 20, 20, 20],
         },
         columns=list("ABC"),
@@ -160,7 +160,22 @@ def test_duplicate_index(input_dict, input_index, expected_dict, expected_index)
     # GH 28005
     df = pd.DataFrame(input_dict, index=input_index)
     result = df.explode("col1")
-    expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
+    expected = pd.DataFrame(expected_dict, index=expected_index, dtype=np.int64)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_inferred_dtype():
+    # GH 34923
+    s = pd.Series([1, None, 3])
+    df = pd.DataFrame({'A': [s, s], "B": 1})
+    result = df.explode("A")
+    expected = pd.DataFrame(
+        {
+            "A": np.array([1, np.nan, 3, 1, np.nan, 3], dtype=np.float64),
+            "B": np.array([1, 1, 1, 1, 1, 1], dtype=np.int64)
+        },
+        index=[0, 0, 0, 1, 1, 1]
+    )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py
@@ -7,9 +7,9 @@
 
 def test_basic():
     s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo")
-    result = s. explode()
+    result = s.explode()
     expected = pd.Series(
-        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo"
+        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=np.float64, name="foo"
     )
     tm.assert_series_equal(result, expected)
 
@@ -54,7 +54,7 @@ def test_multi_index():
         names=["foo", "bar"],
     )
     expected = pd.Series(
-        [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo"
+        [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=np.float64, name="foo"
     )
     tm.assert_series_equal(result, expected)
 
@@ -116,14 +116,14 @@ def test_duplicate_index():
     # GH 28005
     s = pd.Series([[1, 2], [3, 4]], index=[0, 0])
     result = s.explode()
-    expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
+    expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=np.int64)
     tm.assert_series_equal(result, expected)
 
 
 def test_ignore_index():
     # GH 34932
     s = pd.Series([[1, 2], [3, 4]])
     result = s.explode(ignore_index=True)
-    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
+    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=np.int64)
     tm.assert_series_equal(result, expected)