Modified test cases and added detailed explanation in v1.1.0.rst

Santhosh18 · Santhosh18 · commit 4d70a71a13cb · 2020-07-17T16:53:19.000+05:30
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -715,6 +715,76 @@ apply and applymap on ``DataFrame`` evaluates first row/column only once
 
     df.apply(func, axis=1)
 
+
+.. _whatsnew_110.api_breaking.explode_infer_dtype:
+
+Infer dtypes in explode method for DataFrame and Series
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, :meth:`DataFrame.explode` and :meth:`Series.explode` always returned ``object`` dtype for the column being exploded. Now the dtype of the exploded column is inferred from the resulting values (:issue:`34923`).
+
+.. ipython:: python
+
+    s = pd.Series([1, 2, 3])
+    df = pd.DataFrame({'A': [s, s, s, s], 'B': 1})
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [3]: df.explode("A").dtypes
+    Out[3]:
+    A    object
+    B     int64
+    dtype: object
+
+*New behavior*:
+
+.. code-block:: ipython
+
+    In [3]: df.explode("A").dtypes
+    Out[3]:
+    A    int64
+    B    int64
+    dtype: object
+
+.. _whatsnew_110.api.other:
+
+Other API changes
+^^^^^^^^^^^^^^^^^
+
+- :meth:`Series.describe` will now show distribution percentiles for ``datetime`` dtypes; the statistics ``first`` and ``last``
+  will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`)
+- Added :meth:`DataFrame.value_counts` (:issue:`5377`)
+- :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`)
+- ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`)
+- Using a :class:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew``, ``cov``, ``corr`` will now return correct results for any monotonic :class:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`)
+- Added a :class:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations.
+- Added a :class:`pandas.api.indexers.NonFixedVariableWindowIndexer` class to support ``rolling`` operations with non-fixed offsets (:issue:`34994`)
+- Added :class:`pandas.errors.InvalidIndexError` (:issue:`34570`).
+- :meth:`DataFrame.swaplevel` now raises a ``TypeError`` if the axis is not a :class:`MultiIndex`.
+  Previously an ``AttributeError`` was raised (:issue:`31126`)
+- :meth:`DataFrame.xs` now raises a ``TypeError`` if a ``level`` keyword is supplied and the axis is not a :class:`MultiIndex`.
+  Previously an ``AttributeError`` was raised (:issue:`33610`)
+- :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std` and :meth:`~DataFrameGroupby.var`)
+  now raise a ``TypeError`` if an unsupported keyword argument is passed.
+  Previously an ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` was passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`)
+- :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`)
+- Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`)
+- Passing an invalid ``fill_value`` to :meth:`Categorical.take` raises a ``ValueError`` instead of ``TypeError`` (:issue:`33660`)
+- Combining a ``Categorical`` that has integer categories and contains missing values
+  with a float dtype column in operations such as :func:`concat` or :meth:`~DataFrame.append`
+  will now result in a float column instead of an object dtyped column (:issue:`33607`)
+- :meth:`Series.to_timestamp` now raises a ``TypeError`` if the axis is not a :class:`PeriodIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`)
+- :meth:`Series.to_period` now raises a ``TypeError`` if the axis is not a :class:`DatetimeIndex`. Previously an ``AttributeError`` was raised (:issue:`33327`)
+- :func:`pandas.api.types.is_string_dtype` no longer incorrectly identifies categorical series as string.
+- :func:`read_excel` no longer takes ``**kwds`` arguments. This means that passing in keyword ``chunksize`` now raises a ``TypeError``
+  (previously raised a ``NotImplementedError``), while passing in keyword ``encoding`` now raises a ``TypeError`` (:issue:`34464`)
+- :func:`merge` now checks that the ``suffixes`` parameter is a ``tuple`` and raises ``TypeError`` otherwise. Previously a ``list`` or ``set`` was also accepted, and a ``set`` could produce unexpected results (:issue:`33740`)
+- :class:`Period` no longer accepts tuples for the ``freq`` argument (:issue:`34658`)
+- :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` now raise ``ValueError`` if ``limit_direction`` is ``'forward'`` or ``'both'`` and ``method`` is ``'backfill'`` or ``'bfill'``, or if ``limit_direction`` is ``'backward'`` or ``'both'`` and ``method`` is ``'pad'`` or ``'ffill'`` (:issue:`34746`)
+
+
 Increased minimum versions for dependencies
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3843,7 +3843,7 @@ def explode(self, ignore_index: bool = False) -> "Series":
         else:
             index = self.index.repeat(counts)
 
-        result = self._constructor(values, index=index, name=self.name)
+        result = self._constructor(values, index=index, name=self.name).infer_objects()
 
         return result
 
diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
@@ -25,7 +25,7 @@ def test_basic():
     expected = pd.DataFrame(
         {
             "A": pd.Series(
-                [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
+                [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=np.float64
             ),
             "B": 1,
         }
@@ -55,7 +55,7 @@ def test_multi_index_rows():
                         ("b", 2),
                     ]
                 ),
-                dtype=object,
+                dtype=np.float64,
             ),
             "B": 1,
         }
@@ -74,7 +74,7 @@ def test_multi_index_columns():
             ("A", 1): pd.Series(
                 [0, 1, 2, np.nan, np.nan, 3, 4],
                 index=pd.Index([0, 0, 0, 1, 2, 3, 3]),
-                dtype=object,
+                dtype=np.float64,
             ),
             ("A", 2): 1,
         }
@@ -93,7 +93,7 @@ def test_usecase():
     expected = pd.DataFrame(
         {
             "A": [11, 11, 11, 11, 11, 22, 22, 22],
-            "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object),
+            "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=np.int64),
             "C": [10, 10, 10, 10, 10, 20, 20, 20],
         },
         columns=list("ABC"),
@@ -160,7 +160,22 @@ def test_duplicate_index(input_dict, input_index, expected_dict, expected_index)
     # GH 28005
     df = pd.DataFrame(input_dict, index=input_index)
     result = df.explode("col1")
-    expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
+    expected = pd.DataFrame(expected_dict, index=expected_index, dtype=np.int64)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_inferred_dtype():
+    # GH 34923
+    s = pd.Series([1, None, 3])
+    df = pd.DataFrame({'A': [s, s], "B": 1})
+    result = df.explode("A")
+    expected = pd.DataFrame(
+        {
+            "A": np.array([1, np.nan, 3, 1, np.nan, 3], dtype=np.float64),
+            "B": np.array([1, 1, 1, 1, 1, 1], dtype=np.int64)
+        },
+        index=[0, 0, 0, 1, 1, 1]
+    )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py
@@ -7,9 +7,9 @@
 
 def test_basic():
     s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo")
-    result = s. explode()
+    result = s.explode()
     expected = pd.Series(
-        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo"
+        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=np.float64, name="foo"
     )
     tm.assert_series_equal(result, expected)
 
@@ -54,7 +54,7 @@ def test_multi_index():
         names=["foo", "bar"],
     )
     expected = pd.Series(
-        [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo"
+        [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=np.float64, name="foo"
     )
     tm.assert_series_equal(result, expected)
 
@@ -116,14 +116,14 @@ def test_duplicate_index():
     # GH 28005
     s = pd.Series([[1, 2], [3, 4]], index=[0, 0])
     result = s.explode()
-    expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
+    expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=np.int64)
     tm.assert_series_equal(result, expected)
 
 
 def test_ignore_index():
     # GH 34932
     s = pd.Series([[1, 2], [3, 4]])
     result = s.explode(ignore_index=True)
-    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
+    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=np.int64)
     tm.assert_series_equal(result, expected)