pandas-dev · jreback · Mar 16, 2018 · Feb 22, 2018 · Feb 27, 2018 · Feb 27, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -779,6 +779,7 @@ Categorical
 - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
 - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`)
 - Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19565`)
+- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`)
 
 Datetimelike
 ^^^^^^^^^^^^

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -216,6 +216,70 @@ def isna(self):
         """
         raise AbstractMethodError(self)
 
+    def fillna(self, value=None, method=None, limit=None):
+        """ Fill NA/NaN values using the specified method.
+
+        Parameters
+        ----------
+        value : scalar, array-like
+            If a scalar value is passed it is used to fill all missing values.
+            Alternatively, an array-like 'value' can be given. It's expected
+            that the array-like have the same length as 'self'.
+        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
+            Method to use for filling holes in reindexed Series
+            pad / ffill: propagate last valid observation forward to next valid
+            backfill / bfill: use NEXT valid observation to fill gap
+        limit : int, default None
+            If method is specified, this is the maximum number of consecutive
+            NaN values to forward/backward fill. In other words, if there is
+            a gap with more than this number of consecutive NaNs, it will only
+            be partially filled. If method is not specified, this is the
+            maximum number of entries along the entire axis where NaNs will be
+            filled.
+
+        Returns
+        -------
+        filled : ExtensionArray with NA/NaN filled
+        """
+        from pandas.api.types import is_scalar
+        from pandas.util._validators import validate_fillna_kwargs
+        from pandas.core.missing import pad_1d, backfill_1d
+        from pandas.core.dtypes.common import _ensure_platform_int
+        from pandas._libs.tslib import iNaT
+
+        value, method = validate_fillna_kwargs(value, method)
+
+        mask = self.isna()
+
+        if not is_scalar(value):
+            if len(value) != len(self):
+                raise ValueError("Length of 'value' does not match. Got ({}) "
+                                 " expected {}".format(len(value), len(self)))
+            value = value[mask]
+
+        if mask.any():
+            if method is not None:
+                # ffill / bfill
+                # The basic idea is to create an array of integer positions.
+                # Internally, we use iNaT and the datetime filling routines
+                # to avoid floating-point NaN. Once filled, we take on `self`
+                # to get the actual values.
+                func = pad_1d if method == 'pad' else backfill_1d
+                idx = np.arange(len(self), dtype='int64')
+                idx[mask] = iNaT
+                idx = _ensure_platform_int(func(idx, mask=mask,
+                                                limit=limit,
+                                                dtype='datetime64[ns]'))
+                idx[idx == iNaT] = -1  # missing value marker for take.
+                new_values = self.take(idx)
+            else:
+                # fill with value
+                new_values = self.copy()
+                new_values[mask] = value
+        else:
+            new_values = self.copy()
+        return new_values
+
     # ------------------------------------------------------------------------
     # Indexing methods
     # ------------------------------------------------------------------------
@@ -253,6 +317,7 @@ def take(self, indexer, allow_fill=True, fill_value=None):
         .. code-block:: python
 
            def take(self, indexer, allow_fill=True, fill_value=None):
+               indexer = np.asarray(indexer)
                mask = indexer == -1
                result = self.data.take(indexer)
                result[mask] = np.nan  # NA for this type

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -13,7 +13,8 @@
 from pandas.core.dtypes.missing import isna, notna
 from pandas.core.dtypes.cast import (
     maybe_infer_to_datetimelike,
-    coerce_indexer_dtype)
+    coerce_indexer_dtype,
+    tolist)
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.common import (
     _ensure_int64,
@@ -475,9 +476,7 @@ def tolist(self):
         (for str, int, float) or a pandas scalar
         (for Timestamp/Timedelta/Interval/Period)
         """
-        if is_datetimelike(self.categories):
-            return [com._maybe_box_datetimelike(x) for x in self]
-        return np.array(self).tolist()
+        return tolist(self)
 
     @property
     def base(self):
@@ -1587,16 +1586,16 @@ def fillna(self, value=None, method=None, limit=None):
 
         Parameters
         ----------
-        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
-            Method to use for filling holes in reindexed Series
-            pad / ffill: propagate last valid observation forward to next valid
-            backfill / bfill: use NEXT valid observation to fill gap
         value : scalar, dict, Series
             If a scalar value is passed it is used to fill all missing values.
             Alternatively, a Series or dict can be used to fill in different
             values for each index. The value should not be a list. The
             value(s) passed should either be in the categories or should be
             NaN.
+        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
+            Method to use for filling holes in reindexed Series
+            pad / ffill: propagate last valid observation forward to next valid
+            backfill / bfill: use NEXT valid observation to fill gap
         limit : int, default None
             (Not implemented yet for Categorical!)
             If method is specified, this is the maximum number of consecutive
@@ -1712,7 +1711,7 @@ def __len__(self):
 
     def __iter__(self):
         """Returns an Iterator over the values of this Categorical."""
-        return iter(self.get_values())
+        return iter(self.get_values().tolist())
 
     def _tidy_repr(self, max_vals=10, footer=True):
         """ a short repr displaying only max_vals and an optional (but default

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -8,11 +8,11 @@
 
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass
+from pandas.core.dtypes.cast import tolist
 from pandas.core.dtypes.common import (
     is_object_dtype,
     is_list_like,
     is_scalar,
-    is_datetimelike,
     is_extension_type,
     is_extension_array_dtype)
 
@@ -826,11 +826,7 @@ def tolist(self):
         --------
         numpy.ndarray.tolist
         """
-
-        if is_datetimelike(self):
-            return [com._maybe_box_datetimelike(x) for x in self._values]
-        else:
-            return self._values.tolist()
+        return tolist(self._values)
 
     def __iter__(self):
         """

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -11,7 +11,8 @@
 from .common import (_ensure_object, is_bool, is_integer, is_float,
                      is_complex, is_datetimetz, is_categorical_dtype,
                      is_datetimelike,
-                     is_extension_type, is_object_dtype,
+                     is_extension_type, is_extension_array_dtype,
+                     is_object_dtype,
                      is_datetime64tz_dtype, is_datetime64_dtype,
                      is_datetime64_ns_dtype,
                      is_timedelta64_dtype, is_timedelta64_ns_dtype,
@@ -1222,3 +1223,32 @@ def construct_1d_object_array_from_listlike(values):
     result = np.empty(len(values), dtype='object')
     result[:] = values
     return result
+
+
+def tolist(values):
+    """Convert an array-like to a list of scalar types.
+
+    Parameters
+    ----------
+    values : ndarray or ExtensionArray
+
+    Returns
+    -------
+    list
+        Each element of the list is a Python scalar (stor, int float)
+        or a pandas scalar (Timestamp / Timedelta / Interval / Period) or
+        the scalar type for 3rd party Extension Arrays.
+
+    See Also
+    --------
+    Series.tolist
+    numpy.ndarray.tolist
+    """
+    from pandas.core.common import _maybe_box_datetimelike
+
+    if is_datetimelike(values):
+        return [_maybe_box_datetimelike(x) for x in values]
+    elif is_extension_array_dtype(values):
+        return list(values)
+    else:
+        return values.tolist()
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1963,6 +1963,23 @@ def concat_same_type(self, to_concat, placement=None):
         return self.make_block_same_class(values, ndim=self.ndim,
                                           placement=placement)
 
+    def fillna(self, value, limit=None, inplace=False, downcast=None,
+               mgr=None):
+        values = self.values if inplace else self.values.copy()
+        values = values.fillna(value=value, limit=limit)
+        return [self.make_block_same_class(values=values,
+                                           placement=self.mgr_locs,
+                                           ndim=self.ndim)]
+
+    def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
+                    fill_value=None, **kwargs):
+
+        values = self.values if inplace else self.values.copy()
+        return self.make_block_same_class(
+            values=values.fillna(value=fill_value, method=method,
+                                 limit=limit),
+            placement=self.mgr_locs)
+
 
 class NumericBlock(Block):
     __slots__ = ()
@@ -2522,27 +2539,6 @@ def _try_coerce_result(self, result):
 
         return result
 
-    def fillna(self, value, limit=None, inplace=False, downcast=None,
-               mgr=None):
-        # we may need to upcast our fill to match our dtype
-        if limit is not None:
-            raise NotImplementedError("specifying a limit for 'fillna' has "
-                                      "not been implemented yet")
-
-        values = self.values if inplace else self.values.copy()
-        values = self._try_coerce_result(values.fillna(value=value,
-                                                       limit=limit))
-        return [self.make_block(values=values)]
-
-    def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
-                    fill_value=None, **kwargs):
-
-        values = self.values if inplace else self.values.copy()
-        return self.make_block_same_class(
-            values=values.fillna(fill_value=fill_value, method=method,
-                                 limit=limit),
-            placement=self.mgr_locs)
-
     def shift(self, periods, axis=0, mgr=None):
         return self.make_block_same_class(values=self.values.shift(periods),
                                           placement=self.mgr_locs)

diff --git a/pandas/tests/categorical/test_dtypes.py b/pandas/tests/categorical/test_dtypes.py
@@ -161,3 +161,8 @@ def test_astype_category(self, dtype_ordered, cat_ordered):
             result = cat.astype('category')
             expected = cat
             tm.assert_categorical_equal(result, expected)
+
+    def test_iter_python_types(self):
+        # GH-19909
+        cat = Categorical([1, 2])
+        assert isinstance(list(cat)[0], int)
diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py
@@ -23,7 +23,8 @@
     maybe_convert_scalar,
     find_common_type,
     construct_1d_object_array_from_listlike,
-    construct_1d_arraylike_from_scalar)
+    construct_1d_arraylike_from_scalar,
+    tolist)
 from pandas.core.dtypes.dtypes import (
     CategoricalDtype,
     DatetimeTZDtype,
@@ -324,6 +325,18 @@ def test_maybe_convert_objects_copy(self):
         out = maybe_convert_objects(values, copy=True)
         assert values is not out
 
+    @pytest.mark.parametrize('values, expected', [
+        (pd.date_range('2017', periods=1), [pd.Timestamp('2017')]),
+        (pd.period_range('2017', periods=1, freq='D'),
+         [pd.Period('2017', freq='D')]),
+        (pd.timedelta_range(0, periods=1), [pd.Timedelta('0')]),
+        (pd.interval_range(0, periods=1), [pd.Interval(0, 1)]),
+        (np.array([0, 1]), [0, 1]),
+    ])
+    def test_tolist(self, values, expected):
+        result = tolist(values)
+        assert result == expected
+
 
 class TestCommonTypes(object):
 

diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py
@@ -11,3 +11,8 @@ def test_astype_object_series(self, all_data):
         ser = pd.Series({"A": all_data})
         result = ser.astype(object)
         assert isinstance(result._data.blocks[0], ObjectBlock)
+
+    def test_tolist(self, data):
+        result = pd.Series(data).tolist()
+        expected = list(data)
+        assert result == expected
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas as pd
 import pandas.util.testing as tm
@@ -45,3 +46,71 @@ def test_dropna_frame(self, data_missing):
         result = df.dropna()
         expected = df.iloc[:0]
         self.assert_frame_equal(result, expected)
+
+    def test_fillna_limit_pad(self, data_missing):
+        arr = data_missing.take([1, 0, 0, 0, 1])
+        result = pd.Series(arr).fillna(method='ffill', limit=2)
+        expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
+        self.assert_series_equal(result, expected)
+
+    def test_fillna_limit_backfill(self, data_missing):
+        arr = data_missing.take([1, 0, 0, 0, 1])
+        result = pd.Series(arr).fillna(method='backfill', limit=2)
+        expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
+        self.assert_series_equal(result, expected)
+
+    def test_fillna_series(self, data_missing):
+        fill_value = data_missing[1]
+        ser = pd.Series(data_missing)
+
+        result = ser.fillna(fill_value)
+        expected = pd.Series(type(data_missing)([fill_value, fill_value]))
+        self.assert_series_equal(result, expected)
+
+        # Fill with a series
+        result = ser.fillna(expected)
+        self.assert_series_equal(result, expected)
+
+        # Fill with a series not affecting the missing values
+        result = ser.fillna(ser)
+        self.assert_series_equal(result, ser)
+
+    @pytest.mark.parametrize('method', ['ffill', 'bfill'])
+    def test_fillna_series_method(self, data_missing, method):
+        fill_value = data_missing[1]
+
+        if method == 'ffill':
+            data_missing = type(data_missing)(data_missing[::-1])
+
+        result = pd.Series(data_missing).fillna(method=method)
+        expected = pd.Series(type(data_missing)([fill_value, fill_value]))
+
+        self.assert_series_equal(result, expected)
+
+    def test_fillna_frame(self, data_missing):
+        fill_value = data_missing[1]
+
+        result = pd.DataFrame({
+            "A": data_missing,
+            "B": [1, 2]
+        }).fillna(fill_value)
+
+        expected = pd.DataFrame({
+            "A": type(data_missing)([fill_value, fill_value]),
+            "B": [1, 2],
+        })
+
+        self.assert_frame_equal(result, expected)
+
+    def test_fillna_fill_other(self, data):
+        result = pd.DataFrame({
+            "A": data,
+            "B": [np.nan] * len(data)
+        }).fillna({"B": 0.0})
+
+        expected = pd.DataFrame({
+            "A": data,
+            "B": [0.0] * len(result),
+        })
+
+        self.assert_frame_equal(result, expected)
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
@@ -69,7 +69,14 @@ def test_getitem_scalar(self):
 
 
 class TestMissing(base.BaseMissingTests):
-    pass
+
+    @pytest.mark.skip(reason="Not implemented")
+    def test_fillna_limit_pad(self):
+        pass
+
+    @pytest.mark.skip(reason="Not implemented")
+    def test_fillna_limit_backfill(self):
+        pass
 
 
 class TestMethods(base.BaseMethodsTests):