diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a4b943f995a33..54fbbb12c2917 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -556,6 +556,7 @@ Datetimelike API Changes - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) .. _whatsnew_0230.api.other: @@ -575,7 +576,6 @@ Other API Changes - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) - :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) - :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) -- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) - Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) @@ -589,6 +589,7 @@ Other API Changes - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 7354115f8295e..493b2e5bd899b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,7 +40,7 @@ Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.io.formats.terminal import get_terminal_size -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core.config import get_option from .base import ExtensionArray @@ -1610,6 +1610,9 @@ def fillna(self, value=None, method=None, limit=None): ------- filled : Categorical with NA/NaN filled """ + value, method = validate_fillna_kwargs( + value, method, validate_scalar_dict_value=False + ) if value is None: value = np.nan diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 297450417e3cf..8034cf89cf8b7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -54,7 +54,7 @@ import pandas.core.nanops as nanops from pandas.util._decorators import (Appender, Substitution, deprecate_kwarg) -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core import config # goal is to be able to define the docs close to function, while still being @@ -4697,10 +4697,8 @@ def infer_objects(self): def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): inplace = validate_bool_kwarg(inplace, 'inplace') + value, method = validate_fillna_kwargs(value, method) - if isinstance(value, (list, tuple)): - raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) self._consolidate_inplace() # set the default here, so functions examining the signaure @@ -4711,8 +4709,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, method = missing.clean_fill_method(method) from pandas import DataFrame if value is None: - if method is None: - raise ValueError('must specify a fill method or value') + if self._is_mixed_type and axis == 1: if inplace: raise NotImplementedError() @@ -4746,9 +4743,6 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, coerce=True, downcast=downcast) else: - if method is not None: - raise ValueError('cannot specify both a fill method and value') - if len(self._get_axis(axis)) == 0: return self diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py index 79758dee5cfda..fca5573547071 100644 --- a/pandas/tests/categorical/test_missing.py +++ b/pandas/tests/categorical/test_missing.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest import pandas.util.testing as tm from pandas import (Categorical, Index, isna) @@ -53,3 +54,18 @@ def test_set_item_nan(self): exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize('fillna_kwargs, msg', [ + (dict(value=1, method='ffill'), + "Cannot specify both 'value' and 'method'."), + (dict(), + "Must specify a fill 'value' or 'method'."), + (dict(method='bad'), + "Invalid fill method. Expecting .* bad"), + ]) + def test_fillna_raises(self, fillna_kwargs, msg): + # https://github.com/pandas-dev/pandas/issues/19682 + cat = Categorical([1, 2, 3]) + + with tm.assert_raises_regex(ValueError, msg): + cat.fillna(**fillna_kwargs) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index b30ffc7416f92..a96563051e7de 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -320,3 +320,39 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): msg = "Cannot specify all of '{}', 'index', 'columns'." raise TypeError(msg.format(arg_name)) return out + + +def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): + """Validate the keyword arguments to 'fillna'. + + This checks that exactly one of 'value' and 'method' is specified. + If 'method' is specified, this validates that it's a valid method. + + Parameters + ---------- + value, method : object + The 'value' and 'method' keyword arguments for 'fillna'. + validate_scalar_dict_value : bool, default True + Whether to validate that 'value' is a scalar or dict. Specifically, + validate that it is not a list or tuple. + + Returns + ------- + value, method : object + """ + from pandas.core.missing import clean_fill_method + + if value is None and method is None: + raise ValueError("Must specify a fill 'value' or 'method'.") + elif value is None and method is not None: + method = clean_fill_method(method) + + elif value is not None and method is None: + if validate_scalar_dict_value and isinstance(value, (list, tuple)): + raise TypeError('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) + + elif value is not None and method is not None: + raise ValueError("Cannot specify both 'value' and 'method'.") + + return value, method