
Plotting Int64 columns with nulled integers (NAType) fails #32073 #32387


Closed (wanted to merge 28 commits)

Commits:
05ab972
BUG: fixes unhandled NAType when plotting (#32073)
jeandersonbc Feb 28, 2020
c7756b1
BUG: fixes unhandled NAType when plotting (#32073)
jeandersonbc Feb 28, 2020
0e738b0
Fixed bad formatting
jeandersonbc Mar 1, 2020
26d3297
REF: collect+parametrize reorder_levels tests (#32373)
jbrockmendel Mar 2, 2020
0ff6b96
TST: Allow definition of `pd.CategoricalDtype` with a specific `categ…
rushabh-v Mar 2, 2020
64f76e9
TYP: annotations for internals, set_axis (#32376)
jbrockmendel Mar 2, 2020
70b840b
misplaced DataFrame.join test (#32375)
jbrockmendel Mar 2, 2020
8262397
DOC: Fixed ES01, PR07, SA04 error in pandas.core.groupby.DataFrameGro…
Iqrar99 Mar 2, 2020
d2d14b7
BUG: Pickle NA objects (#32104)
TomAugspurger Mar 2, 2020
ba7895e
DOC: Fix SA04 errors in docstrings #28792 (#32182)
AdrianMastronardi Mar 3, 2020
f2f8605
CLN: remove _igetitem_cache (#32319)
jbrockmendel Mar 3, 2020
6a48be6
Avoid unnecessary values_from_object (#32398)
jbrockmendel Mar 3, 2020
bc4c189
ENH: infer freq in timedelta_range (#32377)
jbrockmendel Mar 3, 2020
6023860
BUG: 2D DTA/TDA arithmetic with object-dtype (#32185)
jbrockmendel Mar 3, 2020
66422c4
TST: broken off from #32187 (#32258)
jbrockmendel Mar 3, 2020
7210810
REF: simplify PeriodIndex._shallow_copy (#32280)
jbrockmendel Mar 3, 2020
e262a71
CLN: setitem_with_indexer cleanups (#32341)
jbrockmendel Mar 3, 2020
143faa0
BUG: None / Timedelta incorrectly returning NaT (#32340)
jbrockmendel Mar 3, 2020
d2413f9
TST: Using more fixtures in of tests/base/test_ops.py (#32313)
SaturnFromTitan Mar 3, 2020
0a7ebfd
CLN: remove unused values from interpolate call (#32400)
jbrockmendel Mar 3, 2020
3f9b4e8
CLN: some code cleanups to pandas/_libs/missing.pyx (#32367)
ShaharNaveh Mar 3, 2020
0d41a23
BUG: fixes bug when using sep=None and comment keyword for read_csv (…
s-scherrer Mar 3, 2020
a57de43
Don't create _join_functions (#32336)
dsaxton Mar 3, 2020
5759ad9
API: replace() should raise an exception if invalid argument is given…
a-y-khan Mar 3, 2020
1c5b03f
BUG: Fix __ne__ comparison for Categorical (#32304)
dsaxton Mar 3, 2020
b03a910
CLN: clean-up show_versions and consistently use null for json output…
simonjayhawkins Mar 3, 2020
1973ddb
Add missing newline (#32404)
Mar 3, 2020
9f71755
Added simple test case
jeandersonbc Mar 3, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
@@ -74,6 +74,7 @@ Bug fixes
**I/O**

- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
- Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`)
- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).

**Experimental dtypes**
6 changes: 6 additions & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -66,6 +66,7 @@ Other enhancements
- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
- `OptionError` is now exposed in `pandas.errors` (:issue:`27553`)
- :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`)
-

.. ---------------------------------------------------------------------------
@@ -198,6 +199,7 @@ Categorical

- Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`)
- Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`)
- Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`)
-

Datetimelike
@@ -213,6 +215,7 @@ Timedelta
^^^^^^^^^

- Bug in constructing a :class:`Timedelta` with a high precision integer that would round the :class:`Timedelta` components (:issue:`31354`)
- Bug in dividing ``np.nan`` or ``None`` by :class:`Timedelta`` incorrectly returning ``NaT`` (:issue:`31869`)
-

Timezones
@@ -295,8 +298,10 @@ I/O
- Bug in :meth:`DataFrame.to_parquet` overwriting pyarrow's default for
``coerce_timestamps``; following pyarrow's default allows writing nanosecond
timestamps with ``version="2.0"`` (:issue:`31652`).
- Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`)
- Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`)


Plotting
^^^^^^^^

@@ -322,6 +327,7 @@ Reshaping
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`)
- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`)


Sparse
4 changes: 1 addition & 3 deletions pandas/_libs/lib.pyx
@@ -100,11 +100,9 @@ def values_from_object(obj: object):
"""
func: object

if getattr(obj, '_typ', '') == 'dataframe':
return obj.values

func = getattr(obj, '_internal_get_values', None)
if func is not None:
# Includes DataFrame, for which we get frame.values
obj = func()

return obj
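The simplified Cython helper above now relies on a single duck-typed hook instead of a special `_typ == 'dataframe'` branch. In plain Python terms the dispatch reduces to the following sketch (`FakeFrame` is a hypothetical stand-in, not a pandas class):

```python
def values_from_object(obj):
    # Any object exposing _internal_get_values (DataFrame included, for
    # which the hook returns frame.values) is unwrapped; everything else
    # passes through unchanged.
    func = getattr(obj, "_internal_get_values", None)
    if func is not None:
        obj = func()
    return obj


class FakeFrame:
    """Hypothetical stand-in for an object implementing the hook."""

    def _internal_get_values(self):
        return [1, 2, 3]
```

The design point of the cleanup: one `getattr` probe replaces two type checks, because DataFrame's own hook already returns `frame.values`.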
33 changes: 15 additions & 18 deletions pandas/_libs/missing.pyx
@@ -10,10 +10,13 @@ cnp.import_array()

cimport pandas._libs.util as util

from pandas._libs.tslibs.np_datetime cimport (
get_timedelta64_value, get_datetime64_value)

from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value
from pandas._libs.tslibs.nattype cimport (
checknull_with_nat, c_NaT as NaT, is_null_datetimelike)
c_NaT as NaT,
checknull_with_nat,
is_null_datetimelike,
)
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

from pandas.compat import is_platform_32bit
@@ -44,7 +47,7 @@ cpdef bint checknull(object val):

Returns
-------
result : bool
bool

Notes
-----
@@ -223,7 +226,7 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:

Returns
-------
result : ndarray (dtype=np.bool_)
ndarray (dtype=np.bool_)

Notes
-----
@@ -248,17 +251,11 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:


def isposinf_scalar(val: object) -> bool:
if util.is_float_object(val) and val == INF:
return True
else:
return False
return util.is_float_object(val) and val == INF


def isneginf_scalar(val: object) -> bool:
if util.is_float_object(val) and val == NEGINF:
return True
else:
return False
return util.is_float_object(val) and val == NEGINF


cdef inline bint is_null_datetime64(v):
@@ -364,6 +361,9 @@ class NAType(C_NAType):
exponent = 31 if is_32bit else 61
return 2 ** exponent - 1

def __reduce__(self):
return "NA"

# Binary arithmetic and comparison ops -> propagate

__add__ = _create_binary_propagating_op("__add__")
@@ -423,7 +423,6 @@ class NAType(C_NAType):
return NA
elif isinstance(other, np.ndarray):
return np.where(other == 1, other, NA)

return NotImplemented

# Logical ops using Kleene logic
@@ -433,8 +432,7 @@
return False
elif other is True or other is C_NA:
return NA
else:
return NotImplemented
return NotImplemented

__rand__ = __and__

@@ -443,8 +441,7 @@
return True
elif other is False or other is C_NA:
return NA
else:
return NotImplemented
return NotImplemented

__ror__ = __or__

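The `__and__`/`__or__` branches above implement Kleene three-valued logic. They can be summarized in plain Python, with `None` standing in for `NA` (an assumption for illustration; pandas uses its own singleton):

```python
def kleene_and(a, b):
    # False wins outright; otherwise any missing operand poisons the result
    if a is False or b is False:
        return False
    if a is None or b is None:
        return None
    return a and b


def kleene_or(a, b):
    # True wins outright; otherwise any missing operand poisons the result
    if a is True or b is True:
        return True
    if a is None or b is None:
        return None
    return a or b
```

The asymmetry is the whole point: `False & NA` is definitely `False` regardless of what `NA` hides, while `True & NA` stays unknown, and dually for `|`.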
10 changes: 9 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
@@ -1407,15 +1407,23 @@ class Timedelta(_Timedelta):
# convert to Timedelta below
pass

elif util.is_nan(other):
# i.e. np.nan or np.float64("NaN")
raise TypeError("Cannot divide float by Timedelta")

elif hasattr(other, 'dtype'):
if other.dtype.kind == "O":
# GH#31869
return np.array([x / self for x in other])
return other / self.to_timedelta64()

elif not _validate_ops_compat(other):
return NotImplemented

other = Timedelta(other)
if other is NaT:
return NaT
# In this context we treat NaT as timedelta-like
return np.nan
return float(other.value) / self.value

def __floordiv__(self, other):
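The new branches above change what each kind of numerator produces when divided by a `Timedelta` (GH 31869): a NaN float now raises, while `None` is treated as timedelta-like `NaT` and yields a missing float. A hedged pure-Python sketch of the dispatch order, with illustrative names rather than the pandas internals:

```python
import math
from datetime import timedelta

NAT = object()  # stand-in for pandas NaT (assumption for illustration)


def rdiv_by_timedelta(other, td):
    """Divide `other` by a timedelta, mirroring the fixed dispatch order."""
    if isinstance(other, float) and math.isnan(other):
        # float("nan") / Timedelta now raises instead of returning NaT
        raise TypeError("Cannot divide float by Timedelta")
    if other is None or other is NAT:
        # In this context None/NaT is timedelta-like -> missing float result
        return math.nan
    if isinstance(other, timedelta):
        return other / td
    return NotImplemented
```

The subtlety the fix addresses: a float numerator is dimensionless, so "float / timedelta" has no sensible unit and must error, whereas a missing *timedelta* numerator legitimately produces a missing *ratio* (`nan`), not a missing timestamp (`NaT`).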
4 changes: 2 additions & 2 deletions pandas/_testing.py
@@ -742,9 +742,9 @@ def repr_class(x):
raise_assert_detail(obj, msg, repr_class(left), repr_class(right))


def assert_attr_equal(attr, left, right, obj="Attributes"):
def assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
"""
checks attributes are equal. Both objects must have attribute.
Check attributes are equal. Both objects must have attribute.

Parameters
----------
2 changes: 1 addition & 1 deletion pandas/_typing.py
@@ -64,7 +64,7 @@
Label = Optional[Hashable]
Level = Union[Label, int]
Ordered = Optional[bool]
JSONSerializable = Union[PythonScalar, List, Dict]
JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
Axes = Collection

# For functions like rename that convert one label to another
24 changes: 24 additions & 0 deletions pandas/conftest.py
@@ -1047,6 +1047,16 @@ def series_with_simple_index(indices):
for dtype in _narrow_dtypes
}


@pytest.fixture(params=_narrow_series.keys())
def narrow_series(request):
"""
Fixture for Series with low precision data types
"""
# copy to avoid mutation, e.g. setting .name
return _narrow_series[request.param].copy()


_index_or_series_objs = {**indices_dict, **_series, **_narrow_series}


@@ -1057,3 +1067,17 @@ def index_or_series_obj(request):
copy to avoid mutation, e.g. setting .name
"""
return _index_or_series_objs[request.param].copy(deep=True)


@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
"""
DataFrame with 3 level MultiIndex (year, month, day) covering
first 100 business days from 2000-01-01 with random data
"""
tdf = tm.makeTimeDataFrame(100)
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
# use Int64Index, to make sure things work
ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True)
ymd.index.set_names(["year", "month", "day"], inplace=True)
return ymd
5 changes: 4 additions & 1 deletion pandas/core/arrays/categorical.py
@@ -103,7 +103,10 @@ def func(self, other):
mask = (self._codes == -1) | (other_codes == -1)
if mask.any():
# In other series, the leads to False, so do that here too
ret[mask] = False
if opname == "__ne__":
ret[(self._codes == -1) & (other_codes == -1)] = True
else:
ret[mask] = False
return ret

if is_scalar(other):
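The `__ne__` special case above (GH 32276) can be stated directly on the integer codes, where `-1` marks a missing value: `!=` against a missing value should be `True`, matching other pandas dtypes. A simplified sketch (helper name is ours, not the pandas API):

```python
def categorical_ne(codes_a, codes_b):
    """Elementwise !=, treating code -1 as a missing category."""
    result = []
    for a, b in zip(codes_a, codes_b):
        if a == -1 or b == -1:
            # Any missing operand makes "not equal" evaluate True.
            # Before the fix, both-missing positions fell into the
            # generic ret[mask] = False branch and came out False.
            result.append(True)
        else:
            result.append(a != b)
    return result
```

Note why the diff only patches the both-missing case: when exactly one side is `-1`, the raw code comparison `a != b` is already `True`, so only positions where both codes are `-1` need the explicit override.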
21 changes: 8 additions & 13 deletions pandas/core/arrays/datetimelike.py
@@ -42,6 +42,7 @@
from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
import pandas.core.common as com
from pandas.core.construction import array, extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.invalid import invalid_comparison, make_invalid_op
@@ -623,7 +624,7 @@ def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
return self._box_values(self.asi8)
return self._box_values(self.asi8.ravel()).reshape(self.shape)
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return self._format_native_types()
elif is_integer_dtype(dtype):
@@ -1256,19 +1257,13 @@ def _addsub_object_array(self, other: np.ndarray, op):
PerformanceWarning,
)

# For EA self.astype('O') returns a numpy array, not an Index
left = self.astype("O")
# Caller is responsible for broadcasting if necessary
assert self.shape == other.shape, (self.shape, other.shape)

res_values = op(left, np.array(other))
kwargs = {}
if not is_period_dtype(self):
kwargs["freq"] = "infer"
try:
res = type(self)._from_sequence(res_values, **kwargs)
except ValueError:
# e.g. we've passed a Timestamp to TimedeltaArray
res = res_values
return res
res_values = op(self.astype("O"), np.array(other))
result = array(res_values.ravel())
result = extract_array(result, extract_numpy=True).reshape(self.shape)
return result

def _time_shift(self, periods, freq=None):
"""
12 changes: 10 additions & 2 deletions pandas/core/arrays/timedeltas.py
@@ -39,6 +39,7 @@
from pandas.core.algorithms import checked_add_with_arr
from pandas.core.arrays import datetimelike as dtl
import pandas.core.common as com
from pandas.core.construction import extract_array

from pandas.tseries.frequencies import to_offset
from pandas.tseries.offsets import Tick
@@ -141,8 +142,7 @@ def dtype(self):
# Constructors

def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False):
if isinstance(values, (ABCSeries, ABCIndexClass)):
values = values._values
values = extract_array(values)

inferred_freq = getattr(values, "_freq", None)

@@ -258,6 +258,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
index = _generate_regular_range(start, end, periods, freq)
else:
index = np.linspace(start.value, end.value, periods).astype("i8")
if len(index) >= 2:
# Infer a frequency
td = Timedelta(index[1] - index[0])
freq = to_offset(td)

if not left_closed:
index = index[1:]
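The frequency-inference block above (GH 32377) works because `np.linspace` over the endpoint nanosecond values produces a constant step, so the freq is fully determined by the first difference. The arithmetic reduces to this stdlib-only sketch (helper name is ours):

```python
def inferred_step_ns(start_ns, end_ns, periods):
    # With start, end, and periods all given, the spacing is fixed, so a
    # frequency can be inferred from a single step. Assumes the span
    # divides evenly into (periods - 1) steps, as in a regular range.
    if periods < 2:
        return None  # a single point carries no spacing information
    return (end_ns - start_ns) // (periods - 1)
```

In the actual diff this step is wrapped in `Timedelta(...)` and `to_offset(...)` to turn the raw nanosecond delta into a proper freq object.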
@@ -614,6 +618,10 @@ def __floordiv__(self, other):
if self.freq is not None:
# Note: freq gets division, not floor-division
freq = self.freq / other
if freq.nanos == 0 and self.freq.nanos != 0:
# e.g. if self.freq is Nano(1) then dividing by 2
# rounds down to zero
freq = None
return type(self)(result.view("m8[ns]"), freq=freq)

if not hasattr(other, "dtype"):
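The `nanos` guard in `__floordiv__` above avoids attaching a zero-duration frequency to the result. Sketched on raw nanosecond counts (an assumption for illustration: real pandas freq objects carry more than a nanosecond count):

```python
def divided_freq_nanos(freq_nanos, divisor):
    # e.g. a 1-nanosecond freq floor-divided by 2 would round down to
    # zero, which is not a representable frequency, so the result drops
    # the freq (None) rather than claim a bogus zero-duration one.
    new = freq_nanos // divisor
    if new == 0 and freq_nanos != 0:
        return None
    return new
```

The `freq_nanos != 0` clause keeps an already-absent frequency distinct from a frequency that was rounded away.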
13 changes: 8 additions & 5 deletions pandas/core/base.py
@@ -1035,7 +1035,8 @@ def argmin(self, axis=None, skipna=True, *args, **kwargs):

See Also
--------
numpy.ndarray.argmin
numpy.ndarray.argmin : Return indices of the minimum values along
the given axis.
"""
nv.validate_minmax_axis(axis)
nv.validate_argmax_with_skipna(skipna, args, kwargs)
@@ -1055,7 +1056,8 @@ def tolist(self):

See Also
--------
numpy.ndarray.tolist
numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
nested list of Python scalars.
"""
if not isinstance(self._values, np.ndarray):
# check for ndarray instead of dtype to catch DTA/TDA
@@ -1402,7 +1404,8 @@ def memory_usage(self, deep=False):

See Also
--------
numpy.ndarray.nbytes
numpy.ndarray.nbytes : Total bytes consumed by the elements of the
array.

Notes
-----
@@ -1473,8 +1476,8 @@ def factorize(self, sort=False, na_sentinel=-1):

See Also
--------
sort_values
numpy.searchsorted
sort_values : Sort by the values along either axis.
numpy.searchsorted : Similar method from NumPy.

Notes
-----
2 changes: 1 addition & 1 deletion pandas/core/common.py
@@ -122,7 +122,7 @@ def is_bool_indexer(key: Any) -> bool:
is_array_like(key) and is_extension_array_dtype(key.dtype)
):
if key.dtype == np.object_:
key = np.asarray(values_from_object(key))
key = np.asarray(key)

if not lib.is_bool_array(key):
na_msg = "Cannot mask with non-boolean array containing NA / NaN values"
6 changes: 4 additions & 2 deletions pandas/core/computation/eval.py
@@ -266,8 +266,10 @@ def eval(

See Also
--------
DataFrame.query
DataFrame.eval
DataFrame.query : Evaluates a boolean expression to query the columns
of a frame.
DataFrame.eval : Evaluate a string describing operations on
DataFrame columns.

Notes
-----