Merge branch 'master' of https://github.com/pandas-dev/pandas into ci-files-3

jbrockmendel · jbrockmendel · commit 811b9d06acc4 · 2020-08-14T10:13:04.000-07:00
diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst
@@ -16,12 +16,14 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 
 - Fixed regression where :meth:`DataFrame.to_numpy` would raise a ``RuntimeError`` for mixed dtypes when converting to ``str`` (:issue:`35455`)
-- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`).
+- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`)
 - Fixed regression where :func:`pandas.testing.assert_series_equal` would raise an error when non-numeric dtypes were passed with ``check_exact=True`` (:issue:`35446`)
 - Fixed regression in :class:`pandas.core.groupby.RollingGroupby` where column selection was ignored (:issue:`35486`)
 - Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`)
+- Fixed regression in :meth:`DataFrame.diff` with read-only data (:issue:`35559`)
 - Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`)
 - Fixed regression in :meth:`DataFrame.apply` where functions that altered the input in-place only operated on a single row (:issue:`35462`)
+- Fixed regression where :meth:`DataFrame.reset_index` would raise a ``ValueError`` on an empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`)
 - Fixed regression where :meth:`DataFrame.merge_asof` would raise a ``UnboundLocalError`` when ``left_index`` , ``right_index`` and ``tolerance`` were set (:issue:`35558`)
 - Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)
 
@@ -32,7 +34,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 
-- Bug in ``Styler`` whereby `cell_ids` argument had no effect due to other recent changes (:issue:`35588`).
+- Bug in ``Styler`` whereby the ``cell_ids`` argument had no effect due to other recent changes (:issue:`35588`, :issue:`35663`)
 
 Categorical
 ^^^^^^^^^^^
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -172,6 +172,7 @@ Datetimelike
 ^^^^^^^^^^^^
 - Bug in :attr:`DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`)
 - Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`)
+- Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g. ``months=12``) (:issue:`34511`)
 -
 
 Timedelta
diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py
@@ -88,12 +88,14 @@ def _valid_locales(locales, normalize):
     valid_locales : list
         A list of valid locales.
     """
-    if normalize:
-        normalizer = lambda x: locale.normalize(x.strip())
-    else:
-        normalizer = lambda x: x.strip()
-
-    return list(filter(can_set_locale, map(normalizer, locales)))
+    return [
+        loc
+        for loc in (
+            locale.normalize(loc.strip()) if normalize else loc.strip()
+            for loc in locales
+        )
+        if can_set_locale(loc)
+    ]
 
 
 def _default_locale_getter():
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -1200,14 +1200,15 @@ ctypedef fused out_t:
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def diff_2d(
-    diff_t[:, :] arr,
-    out_t[:, :] out,
+    ndarray[diff_t, ndim=2] arr,  # TODO(cython 3) update to "const diff_t[:, :] arr"
+    ndarray[out_t, ndim=2] out,
     Py_ssize_t periods,
     int axis,
 ):
     cdef:
         Py_ssize_t i, j, sx, sy, start, stop
-        bint f_contig = arr.is_f_contig()
+        bint f_contig = arr.flags.f_contiguous
+        # bint f_contig = arr.is_f_contig()  # TODO(cython 3)
 
     # Disable for unsupported dtype combinations,
     #  see https://github.com/cython/cython/issues/2646
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -989,13 +989,6 @@ cdef class RelativeDeltaOffset(BaseOffset):
             state["_offset"] = state.pop("offset")
             state["kwds"]["offset"] = state["_offset"]
 
-        if "_offset" in state and not isinstance(state["_offset"], timedelta):
-            # relativedelta, we need to populate using its kwds
-            offset = state["_offset"]
-            odict = offset.__dict__
-            kwds = {key: odict[key] for key in odict if odict[key]}
-            state.update(kwds)
-
         self.n = state.pop("n")
         self.normalize = state.pop("normalize")
         self._cache = state.pop("_cache", {})
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
@@ -167,10 +167,9 @@ def _is_type(t):
 
 # partition all AST nodes
 _all_nodes = frozenset(
-    filter(
-        lambda x: isinstance(x, type) and issubclass(x, ast.AST),
-        (getattr(ast, node) for node in dir(ast)),
-    )
+    node
+    for node in (getattr(ast, name) for name in dir(ast))
+    if isinstance(node, type) and issubclass(node, ast.AST)
 )
 
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4816,7 +4816,7 @@ def _maybe_casted_values(index, labels=None):
 
                 # we can have situations where the whole mask is -1,
                 # meaning there is nothing found in labels, so make all nan's
-                if mask.all():
+                if mask.size > 0 and mask.all():
                     dtype = index.dtype
                     fill_value = na_value_for_dtype(dtype)
                     values = construct_1d_arraylike_from_scalar(
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -2012,8 +2012,11 @@ def _sort_labels(uniques: np.ndarray, left, right):
 def _get_join_keys(llab, rlab, shape, sort: bool):
 
     # how many levels can be done without overflow
-    pred = lambda i: not is_int64_overflow_possible(shape[:i])
-    nlev = next(filter(pred, range(len(shape), 0, -1)))
+    nlev = next(
+        lev
+        for lev in range(len(shape), 0, -1)
+        if not is_int64_overflow_possible(shape[:lev])
+    )
 
     # get keys for the first `nlev` levels
     stride = np.prod(shape[1:nlev], dtype="i8")
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -390,16 +390,16 @@ def format_attr(pair):
                     "is_visible": (c not in hidden_columns),
                 }
                 # only add an id if the cell has a style
+                props = []
                 if self.cell_ids or (r, c) in ctx:
                     row_dict["id"] = "_".join(cs[1:])
+                    for x in ctx[r, c]:
+                        # have to handle empty styles like ['']
+                        if x.count(":"):
+                            props.append(tuple(x.split(":")))
+                        else:
+                            props.append(("", ""))
                 row_es.append(row_dict)
-                props = []
-                for x in ctx[r, c]:
-                    # have to handle empty styles like ['']
-                    if x.count(":"):
-                        props.append(tuple(x.split(":")))
-                    else:
-                        props.append(("", ""))
                 cellstyle_map[tuple(props)].append(f"row{r}_col{c}")
             body.append(row_es)
 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -765,8 +765,9 @@ def _combine_lines(self, lines) -> str:
         """
         Combines a list of JSON objects into one JSON object.
         """
-        lines = filter(None, map(lambda x: x.strip(), lines))
-        return "[" + ",".join(lines) + "]"
+        return (
+            f'[{",".join((line for line in (line.strip() for line in lines) if line))}]'
+        )
 
     def read(self):
         """
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -2161,9 +2161,7 @@ def read(self, nrows=None):
                 if self.usecols is not None:
                     columns = self._filter_usecols(columns)
 
-                col_dict = dict(
-                    filter(lambda item: item[0] in columns, col_dict.items())
-                )
+                col_dict = {k: v for k, v in col_dict.items() if k in columns}
 
                 return index, columns, col_dict
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -99,22 +99,20 @@ def _ensure_str(name):
 
 def _ensure_term(where, scope_level: int):
     """
-    ensure that the where is a Term or a list of Term
-    this makes sure that we are capturing the scope of variables
-    that are passed
-    create the terms here with a frame_level=2 (we are 2 levels down)
+    Ensure that the where is a Term or a list of Term.
+
+    This makes sure that we are capturing the scope of variables that are
+    passed; create the terms here with a frame_level=2 (we are 2 levels down).
     """
     # only consider list/tuple here as an ndarray is automatically a coordinate
     # list
     level = scope_level + 1
     if isinstance(where, (list, tuple)):
-        wlist = []
-        for w in filter(lambda x: x is not None, where):
-            if not maybe_expression(w):
-                wlist.append(w)
-            else:
-                wlist.append(Term(w, scope_level=level))
-        where = wlist
+        where = [
+            Term(term, scope_level=level + 1) if maybe_expression(term) else term
+            for term in where
+            if term is not None
+        ]
     elif maybe_expression(where):
         where = Term(where, scope_level=level)
     return where if where is None or len(where) else None
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
@@ -168,7 +168,7 @@ def setup_ops(self):
     def setup_method(self, method):
         self.setup_ops()
         self.setup_data()
-        self.current_engines = filter(lambda x: x != self.engine, _engines)
+        self.current_engines = (engine for engine in _engines if engine != self.engine)
 
     def teardown_method(self, method):
         del self.lhses, self.rhses, self.scalar_rhses, self.scalar_lhses
@@ -774,11 +774,9 @@ def setup_class(cls):
         cls.parser = "python"
 
     def setup_ops(self):
-        self.cmp_ops = list(
-            filter(lambda x: x not in ("in", "not in"), expr._cmp_ops_syms)
-        )
+        self.cmp_ops = [op for op in expr._cmp_ops_syms if op not in ("in", "not in")]
         self.cmp2_ops = self.cmp_ops[::-1]
-        self.bin_ops = [s for s in expr._bool_ops_syms if s not in ("and", "or")]
+        self.bin_ops = [op for op in expr._bool_ops_syms if op not in ("and", "or")]
         self.special_case_ops = _special_case_arith_ops_syms
         self.arith_ops = _good_arith_ops
         self.unary_ops = "+", "-", "~"
@@ -1150,9 +1148,9 @@ def eval(self, *args, **kwargs):
         return pd.eval(*args, **kwargs)
 
     def test_simple_arith_ops(self):
-        ops = self.arith_ops
+        ops = (op for op in self.arith_ops if op != "//")
 
-        for op in filter(lambda x: x != "//", ops):
+        for op in ops:
             ex = f"1 {op} 1"
             ex2 = f"x {op} 1"
             ex3 = f"1 {op} (x + 1)"
@@ -1637,8 +1635,11 @@ def setup_class(cls):
         super().setup_class()
         cls.engine = "numexpr"
         cls.parser = "python"
-        cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms
-        cls.arith_ops = filter(lambda x: x not in ("in", "not in"), cls.arith_ops)
+        cls.arith_ops = [
+            op
+            for op in expr._arith_ops_syms + expr._cmp_ops_syms
+            if op not in ("in", "not in")
+        ]
 
     def test_check_many_exprs(self):
         a = 1  # noqa
@@ -1726,8 +1727,11 @@ class TestOperationsPythonPython(TestOperationsNumExprPython):
     def setup_class(cls):
         super().setup_class()
         cls.engine = cls.parser = "python"
-        cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms
-        cls.arith_ops = filter(lambda x: x not in ("in", "not in"), cls.arith_ops)
+        cls.arith_ops = [
+            op
+            for op in expr._arith_ops_syms + expr._cmp_ops_syms
+            if op not in ("in", "not in")
+        ]
 
 
 class TestOperationsPythonPandas(TestOperationsNumExprPandas):
diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py
@@ -214,3 +214,12 @@ def test_diff_integer_na(self, axis, expected):
         # Test case for default behaviour of diff
         result = df.diff(axis=axis)
         tm.assert_frame_equal(result, expected)
+
+    def test_diff_readonly(self):
+        # https://github.com/pandas-dev/pandas/issues/35559
+        arr = np.random.randn(5, 2)
+        arr.flags.writeable = False
+        df = pd.DataFrame(arr)
+        result = df.diff()
+        expected = pd.DataFrame(np.array(df)).diff()
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
@@ -318,3 +318,33 @@ def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype):
     result = DataFrame(index=idx)[:0].reset_index().dtypes
     expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype})
     tm.assert_series_equal(result, expected)
+
+
+def test_reset_index_empty_frame_with_datetime64_multiindex():
+    # https://github.com/pandas-dev/pandas/issues/35606
+    idx = MultiIndex(
+        levels=[[pd.Timestamp("2020-07-20 00:00:00")], [3, 4]],
+        codes=[[], []],
+        names=["a", "b"],
+    )
+    df = DataFrame(index=idx, columns=["c", "d"])
+    result = df.reset_index()
+    expected = DataFrame(
+        columns=list("abcd"), index=RangeIndex(start=0, stop=0, step=1)
+    )
+    expected["a"] = expected["a"].astype("datetime64[ns]")
+    expected["b"] = expected["b"].astype("int64")
+    tm.assert_frame_equal(result, expected)
+
+
+def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
+    # https://github.com/pandas-dev/pandas/issues/35657
+    df = DataFrame(dict(c1=[10.0], c2=["a"], c3=pd.to_datetime("2020-01-01")))
+    df = df.head(0).groupby(["c2", "c3"])[["c1"]].sum()
+    result = df.reset_index()
+    expected = DataFrame(
+        columns=["c2", "c3", "c1"], index=RangeIndex(start=0, stop=0, step=1)
+    )
+    expected["c3"] = expected["c3"].astype("datetime64[ns]")
+    expected["c1"] = expected["c1"].astype("float64")
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/data/legacy_pickle/1.1.0/1.1.0_x86_64_darwin_3.8.5.pickle b/pandas/tests/io/data/legacy_pickle/1.1.0/1.1.0_x86_64_darwin_3.8.5.pickle
diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py
@@ -1684,8 +1684,11 @@ def f(a, b, styler):
 
     def test_no_cell_ids(self):
         # GH 35588
+        # GH 35663
         df = pd.DataFrame(data=[[0]])
-        s = Styler(df, uuid="_", cell_ids=False).render()
+        styler = Styler(df, uuid="_", cell_ids=False)
+        styler.render()
+        s = styler.render()  # render twice to ensure ctx is not updated
         assert s.find('<td  class="data row0 col0" >') != -1
 
 
diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
@@ -6,10 +6,10 @@
 in ~/pandas
 
 . activate pandas_0.20.3
-cd ~/
+cd ~/pandas/pandas
 
-$ python pandas/pandas/tests/io/generate_legacy_storage_files.py \
-    pandas/pandas/tests/io/data/legacy_pickle/0.20.3/ pickle
+$ python -m tests.io.generate_legacy_storage_files \
+    tests/io/data/legacy_pickle/0.20.3/ pickle
 
 This script generates a storage file for the current arch, system,
 and python version
@@ -328,7 +328,7 @@ def write_legacy_pickles(output_dir):
     pth = f"{platform_name()}.pickle"
 
     fh = open(os.path.join(output_dir, pth), "wb")
-    pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)
+    pickle.dump(create_pickle_data(), fh, pickle.DEFAULT_PROTOCOL)
     fh.close()
 
     print(f"created pickle file: {pth}")
diff --git a/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle b/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
diff --git a/setup.py b/setup.py

Original file line number	Diff line number	Diff line change
`@@ -172,6 +172,7 @@ Datetimelike`
`172`	`172`	`^^^^^^^^^^^^`
`173`	`173`	- Bug in :attr:`DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`)
`174`	`174`	- Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`)
	`175`	+- Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g. ``months=12``) (:issue:`34511`)
`175`	`176`	`-`
`176`	`177`
`177`	`178`	`Timedelta`