Merge pull request #246 from pandas-dev/master

sthagen · web-flow · commit f4a530f2cbd4 · 2021-07-30T11:40:45.000+02:00
Sync Fork from Upstream Repo
diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst
@@ -19,6 +19,8 @@ Fixed regressions
 - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
 - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`)
 - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
+- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to ``None`` (:issue:`42387`)
+- Fixed regression in comparisons between a :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -270,9 +270,9 @@ cdef class _Timestamp(ABCTimestamp):
         if op == Py_EQ:
             return False
         if op == Py_LE or op == Py_LT:
-            return other.year <= self.year
+            return self.year <= other.year
         if op == Py_GE or op == Py_GT:
-            return other.year >= self.year
+            return self.year >= other.year
 
     cdef bint _can_compare(self, datetime other):
         if self.tzinfo is not None:
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -405,9 +405,7 @@ def extract_array(
     For an ndarray-backed Series / Index a PandasArray is returned.
 
     >>> extract_array(pd.Series([1, 2, 3]))
-    <PandasArray>
-    [1, 2, 3]
-    Length: 3, dtype: int64
+    array([1, 2, 3])
 
     To extract all the way down to the ndarray, pass ``extract_numpy=True``.
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4754,7 +4754,8 @@ def drop(
         Parameters
         ----------
         labels : single label or list-like
-            Index or column labels to drop.
+            Index or column labels to drop. A tuple will be used as a single
+            label and not treated as a list-like.
         axis : {0 or 'index', 1 or 'columns'}, default 0
             Whether to drop labels from the index (0 or 'index') or
             columns (1 or 'columns').
@@ -4845,6 +4846,17 @@ def drop(
                 weight  1.0     0.8
                 length  0.3     0.2
 
+        >>> df.drop(index=('falcon', 'weight'))
+                        big     small
+        lama    speed   45.0    30.0
+                weight  200.0   100.0
+                length  1.5     1.0
+        cow     speed   30.0    20.0
+                weight  250.0   150.0
+                length  1.5     0.8
+        falcon  speed   320.0   250.0
+                length  0.3     0.2
+
         >>> df.drop(index='cow', columns='small')
                         big
         lama    speed   45.0
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -5406,6 +5406,9 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]
         self._raise_if_missing(keyarr, indexer, axis_name)
 
         keyarr = self.take(indexer)
+        if isinstance(key, Index):
+            # GH 42790 - Preserve name from an Index
+            keyarr.name = key.name
         if keyarr.dtype.kind in ["m", "M"]:
             # DTI/TDI.take can infer a freq in some cases when we dont want one
             if isinstance(key, list) or (
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -4,6 +4,7 @@
 from typing import (
     Any,
     Callable,
+    Literal,
 )
 
 import numpy as np
@@ -417,12 +418,8 @@ def _bins_to_cuts(
         else:
             bins = unique_bins
 
-    side = "left" if right else "right"
-    # error: No overload variant of "searchsorted" of "ndarray" matches
-    # argument types "Any", "str"
-    ids = ensure_platform_int(
-        bins.searchsorted(x, side=side)  # type: ignore[call-overload]
-    )
+    side: Literal["left", "right"] = "left" if right else "right"
+    ids = ensure_platform_int(bins.searchsorted(x, side=side))
 
     if include_lowest:
         ids[np.asarray(x) == bins[0]] = 1
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -1302,7 +1302,12 @@ def _refine_defaults_read(
     if delimiter and (sep is not lib.no_default):
         raise ValueError("Specified a sep and a delimiter; you can only specify one.")
 
-    if names is not lib.no_default and prefix is not lib.no_default:
+    if (
+        names is not None
+        and names is not lib.no_default
+        and prefix is not None
+        and prefix is not lib.no_default
+    ):
         raise ValueError("Specified named and prefix; you can only specify one.")
 
     kwds["names"] = None if names is lib.no_default else names
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py
@@ -417,8 +417,12 @@ def handle_shared_axes(
             except IndexError:
                 # if gridspec is used, ax.rowNum and ax.colNum may different
                 # from layout shape. in this case, use last_row logic
+                if compat.mpl_ge_3_4_0():
+                    is_last_row = lambda x: x.get_subplotspec().is_last_row()
+                else:
+                    is_last_row = lambda x: x.is_last_row()
                 for ax in axarr:
-                    if ax.is_last_row():
+                    if is_last_row(ax):
                         continue
                     if sharex or _has_externally_shared_axis(ax, "x"):
                         _remove_labels_from_axis(ax.xaxis)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -2432,6 +2432,18 @@ def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period):
         with pytest.raises(KeyError, match="not in index"):
             ser.loc[keys]
 
+    def test_loc_named_index(self):
+        # GH 42790
+        df = DataFrame(
+            [[1, 2], [4, 5], [7, 8]],
+            index=["cobra", "viper", "sidewinder"],
+            columns=["max_speed", "shield"],
+        )
+        expected = df.iloc[:2]
+        expected.index.name = "foo"
+        result = df.loc[Index(["cobra", "viper"], name="foo")]
+        tm.assert_frame_equal(result, expected)
+
 
 @pytest.mark.parametrize(
     "columns, column_key, expected_columns",
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
@@ -764,15 +764,24 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
 
 
 @pytest.mark.parametrize("func", ["read_csv", "read_table"])
-@pytest.mark.parametrize("prefix", [None, "x"])
-@pytest.mark.parametrize("names", [None, ["a"]])
-def test_names_and_prefix_not_lib_no_default(all_parsers, names, prefix, func):
+def test_names_and_prefix_not_None_raises(all_parsers, func):
     # GH#39123
     f = StringIO("a,b\n1,2")
     parser = all_parsers
     msg = "Specified named and prefix; you can only specify one."
     with pytest.raises(ValueError, match=msg):
-        getattr(parser, func)(f, names=names, prefix=prefix)
+        getattr(parser, func)(f, names=["a", "b"], prefix="x")
+
+
+@pytest.mark.parametrize("func", ["read_csv", "read_table"])
+@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)])
+def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func):
+    # GH42387
+    f = StringIO("a,b\n1,2")
+    expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]})
+    parser = all_parsers
+    result = getattr(parser, func)(f, names=names, sep=",", prefix=prefix, header=None)
+    tm.assert_frame_equal(result, expected)
 
 
 def test_dict_keys_as_names(all_parsers):
diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py
@@ -1,5 +1,4 @@
 import datetime as dt
-from datetime import datetime
 from itertools import combinations
 
 import dateutil
diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py
@@ -371,7 +371,7 @@ def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
         )
 
         res = dti1.append(dti3)
-        # tm.assert_index_equal(res, exp)
+        tm.assert_index_equal(res, exp)
 
         dts1 = Series(dti1)
         dts3 = Series(dti3)
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
@@ -79,9 +79,6 @@ def test_concat_copy(self):
                 assert b.values.base is not None
 
     def test_concat_with_group_keys(self):
-        df = DataFrame(np.random.randn(4, 3))
-        df2 = DataFrame(np.random.randn(4, 4))
-
         # axis=0
         df = DataFrame(np.random.randn(3, 4))
         df2 = DataFrame(np.random.randn(4, 4))
diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py
@@ -15,9 +15,9 @@ class TestDataFrameConcat:
     def test_concat_multiple_frames_dtypes(self):
 
         # GH#2759
-        A = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)
-        B = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
-        results = concat((A, B), axis=1).dtypes
+        df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)
+        df2 = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
+        results = concat((df1, df2), axis=1).dtypes
         expected = Series(
             [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2,
             index=["foo", "bar", 0, 1],
diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py
@@ -96,18 +96,18 @@ def test_concat_rename_index(self):
         tm.assert_frame_equal(result, exp)
         assert result.index.names == exp.index.names
 
-    @pytest.mark.parametrize("test_series", [True, False])
-    def test_concat_copy_index(self, test_series, axis):
+    def test_concat_copy_index_series(self, axis):
         # GH 29879
-        if test_series:
-            ser = Series([1, 2])
-            comb = concat([ser, ser], axis=axis, copy=True)
-            assert comb.index is not ser.index
-        else:
-            df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
-            comb = concat([df, df], axis=axis, copy=True)
-            assert comb.index is not df.index
-            assert comb.columns is not df.columns
+        ser = Series([1, 2])
+        comb = concat([ser, ser], axis=axis, copy=True)
+        assert comb.index is not ser.index
+
+    def test_concat_copy_index_frame(self, axis):
+        # GH 29879
+        df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
+        comb = concat([df, df], axis=axis, copy=True)
+        assert comb.index is not df.index
+        assert comb.columns is not df.columns
 
     def test_default_index(self):
         # is_series and ignore_index
diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py
@@ -32,8 +32,9 @@ def test_simple():
     tm.assert_numpy_array_equal(result, expected, check_dtype=False)
 
 
-def test_bins():
-    data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1])
+@pytest.mark.parametrize("func", [list, np.array])
+def test_bins(func):
+    data = func([0.2, 1.4, 2.5, 6.2, 9.7, 2.1])
     result, bins = cut(data, 3, retbins=True)
 
     intervals = IntervalIndex.from_breaks(bins.round(3))
@@ -68,18 +69,6 @@ def test_no_right():
     tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, 7.325, 9.7095]))
 
 
-def test_array_like():
-    data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]
-    result, bins = cut(data, 3, retbins=True)
-
-    intervals = IntervalIndex.from_breaks(bins.round(3))
-    intervals = intervals.take([0, 0, 0, 1, 2, 0])
-    expected = Categorical(intervals, ordered=True)
-
-    tm.assert_categorical_equal(result, expected)
-    tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7]))
-
-
 def test_bins_from_interval_index():
     c = cut(range(5), 3)
     expected = c
diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py
@@ -266,6 +266,19 @@ def test_timestamp_compare_oob_dt64(self):
         assert Timestamp.max < other + us
         # Note: numpy gets the reversed comparison wrong
 
+        # GH-42794
+        other = datetime(9999, 9, 9)
+        assert Timestamp.min < other
+        assert other > Timestamp.min
+        assert Timestamp.max < other
+        assert other > Timestamp.max
+
+        other = datetime(1, 1, 1)
+        assert Timestamp.max > other
+        assert other < Timestamp.max
+        assert Timestamp.min > other
+        assert other < Timestamp.min
+
     def test_compare_zerodim_array(self):
         # GH#26916
         ts = Timestamp.now()
diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py
@@ -1,3 +1,5 @@
+from datetime import datetime
+
 import numpy as np
 import pytest
 
@@ -128,6 +130,15 @@ def test_clip_with_datetimes(self):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_clip_with_timestamps_and_oob_datetimes(self):
+        # GH-42794
+        ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)])
+
+        result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
+        expected = Series([Timestamp.min, Timestamp.max], dtype="object")
+
+        tm.assert_series_equal(result, expected)
+
     def test_clip_pos_args_deprecation(self):
         # https://github.com/pandas-dev/pandas/issues/41485
         ser = Series([1, 2, 3])

Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,8 @@ Fixed regressions`
`19`	`19`	- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
`20`	`20`	- Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`)
`21`	`21`	- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
	`22`	+- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to ``None`` (:issue:`42387`)
	`23`	+- Fixed regression in comparisons between a :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
`22`	`24`	`-`
`23`	`25`
`24`	`26`	`.. ---------------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`	`1`	`import datetime as dt`
`2`		`-from datetime import datetime`
`3`	`2`	`from itertools import combinations`
`4`	`3`
`5`	`4`	`import dateutil`
Original file line number	Diff line number	Diff line change
`@@ -371,7 +371,7 @@ def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):`
`371`	`371`	`)`
`372`	`372`
`373`	`373`	`res = dti1.append(dti3)`
`374`		`- # tm.assert_index_equal(res, exp)`
	`374`	`+ tm.assert_index_equal(res, exp)`
`375`	`375`
`376`	`376`	`dts1 = Series(dti1)`
`377`	`377`	`dts3 = Series(dti3)`