diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 94d62ae988f0c..116bdd6e1d98f 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -19,6 +19,8 @@ Fixed regressions - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) +- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) +- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e4e9df5176459..fa86b7d9899af 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -270,9 +270,9 @@ cdef class _Timestamp(ABCTimestamp): if op == Py_EQ: return False if op == Py_LE or op == Py_LT: - return other.year <= self.year + return self.year <= other.year if op == Py_GE or op == Py_GT: - return other.year >= self.year + return self.year >= other.year cdef bint _can_compare(self, datetime other): if self.tzinfo is not None: diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 387df6c6a6b70..89591f27e9092 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -405,9 +405,7 @@ def extract_array( For an ndarray-backed Series / Index a PandasArray is returned. >>> extract_array(pd.Series([1, 2, 3])) - - [1, 2, 3] - Length: 3, dtype: int64 + array([1, 2, 3]) To extract all the way down to the ndarray, pass ``extract_numpy=True``. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8b82021375a28..27aa2ed939c1a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4754,7 +4754,8 @@ def drop( Parameters ---------- labels : single label or list-like - Index or column labels to drop. + Index or column labels to drop. A tuple will be used as a single + label and not treated as a list-like. axis : {0 or 'index', 1 or 'columns'}, default 0 Whether to drop labels from the index (0 or 'index') or columns (1 or 'columns'). @@ -4845,6 +4846,17 @@ def drop( weight 1.0 0.8 length 0.3 0.2 + >>> df.drop(index=('falcon', 'weight')) + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + length 1.5 1.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + length 1.5 0.8 + falcon speed 320.0 250.0 + length 0.3 0.2 + >>> df.drop(index='cow', columns='small') big lama speed 45.0 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5f12a918c0520..54271f0f9b492 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5406,6 +5406,9 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray] self._raise_if_missing(keyarr, indexer, axis_name) keyarr = self.take(indexer) + if isinstance(key, Index): + # GH 42790 - Preserve name from an Index + keyarr.name = key.name if keyarr.dtype.kind in ["m", "M"]: # DTI/TDI.take can infer a freq in some cases when we dont want one if isinstance(key, list) or ( diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 656d38a50f77f..03dc124807f09 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -4,6 +4,7 @@ from typing import ( Any, Callable, + Literal, ) import numpy as np @@ -417,12 +418,8 @@ def _bins_to_cuts( else: bins = unique_bins - side = "left" if right else "right" - # error: No overload variant of "searchsorted" of "ndarray" matches - # argument types "Any", "str" - ids = ensure_platform_int( - bins.searchsorted(x, side=side) # type: ignore[call-overload] - ) + side: Literal["left", "right"] = "left" if right else "right" + ids = ensure_platform_int(bins.searchsorted(x, side=side)) if include_lowest: ids[np.asarray(x) == bins[0]] = 1 diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 06bdbe3054a15..c639a4a9d494e 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1302,7 +1302,12 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - if names is not lib.no_default and prefix is not lib.no_default: + if ( + names is not None + and names is not lib.no_default + and prefix is not None + and prefix is not lib.no_default + ): raise ValueError("Specified named and prefix; you can only specify one.") kwds["names"] = None if names is lib.no_default else names diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 9d509d02c2e4f..6d27cc8686b65 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -417,8 +417,12 @@ def handle_shared_axes( except IndexError: # if gridspec is used, ax.rowNum and ax.colNum may different # from layout shape. in this case, use last_row logic + if compat.mpl_ge_3_4_0(): + is_last_row = lambda x: x.get_subplotspec().is_last_row() + else: + is_last_row = lambda x: x.is_last_row() for ax in axarr: - if ax.is_last_row(): + if is_last_row(ax): continue if sharex or _has_externally_shared_axis(ax, "x"): _remove_labels_from_axis(ax.xaxis) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 9d52e8ab25306..6692a06c79d45 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2432,6 +2432,18 @@ def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period): with pytest.raises(KeyError, match="not in index"): ser.loc[keys] + def test_loc_named_index(self): + # GH 42790 + df = DataFrame( + [[1, 2], [4, 5], [7, 8]], + index=["cobra", "viper", "sidewinder"], + columns=["max_speed", "shield"], + ) + expected = df.iloc[:2] + expected.index.name = "foo" + result = df.loc[Index(["cobra", "viper"], name="foo")] + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "columns, column_key, expected_columns", diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index a1c76e2740dbe..b2e528aa5f8d5 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -764,15 +764,24 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): @pytest.mark.parametrize("func", ["read_csv", "read_table"]) -@pytest.mark.parametrize("prefix", [None, "x"]) -@pytest.mark.parametrize("names", [None, ["a"]]) -def test_names_and_prefix_not_lib_no_default(all_parsers, names, prefix, func): +def test_names_and_prefix_not_None_raises(all_parsers, func): # GH#39123 f = StringIO("a,b\n1,2") parser = all_parsers msg = "Specified named and prefix; you can only specify one." with pytest.raises(ValueError, match=msg): - getattr(parser, func)(f, names=names, prefix=prefix) + getattr(parser, func)(f, names=["a", "b"], prefix="x") + + +@pytest.mark.parametrize("func", ["read_csv", "read_table"]) +@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)]) +def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func): + # GH42387 + f = StringIO("a,b\n1,2") + expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]}) + parser = all_parsers + result = getattr(parser, func)(f, names=names, sep=",", prefix=prefix, header=None) + tm.assert_frame_equal(result, expected) def test_dict_keys_as_names(all_parsers): diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 43fe72b0776ed..ea766089f880d 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -1,5 +1,4 @@ import datetime as dt -from datetime import datetime from itertools import combinations import dateutil diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 9bd098a9e4e72..b8b254e786194 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -371,7 +371,7 @@ def test_concatlike_datetimetz_to_object(self, tz_aware_fixture): ) res = dti1.append(dti3) - # tm.assert_index_equal(res, exp) + tm.assert_index_equal(res, exp) dts1 = Series(dti1) dts3 = Series(dti3) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 17a7089f0ac85..a4c9e333f4d9c 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -79,9 +79,6 @@ def test_concat_copy(self): assert b.values.base is not None def test_concat_with_group_keys(self): - df = DataFrame(np.random.randn(4, 3)) - df2 = DataFrame(np.random.randn(4, 4)) - # axis=0 df = DataFrame(np.random.randn(3, 4)) df2 = DataFrame(np.random.randn(4, 4)) diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 460546f4b478a..dde8c0c19165f 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -15,9 +15,9 @@ class TestDataFrameConcat: def test_concat_multiple_frames_dtypes(self): # GH#2759 - A = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) - B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) - results = concat((A, B), axis=1).dtypes + df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) + df2 = DataFrame(data=np.ones((10, 2)), dtype=np.float32) + results = concat((df1, df2), axis=1).dtypes expected = Series( [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2, index=["foo", "bar", 0, 1], diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index bd845f73c7c69..f8ad9d1084c53 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -96,18 +96,18 @@ def test_concat_rename_index(self): tm.assert_frame_equal(result, exp) assert result.index.names == exp.index.names - @pytest.mark.parametrize("test_series", [True, False]) - def test_concat_copy_index(self, test_series, axis): + def test_concat_copy_index_series(self, axis): # GH 29879 - if test_series: - ser = Series([1, 2]) - comb = concat([ser, ser], axis=axis, copy=True) - assert comb.index is not ser.index - else: - df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) - comb = concat([df, df], axis=axis, copy=True) - assert comb.index is not df.index - assert comb.columns is not df.columns + ser = Series([1, 2]) + comb = concat([ser, ser], axis=axis, copy=True) + assert comb.index is not ser.index + + def test_concat_copy_index_frame(self, axis): + # GH 29879 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + comb = concat([df, df], axis=axis, copy=True) + assert comb.index is not df.index + assert comb.columns is not df.columns def test_default_index(self): # is_series and ignore_index diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 127be504e82d5..c221a3a18911e 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -32,8 +32,9 @@ def test_simple(): tm.assert_numpy_array_equal(result, expected, check_dtype=False) -def test_bins(): - data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1]) +@pytest.mark.parametrize("func", [list, np.array]) +def test_bins(func): + data = func([0.2, 1.4, 2.5, 6.2, 9.7, 2.1]) result, bins = cut(data, 3, retbins=True) intervals = IntervalIndex.from_breaks(bins.round(3)) @@ -68,18 +69,6 @@ def test_no_right(): tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, 7.325, 9.7095])) -def test_array_like(): - data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1] - result, bins = cut(data, 3, retbins=True) - - intervals = IntervalIndex.from_breaks(bins.round(3)) - intervals = intervals.take([0, 0, 0, 1, 2, 0]) - expected = Categorical(intervals, ordered=True) - - tm.assert_categorical_equal(result, expected) - tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7])) - - def test_bins_from_interval_index(): c = cut(range(5), 3) expected = c diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index 555067f2aba1a..ee36223eb2496 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -266,6 +266,19 @@ def test_timestamp_compare_oob_dt64(self): assert Timestamp.max < other + us # Note: numpy gets the reversed comparison wrong + # GH-42794 + other = datetime(9999, 9, 9) + assert Timestamp.min < other + assert other > Timestamp.min + assert Timestamp.max < other + assert other > Timestamp.max + + other = datetime(1, 1, 1) + assert Timestamp.max > other + assert other < Timestamp.max + assert Timestamp.min > other + assert other < Timestamp.min + def test_compare_zerodim_array(self): # GH#26916 ts = Timestamp.now() diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index e4803a9cd3038..620f529b522ae 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -1,3 +1,5 @@ +from datetime import datetime + import numpy as np import pytest @@ -128,6 +130,15 @@ def test_clip_with_datetimes(self): ) tm.assert_series_equal(result, expected) + def test_clip_with_timestamps_and_oob_datetimes(self): + # GH-42794 + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)]) + + result = ser.clip(lower=Timestamp.min, upper=Timestamp.max) + expected = Series([Timestamp.min, Timestamp.max], dtype="object") + + tm.assert_series_equal(result, expected) + def test_clip_pos_args_deprecation(self): # https://github.com/pandas-dev/pandas/issues/41485 ser = Series([1, 2, 3])