diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ebf3428020652..1f26b6d9ae6ae 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3732,6 +3732,9 @@ def _setitem_array(self, key, value): self.iloc[indexer] = value else: + # Note: unlike self.iloc[:, indexer] = value, this will + # never try to overwrite values inplace + if isinstance(value, DataFrame): check_key_length(self.columns, key, value) for k1, k2 in zip(key, value.columns): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 05a9aab4a5554..b4d6e0ace4223 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1045,8 +1045,7 @@ def iset( self._rebuild_blknos_and_blklocs() # Note: we exclude DTA/TDA here - vdtype = getattr(value, "dtype", None) - value_is_extension_type = is_1d_only_ea_dtype(vdtype) + value_is_extension_type = is_1d_only_ea_dtype(value.dtype) # categorical/sparse/datetimetz if value_is_extension_type: diff --git a/pandas/core/series.py b/pandas/core/series.py index 7ee9a0bcdd9e1..996af80139458 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4538,6 +4538,7 @@ def rename( dtype: int64 """ if axis is not None: + # Make sure we raise if an invalid 'axis' is passed. axis = self._get_axis_number(axis) if callable(index) or is_dict_like(index): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index f341014110e18..2e6318955e119 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -617,7 +617,7 @@ def test_quantile_ea_with_na(self, obj, index): expected = type(obj)(expected) tm.assert_equal(result, expected) - # TODO: filtering can be removed after GH#39763 is fixed + # TODO(GH#39763): filtering can be removed after GH#39763 is fixed @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning") def test_quantile_ea_all_na(self, obj, index, frame_or_series): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1bb4b24266de0..f92bbe1c718ab 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2585,6 +2585,19 @@ def test_error_from_2darray(self, col_a, col_b): DataFrame({"a": col_a, "b": col_b}) +class TestDataFrameConstructorIndexInference: + def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): + rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M") + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M") + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({"s1": s1, "s2": s2}) + + exp = pd.period_range("1/1/1980", "1/1/2012", freq="M") + tm.assert_index_equal(df.index, exp) + + class TestDataFrameConstructorWithDtypeCoercion: def test_floating_values_integer_dtype(self): # GH#40110 make DataFrame behavior with arraylike floating data and diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index 7a4ba52cdfdd5..87ffe99896199 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -90,7 +90,7 @@ def test_union_sort_other_incomparable(self): @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") def test_union_sort_other_incomparable_true(self): - # TODO decide on True behaviour + # TODO(GH#25151): decide on True behaviour # sort=True idx = Index([1, pd.Timestamp("2000")]) with pytest.raises(TypeError, match=".*"): @@ -98,7 +98,7 @@ def test_union_sort_other_incomparable_true(self): @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") def test_intersection_equal_sort_true(self): - # TODO decide on True behaviour + # TODO(GH#25151): decide on True behaviour idx = Index(["c", "a", "b"]) sorted_ = Index(["a", "b", "c"]) tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 80c86e0103436..a99d2f590be97 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -746,7 +746,7 @@ def test_cached_range_bug(self): assert len(rng) == 50 assert rng[0] == datetime(2010, 9, 1, 5) - def test_timezone_comparaison_bug(self): + def test_timezone_comparison_bug(self): # smoke test start = Timestamp("20130220 10:00", tz="US/Eastern") result = date_range(start, periods=2, tz="US/Eastern") diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 2a1fa8a015ccc..507449eabfb6e 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -203,7 +203,7 @@ def test_difference_sort_special(): @pytest.mark.xfail(reason="Not implemented.") def test_difference_sort_special_true(): - # TODO decide on True behaviour + # TODO(GH#25151): decide on True behaviour idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) result = idx.difference([], sort=True) expected = MultiIndex.from_product([[0, 1], ["a", "b"]]) @@ -340,7 +340,7 @@ def test_intersect_equal_sort(): @pytest.mark.xfail(reason="Not implemented.") def test_intersect_equal_sort_true(): - # TODO decide on True behaviour + # TODO(GH#25151): decide on True behaviour idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) sorted_ = MultiIndex.from_product([[0, 1], ["a", "b"]]) tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) @@ -363,7 +363,7 @@ def test_union_sort_other_empty(slice_): @pytest.mark.xfail(reason="Not implemented.") def test_union_sort_other_empty_sort(slice_): - # TODO decide on True behaviour + # TODO(GH#25151): decide on True behaviour # # sort=True idx = MultiIndex.from_product([[1, 0], ["a", "b"]]) other = idx[:0] @@ -388,7 +388,7 @@ def test_union_sort_other_incomparable(): @pytest.mark.xfail(reason="Not implemented.") def test_union_sort_other_incomparable_sort(): - # TODO decide on True behaviour + # TODO(GH#25151): decide on True behaviour # # sort=True idx = MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) with pytest.raises(TypeError, match="Cannot compare"): diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py index 4045cc0b91313..72336d3e33b79 100644 --- a/pandas/tests/indexes/numeric/test_setops.py +++ b/pandas/tests/indexes/numeric/test_setops.py @@ -155,7 +155,7 @@ def test_union_sort_other_special(self, slice_): @pytest.mark.xfail(reason="Not implemented") @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) def test_union_sort_special_true(self, slice_): - # TODO: decide on True behaviour + # TODO(GH#25151): decide on True behaviour # sort=True idx = Index([1, 0, 2]) # default, sort=None diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index ce5c46dd55c0d..bac231ef0085d 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -153,18 +153,6 @@ def test_union_misc(self, sort): expected = index.astype(object).union(index2.astype(object), sort=sort) tm.assert_index_equal(result, expected) - # TODO: belongs elsewhere - def test_union_dataframe_index(self): - rng1 = period_range("1/1/1999", "1/1/2012", freq="M") - s1 = pd.Series(np.random.randn(len(rng1)), rng1) - - rng2 = period_range("1/1/1980", "12/1/2001", freq="M") - s2 = pd.Series(np.random.randn(len(rng2)), rng2) - df = pd.DataFrame({"s1": s1, "s2": s2}) - - exp = period_range("1/1/1980", "1/1/2012", freq="M") - tm.assert_index_equal(df.index, exp) - def test_intersection(self, sort): index = period_range("1/1/2000", "1/20/2000", freq="D") diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 277f686a8487a..c45a4c771856c 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -204,10 +204,10 @@ def test_delete_preserves_rangeindex_list_middle(self): loc = [1, 2, 3, 4] result = idx.delete(loc) expected = RangeIndex(0, 6, 5) - tm.assert_index_equal(result, expected, exact="equiv") # TODO: retain! + tm.assert_index_equal(result, expected, exact=True) result = idx.delete(loc[::-1]) - tm.assert_index_equal(result, expected, exact="equiv") # TODO: retain! + tm.assert_index_equal(result, expected, exact=True) def test_delete_all_preserves_rangeindex(self): idx = RangeIndex(0, 6, 1) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f1ece3e363bb6..50be69fb93d7c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -525,20 +525,6 @@ def test_asof_numeric_vs_bool_raises(self): with pytest.raises(TypeError, match=msg): right.asof(left) - # TODO: this tests Series.asof - def test_asof_nanosecond_index_access(self): - s = Timestamp("20130101").value - r = DatetimeIndex([s + 50 + i for i in range(100)]) - ser = Series(np.random.randn(100), index=r) - - first_value = ser.asof(ser.index[0]) - - # this does not yet work, as parsing strings is done via dateutil - # assert first_value == x['2013-01-01 00:00:00.000000050+0000'] - - expected_ts = np_datetime64_compat("2013-01-01 00:00:00.000000050+0000", "ns") - assert first_value == ser[Timestamp(expected_ts)] - @pytest.mark.parametrize("index", ["string"], indirect=True) def test_booleanindex(self, index): bool_index = np.ones(len(index), dtype=bool) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index a0e97223435e6..abe1c4fd03fcd 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -773,7 +773,7 @@ def test_difference_incomparable(self, opname): @pytest.mark.xfail(reason="Not implemented") @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) def test_difference_incomparable_true(self, opname): - # TODO: decide on True behaviour + # TODO(GH#25151): decide on True behaviour # # sort=True, raises a = Index([3, Timestamp("2000"), 1]) b = Index([2, Timestamp("1999"), 1]) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index d446d606d726f..7d2f68b00d95f 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -96,10 +96,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage # check we dont have a view on cat (may be undesired GH#39986) df.iloc[0, 0] = "gamma" - if overwrite: - assert cat[0] != "gamma" - else: - assert cat[0] != "gamma" + assert cat[0] != "gamma" # TODO with mixed dataframe ("split" path), we always overwrite the column frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d6402e027be98..a10288b2091ca 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -129,6 +129,21 @@ def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): with pytest.raises(err, match=msg): idxr[nd3] = 0 + def test_getitem_ndarray_0d(self): + # GH#24924 + key = np.array(0) + + # dataframe __getitem__ + df = DataFrame([[1, 2], [3, 4]]) + result = df[key] + expected = Series([1, 3], name=0) + tm.assert_series_equal(result, expected) + + # series __getitem__ + ser = Series([1, 2]) + result = ser[key] + assert result == 1 + def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index bc08c53784e76..b0aa05371271b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -6,7 +6,6 @@ time, timedelta, ) -from io import StringIO import re from dateutil.tz import gettz @@ -558,15 +557,27 @@ def test_loc_setitem_consistency_empty(self): def test_loc_setitem_consistency_slice_column_len(self): # .loc[:,column] setting with slice == len of the column # GH10408 - data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat -Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse -Region,Site,RespondentID,,,,, -Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, -Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes -Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, -Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" - - df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) + levels = [ + ["Region_1"] * 4, + ["Site_1", "Site_1", "Site_2", "Site_2"], + [3987227376, 3980680971, 3977723249, 3977723089], + ] + mi = MultiIndex.from_arrays(levels, names=["Region", "Site", "RespondentID"]) + + clevels = [ + ["Respondent", "Respondent", "Respondent", "OtherCat", "OtherCat"], + ["Something", "StartDate", "EndDate", "Yes/No", "SomethingElse"], + ] + cols = MultiIndex.from_arrays(clevels, names=["Level_0", "Level_1"]) + + values = [ + ["A", "5/25/2015 10:59", "5/25/2015 11:22", "Yes", np.nan], + ["A", "5/21/2015 9:40", "5/21/2015 9:52", "Yes", "Yes"], + ["A", "5/20/2015 8:27", "5/20/2015 8:41", "Yes", np.nan], + ["A", "5/20/2015 8:33", "5/20/2015 9:09", "Yes", "No"], + ] + df = DataFrame(values, index=mi, columns=cols) + df.loc[:, ("Respondent", "StartDate")] = to_datetime( df.loc[:, ("Respondent", "StartDate")] ) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 39611bce2b4fa..bf262e6755289 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -74,8 +74,8 @@ def _check(f, func, values=False): _check(f, "at") -class TestScalar2: - # TODO: Better name, just separating things that dont need Base class +class TestAtAndiAT: + # at and iat tests that don't need Base class def test_at_iat_coercion(self): @@ -214,19 +214,6 @@ def test_iat_setter_incompatible_assignment(self): expected = DataFrame({"a": [None, 1], "b": [4, 5]}) tm.assert_frame_equal(result, expected) - def test_getitem_zerodim_np_array(self): - # GH24924 - # dataframe __getitem__ - df = DataFrame([[1, 2], [3, 4]]) - result = df[np.array(0)] - expected = Series([1, 3], name=0) - tm.assert_series_equal(result, expected) - - # series __getitem__ - s = Series([1, 2]) - result = s[np.array(0)] - assert result == 1 - def test_iat_dont_wrap_object_datetimelike(): # GH#32809 .iat calls go through DataFrame._get_value, should not diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index 39674db6916c1..5bd73e6045e32 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -5,7 +5,7 @@ from pandas._config import detect_console_encoding -class MockEncoding: # TODO(py27): replace with mock +class MockEncoding: """ Used to add a side effect when accessing the 'encoding' property. If the side effect is a str in nature, the value will be returned. Otherwise, the diff --git a/pandas/tests/plotting/test_backend.py b/pandas/tests/plotting/test_backend.py index 2eef940ee9a40..be053a8f46051 100644 --- a/pandas/tests/plotting/test_backend.py +++ b/pandas/tests/plotting/test_backend.py @@ -71,7 +71,7 @@ def test_register_entrypoint(restore_backend): result = pandas.plotting._core._get_plot_backend("my_backend") assert result is mod - # TODO: https://github.com/pandas-dev/pandas/issues/27517 + # TODO(GH#27517): https://github.com/pandas-dev/pandas/issues/27517 # Remove the td.skip_if_no_mpl with pandas.option_context("plotting.backend", "my_backend"): result = pandas.plotting._core._get_plot_backend() diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index af9d6dd83bee3..8a83cdcbdefb0 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -671,13 +671,12 @@ def test_simple(self): tm.assert_frame_equal(result, expected) def test_stubs(self): - # GH9204 + # GH9204 wide_to_long call should not modify 'stubs' list df = DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) df.columns = ["id", "inc1", "inc2", "edu1", "edu2"] stubs = ["inc", "edu"] - # TODO: unused? - df_long = wide_to_long(df, stubs, i="id", j="age") # noqa + wide_to_long(df, stubs, i="id", j="age") assert stubs == ["inc", "edu"] diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 9110352d33c26..386ab4150c6ff 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -428,9 +428,6 @@ def test_conv_daily(self): ival_D_saturday = Period(freq="D", year=2007, month=1, day=6) ival_D_sunday = Period(freq="D", year=2007, month=1, day=7) - # TODO: unused? - # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) - ival_B_friday = Period(freq="B", year=2007, month=1, day=5) ival_B_monday = Period(freq="B", year=2007, month=1, day=8) diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 7a3f68fd3d990..8ddcf07934e21 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -2,8 +2,10 @@ import pytest from pandas._libs.tslibs import IncompatibleFrequency +from pandas.compat import np_datetime64_compat from pandas import ( + DatetimeIndex, Series, Timestamp, date_range, @@ -15,6 +17,20 @@ class TestSeriesAsof: + def test_asof_nanosecond_index_access(self): + ts = Timestamp("20130101").value + dti = DatetimeIndex([ts + 50 + i for i in range(100)]) + ser = Series(np.random.randn(100), index=dti) + + first_value = ser.asof(ser.index[0]) + + # this used to not work bc parsing was done by dateutil that didn't + # handle nanoseconds + assert first_value == ser["2013-01-01 00:00:00.000000050+0000"] + + expected_ts = np_datetime64_compat("2013-01-01 00:00:00.000000050+0000", "ns") + assert first_value == ser[Timestamp(expected_ts)] + def test_basic(self): # array or list or dates diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index ec060aa91e383..563c8f63df57d 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -49,9 +49,6 @@ def test_logical_operators_bool_dtype_with_empty(self): def test_logical_operators_int_dtype_with_int_dtype(self): # GH#9016: support bitwise op for integer types - # TODO: unused - # s_0101 = Series([0, 1, 0, 1]) - s_0123 = Series(range(4), dtype="int64") s_3333 = Series([3] * 4) s_4444 = Series([4] * 4)