From 198474ddea103a78304e6907213b8344abb4241a Mon Sep 17 00:00:00 2001 From: Vishal Date: Thu, 5 Mar 2020 21:45:09 +0530 Subject: [PATCH 01/15] BUG: Fix issue with datetime[ns, tz] input in Block.setitem GH32395 --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/internals/blocks.py | 3 +++ pandas/tests/internals/test_internals.py | 33 +++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 808e6ae709ce9..d8f26011f1f7a 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -23,6 +23,7 @@ Fixed regressions - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) +- Fixed regression in :meth:`pandas.core.internals.blocks.Block.setitem` throwing an error when a datetime64[ns, tz] value is provided (:issue:`32395`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 70fd3ecdc2098..a2c472ded7394 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -839,6 +839,9 @@ def setitem(self, indexer, value): # coerce if block dtype can store value values = self.values + if is_object_dtype(values) and isinstance(value, DatetimeArray): + value = value.astype(object) + if self._can_hold_element(value): # We only get here for non-Extension Blocks, so _try_coerce_args # is only relevant for DatetimeBlock and TimedeltaBlock diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0b9d84d261708..33ed22e734251 
100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from datetime import date, datetime +from datetime import date, datetime, timezone import itertools import operator import re @@ -1242,3 +1242,34 @@ def test_interleave_non_unique_cols(): assert df_unique.values.shape == df.values.shape tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) + + +def test_assign_df_datetime_column_with_index(): + df = pd.DataFrame( + [ + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + ], + columns=["data"], + ) + df2 = pd.DataFrame(index=df.index) + df2.loc[df.index, "data"] = df["data"] + tm.assert_numpy_array_equal(np.array(df.data), np.array(df2.data)) + assert df2.data.dtype == np.object + + +def test_assign_df_datetime_column_without_index(): + df = pd.DataFrame( + [ + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], + ], + columns=["data"], + ) + df2 = pd.DataFrame(index=df.index) + df2.loc[:, "data"] = df["data"] + tm.assert_series_equal(df.data, df2.data) From 6eba1c98e1b39c3c38dbc06cea06dbefb658b5a6 Mon Sep 17 00:00:00 2001 From: Vishal Date: Fri, 6 Mar 2020 11:37:00 +0530 Subject: [PATCH 02/15] Added tests for series, moved tests to test_loc.py --- doc/source/whatsnew/v1.0.2.rst | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/tests/indexing/test_loc.py | 40 ++++++++++++++++++++++++ pandas/tests/internals/test_internals.py | 33 +------------------ 4 files changed, 43 insertions(+), 34 deletions(-) diff --git 
a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index d8f26011f1f7a..44a6adb999c3d 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -23,7 +23,7 @@ Fixed regressions - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) -- Fixed regression in :meth:`pandas.core.internals.blocks.Block.setitem` throwing an error when a datetime64[ns, tz] value is provided (:issue:`32395`) +- Fixed regression in :meth:`loc.__setitem__` throwing an error when a datetime64[ns, tz] value is provided (:issue:`32395`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a2c472ded7394..738bae62dff99 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -839,7 +839,7 @@ def setitem(self, indexer, value): # coerce if block dtype can store value values = self.values - if is_object_dtype(values) and isinstance(value, DatetimeArray): + if is_object_dtype(values.dtype) and isinstance(value, DatetimeArray): value = value.astype(object) if self._can_hold_element(value): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4d042af8d59b4..7344a249329ec 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,4 +1,5 @@ """ test label based indexing with loc """ +from datetime import timezone from io import StringIO import re @@ -975,3 +976,42 @@ def test_loc_mixed_int_float(): result = ser.loc[1] assert result == 0 + + +def 
test_loc_setitem_df_datetime64tz_column_with_index(): + df = pd.DataFrame( + pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] + ) + df2 = pd.DataFrame(index=df.index) + df2.loc[df.index, "data"] = df["data"] + tm.assert_numpy_array_equal(np.array(df.data), np.array(df2.data)) + assert df2.data.dtype == np.object + + +def test_loc_setitem_df_datetime64tz_column_without_index(): + df = pd.DataFrame( + pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] + ) + df2 = pd.DataFrame(index=df.index) + df2.loc[:, "data"] = df["data"] + tm.assert_series_equal(df.data, df2.data) + + +def test_loc_setitem_series_datetime64tz_with_index(): + s1 = pd.Series( + pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" + ) + s2 = pd.Series(index=s1.index) + s2.loc[s1.index] = s1 + tm.assert_numpy_array_equal(np.array(s1), np.array(s2)) + assert s2.dtype == np.object + + +def test_loc_setitem_series_datetime64tz_without_index(): + s1 = pd.Series( + pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" + ) + s2 = pd.Series(index=s1.index) + s2.loc[:] = s1 + tm.assert_numpy_array_equal(np.array(s1), np.array(s2)) + assert s2.dtype == np.object diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 33ed22e734251..0b9d84d261708 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from datetime import date, datetime, timezone +from datetime import date, datetime import itertools import operator import re @@ -1242,34 +1242,3 @@ def test_interleave_non_unique_cols(): assert df_unique.values.shape == df.values.shape tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) - - -def test_assign_df_datetime_column_with_index(): - df = pd.DataFrame( - [ - [datetime(2020, 2, 28, 13, 51, 
27, tzinfo=timezone.utc)], - [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], - [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], - [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], - ], - columns=["data"], - ) - df2 = pd.DataFrame(index=df.index) - df2.loc[df.index, "data"] = df["data"] - tm.assert_numpy_array_equal(np.array(df.data), np.array(df2.data)) - assert df2.data.dtype == np.object - - -def test_assign_df_datetime_column_without_index(): - df = pd.DataFrame( - [ - [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], - [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], - [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], - [datetime(2020, 2, 28, 13, 51, 27, tzinfo=timezone.utc)], - ], - columns=["data"], - ) - df2 = pd.DataFrame(index=df.index) - df2.loc[:, "data"] = df["data"] - tm.assert_series_equal(df.data, df2.data) From 11b4f299ee71fdac2b839f0053523b60f6c12e07 Mon Sep 17 00:00:00 2001 From: Vishal Date: Fri, 6 Mar 2020 12:16:33 +0530 Subject: [PATCH 03/15] Add dtype to series object in tests to suppress warning --- pandas/tests/indexing/test_loc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 7344a249329ec..316c0df919b06 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1001,7 +1001,7 @@ def test_loc_setitem_series_datetime64tz_with_index(): s1 = pd.Series( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" ) - s2 = pd.Series(index=s1.index) + s2 = pd.Series(index=s1.index, dtype=np.object, name="data") s2.loc[s1.index] = s1 tm.assert_numpy_array_equal(np.array(s1), np.array(s2)) assert s2.dtype == np.object @@ -1011,7 +1011,7 @@ def test_loc_setitem_series_datetime64tz_without_index(): s1 = pd.Series( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" ) - s2 = pd.Series(index=s1.index) + s2 = pd.Series(index=s1.index, 
dtype=np.object, name="data") s2.loc[:] = s1 tm.assert_numpy_array_equal(np.array(s1), np.array(s2)) assert s2.dtype == np.object From 1b3cba13e6886b10a65bc2258e06718bb32bf676 Mon Sep 17 00:00:00 2001 From: Vishal Date: Sat, 7 Mar 2020 00:13:46 +0530 Subject: [PATCH 04/15] Fix whatsnew --- doc/source/whatsnew/v1.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 44a6adb999c3d..41110cd8b6ef7 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -23,7 +23,7 @@ Fixed regressions - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) -- Fixed regression in :meth:`loc.__setitem__` throwing an error when a datetime64[ns, tz] value is provided (:issue:`32395`) +- Fixed regression in :meth:`DataFrame.loc`, :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - From 3e78985cd5e4a2501e06e03e9206c963b75ec134 Mon Sep 17 00:00:00 2001 From: Vishal Date: Fri, 27 Mar 2020 22:52:35 +0530 Subject: [PATCH 05/15] Move whatsnew to 1.1.0 --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 20415bba99476..3dc7cf91d1535 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -341,6 +341,7 @@ Indexing - Fix to preserve the ability to index with the "nearest" method with xarray's CFTimeIndex, an :class:`Index` subclass (`pydata/xarray#3751 `_, 
:issue:`32905`). - Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`) - Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) +- Fixed regression in :meth:`DataFrame.loc`, :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) Missing ^^^^^^^ From 7e600c700d8ff370018c62ecbd24d40040d4fb5e Mon Sep 17 00:00:00 2001 From: Vishal Date: Fri, 3 Apr 2020 04:48:16 +0530 Subject: [PATCH 06/15] Generalise fix for other extension array types --- pandas/core/internals/blocks.py | 9 ++- pandas/tests/indexing/test_loc.py | 120 +++++++++++++++++++++++++++--- 2 files changed, 116 insertions(+), 13 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3601ac0a21cf5..0b6692e4fb579 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -840,8 +840,6 @@ def setitem(self, indexer, value): # coerce if block dtype can store value values = self.values - if is_object_dtype(values.dtype) and isinstance(value, DatetimeArray): - value = value.astype(object) if self._can_hold_element(value): # We only get here for non-Extension Blocks, so _try_coerce_args @@ -874,8 +872,10 @@ def setitem(self, indexer, value): if is_extension_array_dtype(getattr(value, "dtype", None)): # We need to be careful not to allow through strings that # can be parsed to EADtypes + is_ea_value = True arr_value = value else: + is_ea_value = False arr_value = np.array(value) if transpose: @@ -903,6 +903,11 @@ def setitem(self, indexer, value): values[indexer] = value return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) + elif exact_match and is_ea_value: + # GH#?32395 if we're going to replace the values entirely, just + # substitute in the new array + return 
self.make_block(arr_value) + # if we are an exact match (ex-broadcasting), # then use the resultant dtype elif exact_match: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 55bfb467c5113..c1d345f55def5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1098,14 +1098,25 @@ def test_loc_datetimelike_mismatched_dtypes(): df["a"].loc[tdi] -def test_loc_setitem_df_datetime64tz_column_with_index(): +def test_loc_setitem_df_datetime64tz_full_column_with_index(): df = pd.DataFrame( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] ) df2 = pd.DataFrame(index=df.index) df2.loc[df.index, "data"] = df["data"] - tm.assert_numpy_array_equal(np.array(df.data), np.array(df2.data)) - assert df2.data.dtype == np.object + tm.assert_frame_equal(df, df2) + + +def test_loc_setitem_df_datetime64tz_slice(): + df = pd.DataFrame( + pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] + ) + df2 = pd.DataFrame(index=df.index) + expected = pd.DataFrame([pd.Timestamp("2020-01-01", tz=timezone.utc), pd.Timestamp("2020-01-02", tz=timezone.utc), + pd.Timestamp("2020-01-03", tz=timezone.utc), pd.NaT, pd.NaT, pd.NaT], columns=["data"], + dtype="object") + df2.loc[df.index[:3], "data"] = df["data"][:3] + tm.assert_frame_equal(df2, expected) def test_loc_setitem_df_datetime64tz_column_without_index(): @@ -1114,24 +1125,111 @@ def test_loc_setitem_df_datetime64tz_column_without_index(): ) df2 = pd.DataFrame(index=df.index) df2.loc[:, "data"] = df["data"] - tm.assert_series_equal(df.data, df2.data) + tm.assert_frame_equal(df, df2) -def test_loc_setitem_series_datetime64tz_with_index(): +def test_loc_setitem_series_datetime64tz_full_with_index(): s1 = pd.Series( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" ) - s2 = pd.Series(index=s1.index, dtype=np.object, name="data") + s2 = pd.Series(index=s1.index, dtype="object", name="data") 
s2.loc[s1.index] = s1 - tm.assert_numpy_array_equal(np.array(s1), np.array(s2)) - assert s2.dtype == np.object + tm.assert_series_equal(s1, s2) + + +def test_loc_setitem_series_datetime64tz_slice(): + s1 = pd.Series( + pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" + ) + s2 = pd.Series(index=s1.index, dtype="object", name="data") + expected = pd.Series([*pd.date_range("2020-01-01", "2020-01-03", 3, tz=timezone.utc), np.nan, np.nan, np.nan], + name="data", + dtype="object") + s2.loc[s1.index[:3]] = s1[:3] + tm.assert_series_equal(expected, s2) def test_loc_setitem_series_datetime64tz_without_index(): s1 = pd.Series( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" ) - s2 = pd.Series(index=s1.index, dtype=np.object, name="data") + s2 = pd.Series(index=s1.index, dtype="object", name="data") s2.loc[:] = s1 - tm.assert_numpy_array_equal(np.array(s1), np.array(s2)) - assert s2.dtype == np.object + tm.assert_series_equal(s1, s2) + + +def test_loc_setitem_series_timedelta64_full_with_index(): + s1 = pd.Series( + pd.timedelta_range(start='1 day', periods=4), name="data" + ) + s2 = pd.Series(index=s1.index, dtype="object", name="data") + s2.loc[s1.index] = s1 + tm.assert_series_equal(s1, s2) + + +def test_loc_setitem_df_period_full_column_with_index(): + df = pd.DataFrame( + pd.period_range(start='2020Q1', periods=5), columns=["data"] + ) + df2 = pd.DataFrame(index=df.index) + df2.loc[df.index, "data"] = df["data"] + tm.assert_frame_equal(df, df2) + + +def test_loc_setitem_series_interval_full_with_index(): + s1 = pd.Series( + pd.interval_range(start=0, end=5), name="data" + ) + s2 = pd.Series(index=s1.index, dtype="object", name="data") + s2.loc[s1.index] = s1 + tm.assert_series_equal(s1, s2) + + +def test_loc_setitem_df_sparse_full_column_with_index(): + df = pd.DataFrame( + np.random.randn(100), columns=["data"] + ) + df.iloc[5:95] = np.nan + df = df.astype(pd.SparseDtype("int", np.nan)) + df2 = 
pd.DataFrame(index=df.index) + df2.loc[df.index, "data"] = df["data"] + tm.assert_frame_equal(df, df2) + + +def test_loc_setitem_series_categorical_full_with_index(): + s1 = pd.Series( + pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data" + ) + s2 = pd.Series(index=s1.index, dtype="object", name="data") + s2.loc[s1.index] = s1 + tm.assert_series_equal(s1, s2) + + +def test_loc_setitem_series_categorical_slice(): + s1 = pd.Series( + pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data" + ) + s2 = pd.Series(index=s1.index, dtype="object", name="data") + expected = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [np.nan] * 10), name="data", dtype=object) + s2.loc[s1.index[:15]] = s1[:15] + tm.assert_series_equal(expected, s2) + + +def test_loc_setitem_df_interval_slice(): + df = pd.DataFrame( + pd.interval_range(start=0, end=5), columns=["data"] + ) + df2 = pd.DataFrame(index=df.index) + expected = pd.DataFrame([*pd.interval_range(start=0, end=3), np.nan, np.nan], columns=["data"]) + df2.loc[df.index[:3], "data"] = df["data"][:3] + tm.assert_frame_equal(df2, expected) + + +def test_loc_setitem_series_period_slice(): + s1 = pd.Series( + pd.period_range(start='2020Q1', periods=5), name="data" + ) + s2 = pd.Series(index=s1.index, dtype="object", name="data") + expected = pd.Series([*pd.period_range(start='2020Q1', periods=3), np.nan, np.nan], name="data", dtype=object) + s2.loc[s1.index[:3]] = s1[:3] + tm.assert_series_equal(expected, s2) From b136ce306538e0019d05d98ad392f8dadae4f449 Mon Sep 17 00:00:00 2001 From: Vishal Date: Fri, 3 Apr 2020 21:33:19 +0530 Subject: [PATCH 07/15] Add new whatsnew --- doc/source/whatsnew/v1.0.4.rst | 28 ++++++++++++ pandas/tests/indexing/test_loc.py | 74 +++++++++++++++++-------------- 2 files changed, 69 insertions(+), 33 deletions(-) create mode 100644 doc/source/whatsnew/v1.0.4.rst diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst new file mode 100644 index 0000000000000..ef3bc270c4ff8 --- /dev/null 
+++ b/doc/source/whatsnew/v1.0.4.rst @@ -0,0 +1,28 @@ + +.. _whatsnew_104: + +What's new in 1.0.3 (April 15, 2020) +------------------------------------ + +These are the changes in pandas 1.0.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_104.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.loc`, :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) + +.. _whatsnew_104.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.3..v1.0.4|HEAD diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 59c45802cd4b3..d71c9a813134b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1123,9 +1123,18 @@ def test_loc_setitem_df_datetime64tz_slice(): pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] ) df2 = pd.DataFrame(index=df.index) - expected = pd.DataFrame([pd.Timestamp("2020-01-01", tz=timezone.utc), pd.Timestamp("2020-01-02", tz=timezone.utc), - pd.Timestamp("2020-01-03", tz=timezone.utc), pd.NaT, pd.NaT, pd.NaT], columns=["data"], - dtype="object") + expected = pd.DataFrame( + [ + pd.Timestamp("2020-01-01", tz=timezone.utc), + pd.Timestamp("2020-01-02", tz=timezone.utc), + pd.Timestamp("2020-01-03", tz=timezone.utc), + pd.NaT, + pd.NaT, + pd.NaT, + ], + columns=["data"], + dtype="object", + ) df2.loc[df.index[:3], "data"] = df["data"][:3] tm.assert_frame_equal(df2, expected) @@ -1153,9 +1162,16 @@ def test_loc_setitem_series_datetime64tz_slice(): pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" ) s2 = pd.Series(index=s1.index, dtype="object", name="data") - expected = pd.Series([*pd.date_range("2020-01-01", "2020-01-03", 3, tz=timezone.utc), np.nan, np.nan, np.nan], - 
name="data", - dtype="object") + expected = pd.Series( + [ + *pd.date_range("2020-01-01", "2020-01-03", 3, tz=timezone.utc), + np.nan, + np.nan, + np.nan, + ], + name="data", + dtype="object", + ) s2.loc[s1.index[:3]] = s1[:3] tm.assert_series_equal(expected, s2) @@ -1170,36 +1186,28 @@ def test_loc_setitem_series_datetime64tz_without_index(): def test_loc_setitem_series_timedelta64_full_with_index(): - s1 = pd.Series( - pd.timedelta_range(start='1 day', periods=4), name="data" - ) + s1 = pd.Series(pd.timedelta_range(start="1 day", periods=4), name="data") s2 = pd.Series(index=s1.index, dtype="object", name="data") s2.loc[s1.index] = s1 tm.assert_series_equal(s1, s2) def test_loc_setitem_df_period_full_column_with_index(): - df = pd.DataFrame( - pd.period_range(start='2020Q1', periods=5), columns=["data"] - ) + df = pd.DataFrame(pd.period_range(start="2020Q1", periods=5), columns=["data"]) df2 = pd.DataFrame(index=df.index) df2.loc[df.index, "data"] = df["data"] tm.assert_frame_equal(df, df2) def test_loc_setitem_series_interval_full_with_index(): - s1 = pd.Series( - pd.interval_range(start=0, end=5), name="data" - ) + s1 = pd.Series(pd.interval_range(start=0, end=5), name="data") s2 = pd.Series(index=s1.index, dtype="object", name="data") s2.loc[s1.index] = s1 tm.assert_series_equal(s1, s2) def test_loc_setitem_df_sparse_full_column_with_index(): - df = pd.DataFrame( - np.random.randn(100), columns=["data"] - ) + df = pd.DataFrame(np.random.randn(100), columns=["data"]) df.iloc[5:95] = np.nan df = df.astype(pd.SparseDtype("int", np.nan)) df2 = pd.DataFrame(index=df.index) @@ -1208,39 +1216,39 @@ def test_loc_setitem_df_sparse_full_column_with_index(): def test_loc_setitem_series_categorical_full_with_index(): - s1 = pd.Series( - pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data" - ) + s1 = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data") s2 = pd.Series(index=s1.index, dtype="object", name="data") s2.loc[s1.index] = s1 
tm.assert_series_equal(s1, s2) def test_loc_setitem_series_categorical_slice(): - s1 = pd.Series( - pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data" - ) + s1 = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data") s2 = pd.Series(index=s1.index, dtype="object", name="data") - expected = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [np.nan] * 10), name="data", dtype=object) + expected = pd.Series( + pd.Categorical([1] * 10 + [2] * 5 + [np.nan] * 10), name="data", dtype=object + ) s2.loc[s1.index[:15]] = s1[:15] tm.assert_series_equal(expected, s2) def test_loc_setitem_df_interval_slice(): - df = pd.DataFrame( - pd.interval_range(start=0, end=5), columns=["data"] - ) + df = pd.DataFrame(pd.interval_range(start=0, end=5), columns=["data"]) df2 = pd.DataFrame(index=df.index) - expected = pd.DataFrame([*pd.interval_range(start=0, end=3), np.nan, np.nan], columns=["data"]) + expected = pd.DataFrame( + [*pd.interval_range(start=0, end=3), np.nan, np.nan], columns=["data"] + ) df2.loc[df.index[:3], "data"] = df["data"][:3] tm.assert_frame_equal(df2, expected) def test_loc_setitem_series_period_slice(): - s1 = pd.Series( - pd.period_range(start='2020Q1', periods=5), name="data" - ) + s1 = pd.Series(pd.period_range(start="2020Q1", periods=5), name="data") s2 = pd.Series(index=s1.index, dtype="object", name="data") - expected = pd.Series([*pd.period_range(start='2020Q1', periods=3), np.nan, np.nan], name="data", dtype=object) + expected = pd.Series( + [*pd.period_range(start="2020Q1", periods=3), np.nan, np.nan], + name="data", + dtype=object, + ) s2.loc[s1.index[:3]] = s1[:3] tm.assert_series_equal(expected, s2) From d433d7c3ffdb28b597c24ad04d04d027e3e0c734 Mon Sep 17 00:00:00 2001 From: Vishal Date: Fri, 3 Apr 2020 23:13:46 +0530 Subject: [PATCH 08/15] Fix doc --- doc/source/whatsnew/index.rst | 1 + doc/source/whatsnew/v1.0.3.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/index.rst 
b/doc/source/whatsnew/index.rst index 50333b54ca903..b60262cd5bdf9 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -24,6 +24,7 @@ Version 1.0 .. toctree:: :maxdepth: 2 + v1.0.4 v1.0.3 v1.0.2 v1.0.1 diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst index 26d06433bda0c..62e6ae5b1c5cc 100644 --- a/doc/source/whatsnew/v1.0.3.rst +++ b/doc/source/whatsnew/v1.0.3.rst @@ -26,4 +26,4 @@ Bug fixes Contributors ~~~~~~~~~~~~ -.. contributors:: v1.0.2..v1.0.3|HEAD +.. contributors:: v1.0.2..v1.0.3 From 00199f659de1b98687a2a1e609715721d50a59c0 Mon Sep 17 00:00:00 2001 From: Vishal Date: Fri, 3 Apr 2020 23:32:13 +0530 Subject: [PATCH 09/15] Fix version number in whatsnew --- doc/source/whatsnew/v1.0.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst index ef3bc270c4ff8..cd8e4d07dd320 100644 --- a/doc/source/whatsnew/v1.0.4.rst +++ b/doc/source/whatsnew/v1.0.4.rst @@ -1,7 +1,7 @@ .. _whatsnew_104: -What's new in 1.0.3 (April 15, 2020) +What's new in 1.0.4 (April 15, 2020) ------------------------------------ These are the changes in pandas 1.0.4. 
See :ref:`release` for a full changelog From 8cfa045a0d955b23b78e71406bd8eb0ba11ca5f2 Mon Sep 17 00:00:00 2001 From: Vishal Date: Sat, 4 Apr 2020 02:29:32 +0530 Subject: [PATCH 10/15] TST: Fix precision test in tests.computation.test_eval.check_alignment() --- pandas/tests/computation/test_eval.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 08d8d5ca342b7..c10ac5b8d92a9 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -314,7 +314,10 @@ def check_alignment(self, result, nlhs, ghs, op): # direct numpy comparison expected = self.ne.evaluate(f"nlhs {op} ghs") - tm.assert_numpy_array_equal(result.values, expected) + if np.issubdtype(result.values.dtype, np.floating): + tm.assert_almost_equal(result.values, expected) + else: + tm.assert_numpy_array_equal(result.values, expected) # modulus, pow, and floor division require special casing From 4f8fccd732f5054333317bb7fa436e83d848bda3 Mon Sep 17 00:00:00 2001 From: Vishal Date: Sun, 5 Apr 2020 01:28:56 +0530 Subject: [PATCH 11/15] Revert "TST: Fix precision test in tests.computation.test_eval.check_alignment()" This reverts commit 8cfa045a --- pandas/tests/computation/test_eval.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index c10ac5b8d92a9..08d8d5ca342b7 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -314,10 +314,7 @@ def check_alignment(self, result, nlhs, ghs, op): # direct numpy comparison expected = self.ne.evaluate(f"nlhs {op} ghs") - if np.issubdtype(result.values.dtype, np.floating): - tm.assert_almost_equal(result.values, expected) - else: - tm.assert_numpy_array_equal(result.values, expected) + tm.assert_numpy_array_equal(result.values, expected) # modulus, pow, and floor division require special casing 
From 0f8a913e03a846bcfd41b3d65667e1c4ac9bd84e Mon Sep 17 00:00:00 2001 From: Vishal Date: Sun, 5 Apr 2020 01:37:14 +0530 Subject: [PATCH 12/15] CLN: Test style tests.indexing.test_loc --- pandas/core/internals/blocks.py | 2 +- pandas/tests/indexing/test_loc.py | 35 +++++++++++-------------------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f6c608877fef0..b520d46058cae 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -882,7 +882,7 @@ def setitem(self, indexer, value): return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) elif exact_match and is_ea_value: - # GH#?32395 if we're going to replace the values entirely, just + # GH#32395 if we're going to replace the values entirely, just # substitute in the new array return self.make_block(arr_value) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d71c9a813134b..1d6bced20e663 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1158,20 +1158,11 @@ def test_loc_setitem_series_datetime64tz_full_with_index(): def test_loc_setitem_series_datetime64tz_slice(): - s1 = pd.Series( - pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" - ) + dates = pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc) + s1 = pd.Series(dates, name="data") s2 = pd.Series(index=s1.index, dtype="object", name="data") - expected = pd.Series( - [ - *pd.date_range("2020-01-01", "2020-01-03", 3, tz=timezone.utc), - np.nan, - np.nan, - np.nan, - ], - name="data", - dtype="object", - ) + expected = pd.Series(dates, name="data", dtype="object") + expected[-3:] = pd.NaT s2.loc[s1.index[:3]] = s1[:3] tm.assert_series_equal(expected, s2) @@ -1233,22 +1224,20 @@ def test_loc_setitem_series_categorical_slice(): def test_loc_setitem_df_interval_slice(): - df = pd.DataFrame(pd.interval_range(start=0, end=5), 
columns=["data"]) + intervals = pd.interval_range(start=0, end=5) + df = pd.DataFrame(intervals, columns=["data"]) df2 = pd.DataFrame(index=df.index) - expected = pd.DataFrame( - [*pd.interval_range(start=0, end=3), np.nan, np.nan], columns=["data"] - ) + expected = pd.DataFrame(intervals, columns=["data"], dtype="object") + expected.data[-2:] = np.nan df2.loc[df.index[:3], "data"] = df["data"][:3] tm.assert_frame_equal(df2, expected) def test_loc_setitem_series_period_slice(): - s1 = pd.Series(pd.period_range(start="2020Q1", periods=5), name="data") + periods = pd.period_range(start="2020Q1", periods=5) + s1 = pd.Series(periods, name="data") s2 = pd.Series(index=s1.index, dtype="object", name="data") - expected = pd.Series( - [*pd.period_range(start="2020Q1", periods=3), np.nan, np.nan], - name="data", - dtype=object, - ) + expected = pd.Series(periods, name="data", dtype=object,) + expected[-2:] = np.nan s2.loc[s1.index[:3]] = s1[:3] tm.assert_series_equal(expected, s2) From a01a676918835404ba07d89e7b37c2b56a5a0258 Mon Sep 17 00:00:00 2001 From: Vishal Date: Mon, 6 Apr 2020 22:29:55 +0530 Subject: [PATCH 13/15] Modify tests after review --- doc/source/whatsnew/index.rst | 1 - doc/source/whatsnew/v1.0.3.rst | 2 +- doc/source/whatsnew/v1.0.4.rst | 28 ------- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/indexing/test_loc.py | 122 +++++++++++++++--------------- 5 files changed, 61 insertions(+), 93 deletions(-) delete mode 100644 doc/source/whatsnew/v1.0.4.rst diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index b60262cd5bdf9..50333b54ca903 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -24,7 +24,6 @@ Version 1.0 .. 
toctree:: :maxdepth: 2 - v1.0.4 v1.0.3 v1.0.2 v1.0.1 diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst index 62e6ae5b1c5cc..26d06433bda0c 100644 --- a/doc/source/whatsnew/v1.0.3.rst +++ b/doc/source/whatsnew/v1.0.3.rst @@ -26,4 +26,4 @@ Bug fixes Contributors ~~~~~~~~~~~~ -.. contributors:: v1.0.2..v1.0.3 +.. contributors:: v1.0.2..v1.0.3|HEAD diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst deleted file mode 100644 index cd8e4d07dd320..0000000000000 --- a/doc/source/whatsnew/v1.0.4.rst +++ /dev/null @@ -1,28 +0,0 @@ - -.. _whatsnew_104: - -What's new in 1.0.4 (April 15, 2020) ------------------------------------- - -These are the changes in pandas 1.0.4. See :ref:`release` for a full changelog -including other versions of pandas. - -{{ header }} - -.. --------------------------------------------------------------------------- - -.. _whatsnew_104.regressions: - -Fixed regressions -~~~~~~~~~~~~~~~~~ -- Fixed regression in :meth:`DataFrame.loc`, :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) - -.. _whatsnew_104.bug_fixes: - -Bug fixes -~~~~~~~~~ - -Contributors -~~~~~~~~~~~~ - -.. 
contributors:: v1.0.3..v1.0.4|HEAD diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8bff34dbdadad..3636155c9d102 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -363,6 +363,7 @@ Indexing - Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`) - Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) - Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`) +- Fixed regression in :meth:`DataFrame.loc`, :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 1d6bced20e663..d44dcb91df733 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1110,19 +1110,21 @@ def test_loc_with_period_index_indexer(): def test_loc_setitem_df_datetime64tz_full_column_with_index(): - df = pd.DataFrame( + # GH#32395 ea assignments with an index raise TypeError + expected = pd.DataFrame( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] ) - df2 = pd.DataFrame(index=df.index) - df2.loc[df.index, "data"] = df["data"] - tm.assert_frame_equal(df, df2) + result = pd.DataFrame(index=expected.index) + result.loc[expected.index, "data"] = expected["data"] + tm.assert_frame_equal(expected, result) def test_loc_setitem_df_datetime64tz_slice(): + # GH#32395 ea assignments with an index raise TypeError df = pd.DataFrame( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] ) - df2 = pd.DataFrame(index=df.index) + result = pd.DataFrame(index=df.index) expected = pd.DataFrame( [ 
pd.Timestamp("2020-01-01", tz=timezone.utc), @@ -1135,109 +1137,103 @@ def test_loc_setitem_df_datetime64tz_slice(): columns=["data"], dtype="object", ) - df2.loc[df.index[:3], "data"] = df["data"][:3] - tm.assert_frame_equal(df2, expected) - - -def test_loc_setitem_df_datetime64tz_column_without_index(): - df = pd.DataFrame( - pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] - ) - df2 = pd.DataFrame(index=df.index) - df2.loc[:, "data"] = df["data"] - tm.assert_frame_equal(df, df2) + result.loc[df.index[:3], "data"] = df["data"][:3] + tm.assert_frame_equal(expected, result) def test_loc_setitem_series_datetime64tz_full_with_index(): - s1 = pd.Series( + # GH#32395 ea assignments with an index raise TypeError + expected = pd.Series( pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" ) - s2 = pd.Series(index=s1.index, dtype="object", name="data") - s2.loc[s1.index] = s1 - tm.assert_series_equal(s1, s2) + result = pd.Series(index=expected.index, dtype="object", name="data") + result.loc[expected.index] = expected + tm.assert_series_equal(expected, result) def test_loc_setitem_series_datetime64tz_slice(): + # GH#32395 ea assignments with an index raise TypeError dates = pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc) s1 = pd.Series(dates, name="data") - s2 = pd.Series(index=s1.index, dtype="object", name="data") + result = pd.Series(index=s1.index, dtype="object", name="data") expected = pd.Series(dates, name="data", dtype="object") expected[-3:] = pd.NaT - s2.loc[s1.index[:3]] = s1[:3] - tm.assert_series_equal(expected, s2) - - -def test_loc_setitem_series_datetime64tz_without_index(): - s1 = pd.Series( - pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" - ) - s2 = pd.Series(index=s1.index, dtype="object", name="data") - s2.loc[:] = s1 - tm.assert_series_equal(s1, s2) + result.loc[s1.index[:3]] = s1[:3] + tm.assert_series_equal(expected, result) def 
test_loc_setitem_series_timedelta64_full_with_index(): - s1 = pd.Series(pd.timedelta_range(start="1 day", periods=4), name="data") - s2 = pd.Series(index=s1.index, dtype="object", name="data") - s2.loc[s1.index] = s1 - tm.assert_series_equal(s1, s2) + # GH#32395 ea assignments with an index raise TypeError + expected = pd.Series(pd.timedelta_range(start="1 day", periods=4), name="data") + result = pd.Series(index=expected.index, dtype="object", name="data") + result.loc[expected.index] = expected + tm.assert_series_equal(expected, result) def test_loc_setitem_df_period_full_column_with_index(): - df = pd.DataFrame(pd.period_range(start="2020Q1", periods=5), columns=["data"]) - df2 = pd.DataFrame(index=df.index) - df2.loc[df.index, "data"] = df["data"] - tm.assert_frame_equal(df, df2) + # GH#32395 ea assignments with an index raise TypeError + expected = pd.DataFrame( + pd.period_range(start="2020Q1", periods=5), columns=["data"] + ) + result = pd.DataFrame(index=expected.index) + result.loc[expected.index, "data"] = expected["data"] + tm.assert_frame_equal(expected, result) def test_loc_setitem_series_interval_full_with_index(): - s1 = pd.Series(pd.interval_range(start=0, end=5), name="data") - s2 = pd.Series(index=s1.index, dtype="object", name="data") - s2.loc[s1.index] = s1 - tm.assert_series_equal(s1, s2) + # GH#32395 ea assignments with an index raise TypeError + expected = pd.Series(pd.interval_range(start=0, end=5), name="data") + result = pd.Series(index=expected.index, dtype="object", name="data") + result.loc[expected.index] = expected + tm.assert_series_equal(expected, result) def test_loc_setitem_df_sparse_full_column_with_index(): - df = pd.DataFrame(np.random.randn(100), columns=["data"]) - df.iloc[5:95] = np.nan - df = df.astype(pd.SparseDtype("int", np.nan)) - df2 = pd.DataFrame(index=df.index) - df2.loc[df.index, "data"] = df["data"] - tm.assert_frame_equal(df, df2) + # GH#32395 ea assignments with an index raise TypeError + expected = 
pd.DataFrame(np.random.randn(100), columns=["data"]) + expected.iloc[5:95] = np.nan + expected = expected.astype(pd.SparseDtype("int", np.nan)) + result = pd.DataFrame(index=expected.index) + result.loc[expected.index, "data"] = expected["data"] + tm.assert_frame_equal(expected, result) def test_loc_setitem_series_categorical_full_with_index(): - s1 = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data") - s2 = pd.Series(index=s1.index, dtype="object", name="data") - s2.loc[s1.index] = s1 - tm.assert_series_equal(s1, s2) + # GH#32395 ea assignments with an index raise TypeError + expected = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data") + result = pd.Series(index=expected.index, dtype="object", name="data") + result.loc[expected.index] = expected + tm.assert_series_equal(expected, result) def test_loc_setitem_series_categorical_slice(): + # GH#32395 ea assignments with an index raise TypeError s1 = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data") - s2 = pd.Series(index=s1.index, dtype="object", name="data") + result = pd.Series(index=s1.index, dtype="object", name="data") expected = pd.Series( pd.Categorical([1] * 10 + [2] * 5 + [np.nan] * 10), name="data", dtype=object ) - s2.loc[s1.index[:15]] = s1[:15] - tm.assert_series_equal(expected, s2) + result.loc[s1.index[:15]] = s1[:15] + tm.assert_series_equal(expected, result) def test_loc_setitem_df_interval_slice(): + # GH#32395 ea assignments with an index raise TypeError intervals = pd.interval_range(start=0, end=5) df = pd.DataFrame(intervals, columns=["data"]) - df2 = pd.DataFrame(index=df.index) + result = pd.DataFrame(index=df.index) expected = pd.DataFrame(intervals, columns=["data"], dtype="object") expected.data[-2:] = np.nan - df2.loc[df.index[:3], "data"] = df["data"][:3] - tm.assert_frame_equal(df2, expected) + result.loc[df.index[:3], "data"] = df["data"][:3] + tm.assert_frame_equal(expected, result) def test_loc_setitem_series_period_slice(): + 
# GH#32395 ea assignments with an index raise TypeError periods = pd.period_range(start="2020Q1", periods=5) s1 = pd.Series(periods, name="data") - s2 = pd.Series(index=s1.index, dtype="object", name="data") + result = pd.Series(index=s1.index, dtype="object", name="data") expected = pd.Series(periods, name="data", dtype=object,) expected[-2:] = np.nan - s2.loc[s1.index[:3]] = s1[:3] - tm.assert_series_equal(expected, s2) + result.loc[s1.index[:3]] = s1[:3] + tm.assert_series_equal(expected, result) From fbc31c848a74c85eaac3a26525900fb2b3694e9e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 20 May 2020 18:37:44 +0100 Subject: [PATCH 14/15] nits --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/internals/blocks.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2bd376c836678..96cb64e1ffb0e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -726,7 +726,7 @@ Indexing - Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`) - Bug in :class:`Interval` where a :class:`Timedelta` could not be added or subtracted from a :class:`Timestamp` interval (:issue:`32023`) - Bug in :meth:`DataFrame.copy` _item_cache not invalidated after copy causes post-copy value updates to not be reflected (:issue:`31784`) -- Fixed regression in :meth:`DataFrame.loc`, :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) +- Fixed regression in :meth:`DataFrame.loc` and :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) - Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`) - Bug in :meth:`DataFrame.iloc` when slicing a single 
column-:class:`DataFrame`` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`) - Bug in :meth:`DatetimeIndex.insert` and :meth:`TimedeltaIndex.insert` causing index ``freq`` to be lost when setting an element into an empty :class:`Series` (:issue:33573`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8b55715ca6b73..d880bd81bd947 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -793,7 +793,6 @@ def setitem(self, indexer, value): # coerce if block dtype can store value values = self.values - if self._can_hold_element(value): # We only get here for non-Extension Blocks, so _try_coerce_args # is only relevant for DatetimeBlock and TimedeltaBlock From c7e7cf547175e282d30f7fd425041adc857e2488 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 20 May 2020 22:09:28 +0100 Subject: [PATCH 15/15] update tests --- pandas/tests/extension/base/setitem.py | 28 ++++++ pandas/tests/indexing/test_loc.py | 131 ------------------------- 2 files changed, 28 insertions(+), 131 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index dece8098c8542..eed9a584cc030 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -301,3 +301,31 @@ def test_setitem_preserves_views(self, data): data[0] = data[1] assert view1[0] == data[1] assert view2[0] == data[1] + + def test_setitem_dataframe_column_with_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + result.loc[df.index, "data"] = df["data"] + self.assert_frame_equal(result, expected) + + def test_setitem_dataframe_column_without_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + 
result.loc[:, "data"] = df["data"] + self.assert_frame_equal(result, expected) + + def test_setitem_series_with_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + ser = expected = pd.Series(data, name="data") + result = pd.Series(index=ser.index, dtype=np.object, name="data") + result.loc[ser.index] = ser + self.assert_series_equal(result, expected) + + def test_setitem_series_without_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + ser = expected = pd.Series(data, name="data") + result = pd.Series(index=ser.index, dtype=np.object, name="data") + result.loc[:] = ser + self.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4a10864241359..30416985f2020 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,5 +1,4 @@ """ test label based indexing with loc """ -from datetime import timezone from io import StringIO import re @@ -1120,133 +1119,3 @@ def test_loc_with_period_index_indexer(): tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) tm.assert_frame_equal(df, df.loc[list(idx)]) - - -def test_loc_setitem_df_datetime64tz_full_column_with_index(): - # GH#32395 ea assignments with an index raise TypeError - expected = pd.DataFrame( - pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] - ) - result = pd.DataFrame(index=expected.index) - result.loc[expected.index, "data"] = expected["data"] - tm.assert_frame_equal(expected, result) - - -def test_loc_setitem_df_datetime64tz_slice(): - # GH#32395 ea assignments with an index raise TypeError - df = pd.DataFrame( - pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), columns=["data"] - ) - result = pd.DataFrame(index=df.index) - expected = pd.DataFrame( - [ - pd.Timestamp("2020-01-01", tz=timezone.utc), - pd.Timestamp("2020-01-02", tz=timezone.utc), - pd.Timestamp("2020-01-03", 
tz=timezone.utc), - pd.NaT, - pd.NaT, - pd.NaT, - ], - columns=["data"], - dtype="object", - ) - result.loc[df.index[:3], "data"] = df["data"][:3] - tm.assert_frame_equal(expected, result) - - -def test_loc_setitem_series_datetime64tz_full_with_index(): - # GH#32395 ea assignments with an index raise TypeError - expected = pd.Series( - pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc), name="data" - ) - result = pd.Series(index=expected.index, dtype="object", name="data") - result.loc[expected.index] = expected - tm.assert_series_equal(expected, result) - - -def test_loc_setitem_series_datetime64tz_slice(): - # GH#32395 ea assignments with an index raise TypeError - dates = pd.date_range("2020-01-01", "2020-01-06", 6, tz=timezone.utc) - s1 = pd.Series(dates, name="data") - result = pd.Series(index=s1.index, dtype="object", name="data") - expected = pd.Series(dates, name="data", dtype="object") - expected[-3:] = pd.NaT - result.loc[s1.index[:3]] = s1[:3] - tm.assert_series_equal(expected, result) - - -def test_loc_setitem_series_timedelta64_full_with_index(): - # GH#32395 ea assignments with an index raise TypeError - expected = pd.Series(pd.timedelta_range(start="1 day", periods=4), name="data") - result = pd.Series(index=expected.index, dtype="object", name="data") - result.loc[expected.index] = expected - tm.assert_series_equal(expected, result) - - -def test_loc_setitem_df_period_full_column_with_index(): - # GH#32395 ea assignments with an index raise TypeError - expected = pd.DataFrame( - pd.period_range(start="2020Q1", periods=5), columns=["data"] - ) - result = pd.DataFrame(index=expected.index) - result.loc[expected.index, "data"] = expected["data"] - tm.assert_frame_equal(expected, result) - - -def test_loc_setitem_series_interval_full_with_index(): - # GH#32395 ea assignments with an index raise TypeError - expected = pd.Series(pd.interval_range(start=0, end=5), name="data") - result = pd.Series(index=expected.index, dtype="object", 
name="data") - result.loc[expected.index] = expected - tm.assert_series_equal(expected, result) - - -def test_loc_setitem_df_sparse_full_column_with_index(): - # GH#32395 ea assignments with an index raise TypeError - expected = pd.DataFrame(np.random.randn(100), columns=["data"]) - expected.iloc[5:95] = np.nan - expected = expected.astype(pd.SparseDtype("int", np.nan)) - result = pd.DataFrame(index=expected.index) - result.loc[expected.index, "data"] = expected["data"] - tm.assert_frame_equal(expected, result) - - -def test_loc_setitem_series_categorical_full_with_index(): - # GH#32395 ea assignments with an index raise TypeError - expected = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data") - result = pd.Series(index=expected.index, dtype="object", name="data") - result.loc[expected.index] = expected - tm.assert_series_equal(expected, result) - - -def test_loc_setitem_series_categorical_slice(): - # GH#32395 ea assignments with an index raise TypeError - s1 = pd.Series(pd.Categorical([1] * 10 + [2] * 5 + [3] * 10), name="data") - result = pd.Series(index=s1.index, dtype="object", name="data") - expected = pd.Series( - pd.Categorical([1] * 10 + [2] * 5 + [np.nan] * 10), name="data", dtype=object - ) - result.loc[s1.index[:15]] = s1[:15] - tm.assert_series_equal(expected, result) - - -def test_loc_setitem_df_interval_slice(): - # GH#32395 ea assignments with an index raise TypeError - intervals = pd.interval_range(start=0, end=5) - df = pd.DataFrame(intervals, columns=["data"]) - result = pd.DataFrame(index=df.index) - expected = pd.DataFrame(intervals, columns=["data"], dtype="object") - expected.data[-2:] = np.nan - result.loc[df.index[:3], "data"] = df["data"][:3] - tm.assert_frame_equal(expected, result) - - -def test_loc_setitem_series_period_slice(): - # GH#32395 ea assignments with an index raise TypeError - periods = pd.period_range(start="2020Q1", periods=5) - s1 = pd.Series(periods, name="data") - result = pd.Series(index=s1.index, 
dtype="object", name="data") - expected = pd.Series(periods, name="data", dtype=object,) - expected[-2:] = np.nan - result.loc[s1.index[:3]] = s1[:3] - tm.assert_series_equal(expected, result)