From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 1/6] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 4f4f2932cb5ae293a58a13e8aa40427f71214150 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 1 Aug 2019 16:25:45 +0200 Subject: [PATCH 2/6] Fix boolean error in replace --- pandas/core/generic.py | 6 +++++- pandas/tests/generic/test_generic.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 821c35e0cce2f..9913a44006ea7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6640,7 +6640,11 @@ def replace( for k, v in items: keys, values = list(zip(*v.items())) or ([], []) - if set(keys) & set(values): + # add another check to avoid boolean being regarded + # as binary in python set + if set(keys) & set(values) and set(map(str, keys)) & set( + map(str, values) + ): raise ValueError( "Replacement not allowed with " "overlapping keys and values" diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 7b9e50ebbf342..ccb2668718f90 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -948,3 +948,15 @@ def test_deprecated_get_dtype_counts(self): df = DataFrame([1]) with tm.assert_produces_warning(FutureWarning): df.get_dtype_counts() + + def test_boolean_in_replace(self): + # GH 27660 + df = DataFrame({"col": [False, True, 0, 1]}) + + result = df.replace({"col": {False: 0, True: 1}}) + expected = DataFrame({"col": [0, 1, 0, 1]}) + assert_frame_equal(result, expected) + + msg = "Replacement not allowed with overlapping keys and values" + with pytest.raises(ValueError, match=msg): + df.replace({"col": {0: 1, 1: "a"}}) From 7529084481fd08d6d91b39e5e9be43c3dfd27c52 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 5 Aug 2019 13:18:38 +0200 Subject: [PATCH 3/6] tst --- pandas/tests/generic/test_generic.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index ccb2668718f90..a5da14724180e 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -953,10 +953,13 @@ def test_boolean_in_replace(self): # GH 27660 df = DataFrame({"col": [False, True, 0, 1]}) - result = df.replace({"col": {False: 0, True: 1}}) - expected = DataFrame({"col": [0, 1, 0, 1]}) - assert_frame_equal(result, expected) +# result = df.replace({"col": {False: 0, True: 1}}) +# expected = DataFrame({"col": [0, 1, 0, 1]}) +# assert_frame_equal(result, expected) - msg = "Replacement not allowed with overlapping keys and values" - with pytest.raises(ValueError, match=msg): - df.replace({"col": {0: 1, 1: "a"}}) +# msg = "Replacement not allowed with overlapping keys and values" +# with pytest.raises(ValueError, match=msg): +# df.replace({"col": {0: 1, 1: "a"}}) + + dd = df.replace({False:1, True:0}) + assert dd['col'].values == 0 \ No newline at end of file From eb6cee8d29791180cdff887c8f482c56b2657571 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 24 Aug 2019 10:24:10 +0200 Subject: [PATCH 4/6] code change based on review --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/generic.py | 10 +--------- pandas/tests/frame/test_replace.py | 18 ++++++++++++++---- pandas/tests/generic/test_generic.py | 15 --------------- 4 files changed, 16 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 8e25857e5ad69..be5e950580c01 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -206,6 +206,7 @@ ExtensionArray Other ^^^^^ - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) +- Keeping the consistency in :meth:`DataFrame.replace` behaviour between simple dictionary replacement and nested dictionary replacement (:issue:`27660`) .. _whatsnew_1000.contributors: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9bf87a1cdca53..b0c148601732d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6642,15 +6642,7 @@ def replace( for k, v in items: keys, values = list(zip(*v.items())) or ([], []) - # add another check to avoid boolean being regarded - # as binary in python set - if set(keys) & set(values) and set(map(str, keys)) & set( - map(str, values) - ): - raise ValueError( - "Replacement not allowed with " - "overlapping keys and values" - ) + to_rep_dict[k] = list(keys) value_dict[k] = list(values) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 2862615ef8585..47e4d619be318 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -1070,17 +1070,27 @@ def test_replace_truthy(self): assert_frame_equal(r, e) def test_replace_int_to_int_chain(self): + # GH 27660 keep behaviour consistent for simple dictionary and + # nested dictionary replacement df = DataFrame({"a": list(range(1, 5))}) - with pytest.raises(ValueError, match="Replacement not allowed .+"): - df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) + + # nested dictionary replace + result1 = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) + + # simple dictionary replace + result2 = df.replace(dict(zip(range(1, 5), range(2, 6)))) + + assert_frame_equal(result1, result2) def test_replace_str_to_str_chain(self): + # GH 27660 a = np.arange(1, 5) astr = a.astype(str) bstr = np.arange(2, 6).astype(str) df = DataFrame({"a": astr}) - with pytest.raises(ValueError, match="Replacement not allowed .+"): - df.replace({"a": dict(zip(astr, bstr))}) + result1 = df.replace(dict(zip(astr, bstr))) + result2 = df.replace({"a": dict(zip(astr, bstr))}) + assert_frame_equal(result1, result2) def test_replace_swapping_bug(self): df = pd.DataFrame({"a": [True, False, True]}) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index a5da14724180e..7b9e50ebbf342 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -948,18 +948,3 @@ def test_deprecated_get_dtype_counts(self): df = DataFrame([1]) with tm.assert_produces_warning(FutureWarning): df.get_dtype_counts() - - def test_boolean_in_replace(self): - # GH 27660 - df = DataFrame({"col": [False, True, 0, 1]}) - -# result = df.replace({"col": {False: 0, True: 1}}) -# expected = DataFrame({"col": [0, 1, 0, 1]}) -# assert_frame_equal(result, expected) - -# msg = "Replacement not allowed with overlapping keys and values" -# with pytest.raises(ValueError, match=msg): -# df.replace({"col": {0: 1, 1: "a"}}) - - dd = df.replace({False:1, True:0}) - assert dd['col'].values == 0 \ No newline at end of file From 18bd51743f72443470e980d6ee7c639912413bd3 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 25 Aug 2019 23:00:23 +0200 Subject: [PATCH 5/6] code change based on review --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/tests/frame/test_replace.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index be5e950580c01..a9bc1942667ec 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -206,7 +206,7 @@ ExtensionArray Other ^^^^^ - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) -- Keeping the consistency in :meth:`DataFrame.replace` behaviour between simple dictionary replacement and nested dictionary replacement (:issue:`27660`) +- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) .. _whatsnew_1000.contributors: diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 47e4d619be318..2e639a833b912 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -1069,20 +1069,18 @@ def test_replace_truthy(self): e = df assert_frame_equal(r, e) - def test_replace_int_to_int_chain(self): + def test_nested_dict_overlapping_keys_replace_int(self): # GH 27660 keep behaviour consistent for simple dictionary and # nested dictionary replacement df = DataFrame({"a": list(range(1, 5))}) - # nested dictionary replace result1 = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) - # simple dictionary replace result2 = df.replace(dict(zip(range(1, 5), range(2, 6)))) assert_frame_equal(result1, result2) - def test_replace_str_to_str_chain(self): + def test_nested_dict_overlapping_keys_replace_str(self): # GH 27660 a = np.arange(1, 5) astr = a.astype(str) From 2c9749537281b8e5daf55ed5f79db9a522f8ea64 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 27 Aug 2019 08:54:04 +0200 Subject: [PATCH 6/6] minor naming change --- pandas/tests/frame/test_replace.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 2e639a833b912..b341ed6a52ca5 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -1074,11 +1074,9 @@ def test_nested_dict_overlapping_keys_replace_int(self): # nested dictionary replacement df = DataFrame({"a": list(range(1, 5))}) - result1 = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) - - result2 = df.replace(dict(zip(range(1, 5), range(2, 6)))) - - assert_frame_equal(result1, result2) + result = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) + expected = df.replace(dict(zip(range(1, 5), range(2, 6)))) + assert_frame_equal(result, expected) def test_nested_dict_overlapping_keys_replace_str(self): # GH 27660 @@ -1086,9 +1084,9 @@ def test_nested_dict_overlapping_keys_replace_str(self): astr = a.astype(str) bstr = np.arange(2, 6).astype(str) df = DataFrame({"a": astr}) - result1 = df.replace(dict(zip(astr, bstr))) - result2 = df.replace({"a": dict(zip(astr, bstr))}) - assert_frame_equal(result1, result2) + result = df.replace(dict(zip(astr, bstr))) + expected = df.replace({"a": dict(zip(astr, bstr))}) + assert_frame_equal(result, expected) def test_replace_swapping_bug(self): df = pd.DataFrame({"a": [True, False, True]})