From 3451902344f7f1d97175678f2021277c8f5e879a Mon Sep 17 00:00:00 2001 From: usersblock Date: Tue, 20 Jul 2021 01:52:59 -0400 Subject: [PATCH 01/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 9da7951c199ca..349943c6d0b50 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2530,3 +2530,16 @@ def test_empty_string_datetime_coerce__unit(): # verify that no exception is raised even when errors='raise' is set result = to_datetime([1, ""], unit="s", errors="raise") tm.assert_index_equal(expected, result) + + +def test_to_datetime_monotonic_increasing_index(): + # GH28238 + # Create date range of 1000 hour periods + times = pd.date_range(datetime.now(), periods=1000, freq='h') + # Random sort + times = times.to_frame(index=False, name='DT').sample(1000) + # Divide index integers by 1000 + times.index = times.index.to_series().astype(float) / 1000 + # Convert to datetime + result = pd.to_datetime(times.iloc[:, 0]) + assert result.values.dtype == np.dtype('datetime64[ns]') From 1b528225089d7469f83ddc0e20ce86f750ccfdb2 Mon Sep 17 00:00:00 2001 From: usersblock Date: Tue, 20 Jul 2021 03:33:02 -0400 Subject: [PATCH 02/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 349943c6d0b50..d5e90162d671e 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2535,11 +2535,11 @@ def test_empty_string_datetime_coerce__unit(): def test_to_datetime_monotonic_increasing_index(): # GH28238 # Create date range of 1000 hour periods - times = pd.date_range(datetime.now(), periods=1000, freq='h') + times = date_range(datetime.now(), periods=1000, freq="h") # Random sort - times = times.to_frame(index=False, name='DT').sample(1000) + times = times.to_frame(index=False, name="DT").sample(1000) # Divide index integers by 1000 times.index = times.index.to_series().astype(float) / 1000 # Convert to datetime - result = pd.to_datetime(times.iloc[:, 0]) - assert result.values.dtype == np.dtype('datetime64[ns]') + result = to_datetime(times.iloc[:, 0]) + assert result.values.dtype == np.dtype("datetime64[ns]") From 59d4145c6f62d87bccd13bdc213fffe262903485 Mon Sep 17 00:00:00 2001 From: usersblock Date: Tue, 20 Jul 2021 20:08:02 -0400 Subject: [PATCH 03/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index d5e90162d671e..a172ecd8e8f56 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2534,12 +2534,10 @@ def test_empty_string_datetime_coerce__unit(): def test_to_datetime_monotonic_increasing_index(): # GH28238 - # Create date range of 1000 hour periods times = date_range(datetime.now(), periods=1000, freq="h") - # Random sort times = times.to_frame(index=False, name="DT").sample(1000) - # Divide index integers by 1000 times.index = times.index.to_series().astype(float) / 1000 - # Convert to datetime - result = to_datetime(times.iloc[:, 0]) - assert result.values.dtype == np.dtype("datetime64[ns]") + converted = to_datetime(times.iloc[:, 0]) + expected = np.ravel(times.values) + result = np.ravel(converted.values) + tm.assert_equal(result, expected) From 763d84ca65fa34dca69f648b43728b137445584c Mon Sep 17 00:00:00 2001 From: usersblock Date: Thu, 22 Jul 2021 16:58:44 -0400 Subject: [PATCH 04/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a172ecd8e8f56..7e876e1c1d8c0 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2532,12 +2532,12 @@ def test_empty_string_datetime_coerce__unit(): tm.assert_index_equal(expected, result) -def test_to_datetime_monotonic_increasing_index(): +@pytest.mark.parametrize("cache", [True, False]) +def test_to_datetime_monotonic_increasing_index(cache): # GH28238 - times = date_range(datetime.now(), periods=1000, freq="h") - times = times.to_frame(index=False, name="DT").sample(1000) + times = date_range(pd.Timestamp("2002"), periods=3, freq="h") + times = times.to_frame(index=False, name="DT").sample() times.index = times.index.to_series().astype(float) / 1000 - converted = to_datetime(times.iloc[:, 0]) - expected = np.ravel(times.values) - result = np.ravel(converted.values) - tm.assert_equal(result, expected) + result = to_datetime(times.iloc[:, 0], cache=cache) + expected = times.iloc[:, 0] + tm.assert_series_equal(result, expected) From 1404f7a55f7642af1801bec4b7862dd699740ede Mon Sep 17 00:00:00 2001 From: usersblock Date: Fri, 23 Jul 2021 14:13:41 -0400 Subject: [PATCH 05/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7e876e1c1d8c0..8f1a918bef4d6 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2535,8 +2535,8 @@ def test_empty_string_datetime_coerce__unit(): @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_monotonic_increasing_index(cache): # GH28238 - times = date_range(pd.Timestamp("2002"), periods=3, freq="h") - times = times.to_frame(index=False, name="DT").sample() + times = Series([Timestamp("2002"), Timestamp("2012"), Timestamp("2020")]) + times = times.to_frame(name="DT").sample(3) times.index = times.index.to_series().astype(float) / 1000 result = to_datetime(times.iloc[:, 0], cache=cache) expected = times.iloc[:, 0] From c649998f35ec4ac6aa67502a10b523372c5c4a44 Mon Sep 17 00:00:00 2001 From: usersblock Date: Fri, 23 Jul 2021 19:26:17 -0400 Subject: [PATCH 06/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 8f1a918bef4d6..c0b782354c6f2 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2535,8 +2535,8 @@ def test_empty_string_datetime_coerce__unit(): @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_monotonic_increasing_index(cache): # GH28238 - times = Series([Timestamp("2002"), Timestamp("2012"), Timestamp("2020")]) - times = times.to_frame(name="DT").sample(3) + times = pd.date_range(pd.Timestamp("1980"), periods=50, freq="YS") + times = times.to_frame(index=False, name="DT").sample(50) times.index = times.index.to_series().astype(float) / 1000 result = to_datetime(times.iloc[:, 0], cache=cache) expected = times.iloc[:, 0] From 44645d73f372bd43cb63bb952dead3073a03aaf3 Mon Sep 17 00:00:00 2001 From: usersblock Date: Sat, 24 Jul 2021 01:37:31 -0400 Subject: [PATCH 07/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index c0b782354c6f2..3a67b8b128050 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2535,8 +2535,8 @@ def test_empty_string_datetime_coerce__unit(): @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_monotonic_increasing_index(cache): # GH28238 - times = pd.date_range(pd.Timestamp("1980"), periods=50, freq="YS") - times = times.to_frame(index=False, name="DT").sample(50) + times = date_range(Timestamp("1980"), periods=50, freq="YS") + times = times.to_frame(index=False, name="DT").sample(n=50, random_state=1) times.index = times.index.to_series().astype(float) / 1000 result = to_datetime(times.iloc[:, 0], cache=cache) expected = times.iloc[:, 0] From e3cb80c298f3c2bd2988532bbac6a53bf0df493f Mon Sep 17 00:00:00 2001 From: usersblock Date: Sat, 24 Jul 2021 15:07:24 -0400 Subject: [PATCH 08/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 3a67b8b128050..7078661b4568a 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2535,8 +2535,9 @@ def test_empty_string_datetime_coerce__unit(): @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_monotonic_increasing_index(cache): # GH28238 + cstart = start_caching_at times = date_range(Timestamp("1980"), periods=50, freq="YS") - times = times.to_frame(index=False, name="DT").sample(n=50, random_state=1) + times = times.to_frame(index=False, name="DT").sample(n=cstart, random_state=1) times.index = times.index.to_series().astype(float) / 1000 result = to_datetime(times.iloc[:, 0], cache=cache) expected = times.iloc[:, 0] From 970ed06055ec703159a26af5895127d28f105bc3 Mon Sep 17 00:00:00 2001 From: usersblock Date: Sun, 25 Jul 2021 00:21:41 -0400 Subject: [PATCH 09/22] Update test_to_datetime.py --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7078661b4568a..7351f50aea8c1 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2536,7 +2536,7 @@ def test_empty_string_datetime_coerce__unit(): def test_to_datetime_monotonic_increasing_index(cache): # GH28238 cstart = start_caching_at - times = date_range(Timestamp("1980"), periods=50, freq="YS") + times = date_range(Timestamp("1980"), periods=cstart, freq="YS") times = times.to_frame(index=False, name="DT").sample(n=cstart, random_state=1) times.index = times.index.to_series().astype(float) / 1000 result = to_datetime(times.iloc[:, 0], cache=cache) From 71be2093e1f71825c73638c99aec0ea047295fed Mon Sep 17 00:00:00 2001 From: usersblock Date: Sun, 15 Aug 2021 19:13:26 -0400 Subject: [PATCH 10/22] Updated Test and fixed nlargest --- pandas/core/algorithms.py | 9 +++++++-- pandas/tests/series/methods/test_nlargest.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4f9dd61b8e0da..da7184297fb56 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -72,6 +72,7 @@ na_value_for_dtype, ) +import pandas as pd from pandas.core.array_algos.take import take_nd from pandas.core.construction import ( array as pd_array, @@ -1256,6 +1257,7 @@ def compute(self, method: str) -> Series: return self.obj[[]] dropped = self.obj.dropna() + nan_index = self.obj.drop(dropped.index) if is_extension_array_dtype(dropped.dtype): # GH#41816 bc we have dropped NAs above, MaskedArrays can use the @@ -1272,7 +1274,7 @@ def compute(self, method: str) -> Series: # slow method if n >= len(self.obj): ascending = method == "nsmallest" - return dropped.sort_values(ascending=ascending).head(n) + return self.obj.sort_values(ascending=ascending).head(n) # fast method new_dtype = dropped.dtype @@ -1290,6 +1292,8 @@ def compute(self, method: str) -> Series: if self.keep == "last": arr = arr[::-1] + nbase = n + findex = len(self.obj) narr = len(arr) n = min(n, narr) @@ -1301,12 +1305,13 @@ def compute(self, method: str) -> Series: if self.keep != "all": inds = inds[:n] + findex = nbase if self.keep == "last": # reverse indices inds = narr - 1 - inds - return dropped.iloc[inds] + return pd.concat([dropped.iloc[inds],nan_index])[:findex] class SelectNFrame(SelectN): diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 0efb0663a0327..739fd772c48f1 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -127,8 +127,8 @@ def test_nsmallest_nlargest(self, s_main_dtypes_split): def test_nlargest_misc(self): ser = Series([3.0, np.nan, 1, 2, 5]) - tm.assert_series_equal(ser.nlargest(), ser.iloc[[4, 0, 3, 2]]) - tm.assert_series_equal(ser.nsmallest(), ser.iloc[[2, 3, 0, 4]]) + tm.assert_series_equal(ser.nlargest(), ser.iloc[[4, 0, 3, 2, 1]]) + tm.assert_series_equal(ser.nsmallest(), ser.iloc[[2, 3, 0, 4, 1]]) msg = 'keep must be either "first", "last"' with pytest.raises(ValueError, match=msg): From c5c751c4e521756b0ad454e7c9b4b211a65ba3d8 Mon Sep 17 00:00:00 2001 From: usersblock Date: Sun, 15 Aug 2021 20:11:42 -0400 Subject: [PATCH 11/22] Update algorithms.py --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index da7184297fb56..bbcd1edfea1c1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1311,7 +1311,7 @@ def compute(self, method: str) -> Series: # reverse indices inds = narr - 1 - inds - return pd.concat([dropped.iloc[inds],nan_index])[:findex] + return pd.concat([dropped.iloc[inds], nan_index])[:findex] class SelectNFrame(SelectN): From a00fb9e0e77cfebce8bba929233858570d900a86 Mon Sep 17 00:00:00 2001 From: usersblock Date: Sun, 15 Aug 2021 23:56:15 -0400 Subject: [PATCH 12/22] Update algorithms.py --- pandas/core/algorithms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bbcd1edfea1c1..40e08fd6b691b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -72,7 +72,6 @@ na_value_for_dtype, ) -import pandas as pd from pandas.core.array_algos.take import take_nd from pandas.core.construction import ( array as pd_array, @@ -1248,6 +1247,7 @@ class SelectNSeries(SelectN): def compute(self, method: str) -> Series: + from pandas.core.reshape.concat import concat n = self.n dtype = self.obj.dtype if not self.is_valid_dtype_n_method(dtype): @@ -1311,7 +1311,7 @@ def compute(self, method: str) -> Series: # reverse indices inds = narr - 1 - inds - return pd.concat([dropped.iloc[inds], nan_index])[:findex] + return concat([dropped.iloc[inds], nan_index])[:findex] class SelectNFrame(SelectN): From 6e5c7b09d82d77626963505bb67cc56f3202e32f Mon Sep 17 00:00:00 2001 From: usersblock Date: Mon, 16 Aug 2021 00:10:28 -0400 Subject: [PATCH 13/22] Update algorithms.py --- pandas/core/algorithms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 40e08fd6b691b..0765ef0acf7b8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1248,6 +1248,7 @@ class SelectNSeries(SelectN): def compute(self, method: str) -> Series: from pandas.core.reshape.concat import concat + n = self.n dtype = self.obj.dtype if not self.is_valid_dtype_n_method(dtype): From a1a9d06abbb8a11baa48dd236c8e94fa75fe3d20 Mon Sep 17 00:00:00 2001 From: usersblock Date: Mon, 16 Aug 2021 20:44:16 -0400 Subject: [PATCH 14/22] Update test_nlargest.py --- pandas/tests/frame/methods/test_nlargest.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 4ce474230b686..a79bef38b6ffe 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -209,3 +209,17 @@ def test_nlargest_multiindex_column_lookup(self): result = df.nlargest(3, ("x", "b")) expected = df.iloc[[3, 2, 1]] tm.assert_frame_equal(result, expected) + + def test_nlargest_nan_column(self): + # GH#43060 + df = pd.DataFrame( + { + 'grp': [1, 1, 2, 2], + 'y': [1, 0, 2, 5], + 'z': [1, 2, np.nan, np.nan] + } + ) + dfgrp = df.groupby('grp') + result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, 'z')) + expected = dfgrp.apply(lambda x: x.sort_values('z', ascending=False).head(1)) + tm.assert_frame_equal(result, expected) From 491d077682199e5d93d30b77275df2f8be66fcd3 Mon Sep 17 00:00:00 2001 From: usersblock Date: Mon, 16 Aug 2021 20:56:47 -0400 Subject: [PATCH 15/22] Update test_nlargest.py --- pandas/tests/frame/methods/test_nlargest.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index a79bef38b6ffe..3d73a3974eaaa 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -213,11 +213,7 @@ def test_nlargest_multiindex_column_lookup(self): def test_nlargest_nan_column(self): # GH#43060 df = pd.DataFrame( - { - 'grp': [1, 1, 2, 2], - 'y': [1, 0, 2, 5], - 'z': [1, 2, np.nan, np.nan] - } + {'grp': [1, 1, 2, 2], 'y': [1, 0, 2, 5], 'z': [1, 2, np.nan, np.nan]} ) dfgrp = df.groupby('grp') result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, 'z')) From 8708cc036518074349d92412860542e618cd13e5 Mon Sep 17 00:00:00 2001 From: usersblock Date: Mon, 16 Aug 2021 21:07:35 -0400 Subject: [PATCH 16/22] Update test_nlargest.py --- pandas/tests/frame/methods/test_nlargest.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 3d73a3974eaaa..12be924bbef44 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -213,9 +213,9 @@ def test_nlargest_multiindex_column_lookup(self): def test_nlargest_nan_column(self): # GH#43060 df = pd.DataFrame( - {'grp': [1, 1, 2, 2], 'y': [1, 0, 2, 5], 'z': [1, 2, np.nan, np.nan]} + {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]} ) - dfgrp = df.groupby('grp') - result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, 'z')) - expected = dfgrp.apply(lambda x: x.sort_values('z', ascending=False).head(1)) + dfgrp = df.groupby("grp") + result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z")) + expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1)) tm.assert_frame_equal(result, expected) From a5cb642fc4850bb783193dd022c8b91dc30b4420 Mon Sep 17 00:00:00 2001 From: usersblock Date: Wed, 25 Aug 2021 04:20:58 -0400 Subject: [PATCH 17/22] Update test_nlargest.py --- pandas/tests/frame/methods/test_nlargest.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 12be924bbef44..1b2db80d782ce 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -210,12 +210,9 @@ def test_nlargest_multiindex_column_lookup(self): expected = df.iloc[[3, 2, 1]] tm.assert_frame_equal(result, expected) - def test_nlargest_nan_column(self): + def test_nlargest_nan(self): # GH#43060 - df = pd.DataFrame( - {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]} - ) - dfgrp = df.groupby("grp") - result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z")) - expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1)) + df = pd.DataFrame([np.nan, np.nan, 0, 1, 2, 3]) + result = df.nlargest(5, 0) + expected = df.sort_values(0, ascending=False).head(5) tm.assert_frame_equal(result, expected) From 7d2aac441be75cdbff804661e187efd6cac4b9fc Mon Sep 17 00:00:00 2001 From: usersblock Date: Fri, 27 Aug 2021 03:50:10 -0400 Subject: [PATCH 18/22] Updated test_nlargest and docs --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/series/methods/test_nlargest.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index f0af60f80edd5..90d71434bc39f 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -288,7 +288,7 @@ Indexing - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`) - Bug in :meth:`DataFrame.query` did not handle the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a dataframe (:issue:`42826`) - Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`) -- +- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` Missing ^^^^^^^ diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 739fd772c48f1..8b0779c44bf4c 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -127,8 +127,12 @@ def test_nsmallest_nlargest(self, s_main_dtypes_split): def test_nlargest_misc(self): ser = Series([3.0, np.nan, 1, 2, 5]) - tm.assert_series_equal(ser.nlargest(), ser.iloc[[4, 0, 3, 2, 1]]) - tm.assert_series_equal(ser.nsmallest(), ser.iloc[[2, 3, 0, 4, 1]]) + result = ser.nlargest() + expected = ser.iloc[[4, 0, 3, 2, 1]] + tm.assert_series_equal(result, expected) + result = ser.nsmallest() + expected = ser.iloc[[2, 3, 0, 4, 1]] + tm.assert_series_equal(result, expected) msg = 'keep must be either "first", "last"' with pytest.raises(ValueError, match=msg): From 2d0931f15fe43455e1484aa13104d95fa84c5151 Mon Sep 17 00:00:00 2001 From: usersblock Date: Fri, 27 Aug 2021 13:25:14 -0400 Subject: [PATCH 19/22] Update v1.4.0.rst --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 90d71434bc39f..b1752a506c486 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -288,7 +288,7 @@ Indexing - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`) - Bug in :meth:`DataFrame.query` did not handle the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a dataframe (:issue:`42826`) - Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`) -- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` +- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`) Missing ^^^^^^^ From ca3af2477911c5cb59af43dde601229a3f616357 Mon Sep 17 00:00:00 2001 From: usersblock Date: Fri, 3 Sep 2021 03:24:28 -0400 Subject: [PATCH 20/22] Update test_apply.py --- pandas/tests/groupby/test_apply.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 25529e65118c8..f0767cb566e29 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1150,3 +1150,15 @@ def test_doctest_example2(): {"B": [1.0, 0.0], "C": [2.0, 0.0]}, index=Index(["a", "b"], name="A") ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_apply_na(dropna): + # GH#28984 + df = pd.DataFrame( + {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]} + ) + dfgrp = df.groupby("grp", dropna=dropna) + result = dfgrp.apply(lambda grp_df: grp_df.nlargest(1, "z")) + expected = dfgrp.apply(lambda x: x.sort_values("z", ascending=False).head(1)) + tm.assert_frame_equal(result, expected) From 80d4fcbcac638f5ce3aab0594832141c140fe841 Mon Sep 17 00:00:00 2001 From: usersblock Date: Fri, 3 Sep 2021 03:54:43 -0400 Subject: [PATCH 21/22] Update test_apply.py --- pandas/tests/groupby/test_apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index f0767cb566e29..0330bc0203185 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1155,7 +1155,7 @@ def test_doctest_example2(): @pytest.mark.parametrize("dropna", [True, False]) def test_apply_na(dropna): # GH#28984 - df = pd.DataFrame( + df = DataFrame( {"grp": [1, 1, 2, 2], "y": [1, 0, 2, 5], "z": [1, 2, np.nan, np.nan]} ) dfgrp = df.groupby("grp", dropna=dropna) From 5717466e2546f3ae3ccf27a42c7ae89db3b4a067 Mon Sep 17 00:00:00 2001 From: usersblock Date: Tue, 7 Sep 2021 03:41:37 -0400 Subject: [PATCH 22/22] Update algorithms.py --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 0765ef0acf7b8..0e986aa6afb5d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1312,7 +1312,7 @@ def compute(self, method: str) -> Series: # reverse indices inds = narr - 1 - inds - return concat([dropped.iloc[inds], nan_index])[:findex] + return concat([dropped.iloc[inds], nan_index]).iloc[:findex] class SelectNFrame(SelectN):