From bdafa802e464b3a965e12c0bc4e449e33984c48b Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:24:51 -0700 Subject: [PATCH 01/19] Better subclassed type support in DataFrame.count() --- pandas/core/frame.py | 6 +++--- pandas/tests/frame/test_subclass.py | 32 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a5e134617b71a..c7bc1eb71a14e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7861,7 +7861,7 @@ def count(self, axis=0, level=None, numeric_only=False): # GH #423 if len(frame._get_axis(axis)) == 0: - result = Series(0, index=frame._get_agg_axis(axis)) + result = self._constructor_sliced(0, index=frame._get_agg_axis(axis)) else: if frame._is_mixed_type or frame._data.any_extension_types: # the or any_extension_types is really only hit for single- @@ -7871,7 +7871,7 @@ def count(self, axis=0, level=None, numeric_only=False): # GH13407 series_counts = notna(frame).sum(axis=axis) counts = series_counts.values - result = Series(counts, index=frame._get_agg_axis(axis)) + result = self._constructor_sliced(counts, index=frame._get_agg_axis(axis)) return result.astype("int64") @@ -7910,7 +7910,7 @@ def _count_level(self, level, axis=0, numeric_only=False): level_codes = ensure_int64(count_axis.codes[level]) counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0) - result = DataFrame(counts, index=level_index, columns=agg_axis) + result = self._constructor(counts, index=level_index, columns=agg_axis) if axis == 1: # Undo our earlier transpose diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index a2e7dc527c4b8..491e517ec4ff2 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -571,3 +571,35 @@ def test_subclassed_boolean_reductions(self, all_boolean_reductions): df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) result = getattr(df, all_boolean_reductions)() assert isinstance(result, tm.SubclassedSeries) + + def test_subclassed_count(self): + + df = tm.SubclassedDataFrame( + { + "Person": ["John", "Myla", "Lewis", "John", "Myla"], + "Age": [24.0, np.nan, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]}) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + result = df.count(level=1) + assert isinstance(result, tm.SubclassedDataFrame) + + df = tm.SubclassedDataFrame() + result = df.count() + assert isinstance(result, tm.SubclassedSeries) From 43673b65b78e0e7553c4d3b09871144080ac22c5 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:26:03 -0700 Subject: [PATCH 02/19] Subclassed type support in DataFrame.count() --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_subclass.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7bc1eb71a14e..4f1b6ab9edc56 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8506,7 +8506,7 @@ def isin(self, values) -> "DataFrame": "to be passed to DataFrame.isin(), " f"you passed a '{type(values).__name__}'" ) - return DataFrame( + return self._constructor( algorithms.isin(self.values.ravel(), values).reshape(self.shape), self.index, self.columns, diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 491e517ec4ff2..7d7e5eee775e4 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -603,3 +603,11 @@ def test_subclassed_count(self): df = tm.SubclassedDataFrame() result = df.count() assert isinstance(result, tm.SubclassedSeries) + + def test_isin(self): + + df = tm.SubclassedDataFrame( + {"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"] + ) + result = df.isin([0, 2]) + assert isinstance(result, tm.SubclassedDataFrame) From 7f594b5dfa9f2fcb83a72d78bbdf35496d44436a Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:27:45 -0700 Subject: [PATCH 03/19] Subclassed type support in DataFrame.duplicated() --- pandas/core/frame.py | 4 ++-- pandas/tests/frame/test_subclass.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4f1b6ab9edc56..492e2be2a1931 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4883,7 +4883,7 @@ def duplicated( from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT if self.empty: - return Series(dtype=bool) + return self._constructor_sliced(dtype=bool) def f(vals): labels, shape = algorithms.factorize( @@ -4915,7 +4915,7 @@ def f(vals): labels, shape = map(list, zip(*map(f, vals))) ids = get_group_index(labels, shape, sort=False, xnull=False) - return Series(duplicated_int64(ids, keep), index=self.index) + return self._constructor_sliced(duplicated_int64(ids, keep), index=self.index) # ---------------------------------------------------------------------- # Sorting diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 7d7e5eee775e4..a130e82df9997 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -611,3 +611,13 @@ def test_isin(self): ) result = df.isin([0, 2]) assert isinstance(result, tm.SubclassedDataFrame) + + def test_duplicated(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame() + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file From a278d38002a4c1310ffccb3b56f2531131b78fa3 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:29:00 -0700 Subject: [PATCH 04/19] Subclassed type support in DataFrame.idxmin() --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_subclass.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 492e2be2a1931..05b3eaabf52da 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8110,7 +8110,7 @@ def idxmin(self, axis=0, skipna=True) -> Series: indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices] - return Series(result, index=self._get_agg_axis(axis)) + return self._constructor_sliced(result, index=self._get_agg_axis(axis)) def idxmax(self, axis=0, skipna=True) -> Series: """ diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index a130e82df9997..4e53d370ddc87 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -620,4 +620,10 @@ def test_duplicated(self): df = tm.SubclassedDataFrame() result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + def test_idxmin(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.idxmin() assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file From b9cf60a2cee2298f256a830f9730147ba387f9bf Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:29:46 -0700 Subject: [PATCH 05/19] Subclassed type support in DataFrame.idxmax() --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_subclass.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 05b3eaabf52da..394e905004fc9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8148,7 +8148,7 @@ def idxmax(self, axis=0, skipna=True) -> Series: indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices] - return Series(result, index=self._get_agg_axis(axis)) + return self._constructor_sliced(result, index=self._get_agg_axis(axis)) def _get_agg_axis(self, axis_num): """ diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 4e53d370ddc87..07081f62e98d7 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -626,4 +626,10 @@ def test_idxmin(self): df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) result = df.idxmin() + assert isinstance(result, tm.SubclassedSeries) + + def test_idxmax(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.idxmax() assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file From 99742a022c490a649c092b1ac7c6d94a02d04e8f Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:31:34 -0700 Subject: [PATCH 06/19] Subclassed type support in DataFrame.dot() --- pandas/core/frame.py | 4 ++-- pandas/tests/frame/test_subclass.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 394e905004fc9..937e1fe7830b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1144,13 +1144,13 @@ def dot(self, other): np.dot(lvals, rvals), index=left.index, columns=other.columns ) elif isinstance(other, Series): - return Series(np.dot(lvals, rvals), index=left.index) + return self._constructor_sliced(np.dot(lvals, rvals), index=left.index) elif isinstance(rvals, (np.ndarray, Index)): result = np.dot(lvals, rvals) if result.ndim == 2: return self._constructor(result, index=left.index) else: - return Series(result, index=left.index) + return self._constructor_sliced(result, index=left.index) else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 07081f62e98d7..d56a4bd258885 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -632,4 +632,16 @@ def test_idxmax(self): df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) result = df.idxmax() - assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file + assert isinstance(result, tm.SubclassedSeries) + + def test_dot(self): + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedSeries([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedDataFrame([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedDataFrame) \ No newline at end of file From b1172761c822f719f48ab4fb852a6e70bd153ab5 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:35:00 -0700 Subject: [PATCH 07/19] Subclassed type support in DataFrame.memory_usage() --- pandas/core/frame.py | 4 ++-- pandas/tests/frame/test_subclass.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 937e1fe7830b6..0b32be972e6e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2585,12 +2585,12 @@ def memory_usage(self, index=True, deep=False) -> Series: >>> df['object'].astype('category').memory_usage(deep=True) 5216 """ - result = Series( + result = self._constructor_sliced( [c.memory_usage(index=False, deep=deep) for col, c in self.items()], index=self.columns, ) if index: - result = Series(self.index.memory_usage(deep=deep), index=["Index"]).append( + result = self._constructor_sliced(self.index.memory_usage(deep=deep), index=["Index"]).append( result ) return result diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index d56a4bd258885..88a5ef84cf927 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -644,4 +644,13 @@ def test_dot(self): df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) s = tm.SubclassedDataFrame([1, 1, 2, 1]) result = df.dot(s) - assert isinstance(result, tm.SubclassedDataFrame) \ No newline at end of file + assert isinstance(result, tm.SubclassedDataFrame) + + def test_memory_usage(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.memory_usage() + assert isinstance(result, tm.SubclassedSeries) + + result = df.memory_usage(index=False) + assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file From 20f9574958f38415ba9ee1afebaabe9372d28534 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:37:31 -0700 Subject: [PATCH 08/19] Subclassed type support in DataFrame.corrwith() --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_subclass.py | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0b32be972e6e3..26c937d0eae53 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7748,7 +7748,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series: def c(x): return nanops.nancorr(x[0], x[1], method=method) - correl = Series( + correl = self._constructor_sliced( map(c, zip(left.values.T, right.values.T)), index=left.columns ) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 88a5ef84cf927..c8bde144919b0 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -653,4 +653,17 @@ def test_memory_usage(self): assert isinstance(result, tm.SubclassedSeries) result = df.memory_usage(index=False) - assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file + assert isinstance(result, tm.SubclassedSeries) + + def test_corrwith(self): + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] + df1 = tm.SubclassedDataFrame( + np.random.randn(5, 4), index=index, columns=columns + ) + df2 = tm.SubclassedDataFrame( + np.random.randn(4, 4), index=index[:4], columns=columns + ) + correls = df1.corrwith(df2, axis=1, drop=True, method="kendall") + + assert isinstance(correls, (tm.SubclassedSeries)) \ No newline at end of file From f0eaaa2c20e58ae2ade80cbf4561657c2190fd1e Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 12:41:55 -0700 Subject: [PATCH 09/19] Better subclassed type support in DataFrame.asof() --- pandas/core/generic.py | 12 +++--------- pandas/tests/frame/test_subclass.py | 24 +++++++++++++++++++++++- pandas/tests/series/test_subclass.py | 11 ++++++++++- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bfeaf8bca48e9..35d6be3022654 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6962,9 +6962,7 @@ def asof(self, where, subset=None): if where < start: if not is_series: - from pandas import Series - - return Series(index=self.columns, name=where, dtype=np.float64) + return self._constructor_sliced(index=self.columns, name=where, dtype=np.float64) return np.nan # It's always much faster to use a *while* loop here for @@ -6991,13 +6989,9 @@ def asof(self, where, subset=None): if is_series: return self._constructor(np.nan, index=where, name=self.name) elif is_list: - from pandas import DataFrame - - return DataFrame(np.nan, index=where, columns=self.columns) + return self._constructor(np.nan, index=where, columns=self.columns) else: - from pandas import Series - - return Series(np.nan, index=self.columns, name=where[0]) + return self._constructor_sliced(np.nan, index=self.columns, name=where[0]) locs = self.index.asof_locs(where, ~(nulls.values)) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index c8bde144919b0..4cee868892d6e 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -666,4 +666,26 @@ def test_corrwith(self): ) correls = df1.corrwith(df2, axis=1, drop=True, method="kendall") - assert isinstance(correls, (tm.SubclassedSeries)) \ No newline at end of file + assert isinstance(correls, (tm.SubclassedSeries)) + + def test_asof(self): + + N = 3 + rng = pd.date_range("1/1/1990", periods=N, freq="53s") + df = tm.SubclassedDataFrame( + { + "A": [np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan], + "C": [np.nan, np.nan, np.nan], + }, + index=rng, + ) + + result = df.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedDataFrame) + + result = df.asof(rng[-2]) + assert isinstance(result, tm.SubclassedSeries) + + result = df.asof("1989-12-31") + assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 73247bbf8b3d6..cc881c3b13e67 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -1,5 +1,6 @@ import pandas._testing as tm - +import pandas as pd +import numpy as np class TestSeriesSubclassing: def test_indexing_sliced(self): @@ -35,3 +36,11 @@ def test_subclass_empty_repr(self): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): sub_series = tm.SubclassedSeries() assert "SubclassedSeries" in repr(sub_series) + + def test_asof(self): + N = 3 + rng = pd.date_range("1/1/1990", periods=N, freq="53s") + s = tm.SubclassedSeries({"A": [np.nan, np.nan, np.nan]}, index=rng) + + result = s.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file From 0393c5db3a093a850faaeea7ad1b8bfc1d184863 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 13:23:00 -0700 Subject: [PATCH 10/19] Subclassed type support in Series.explode() --- pandas/core/series.py | 2 +- pandas/tests/series/test_subclass.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e79404ccd9521..b2e982b030696 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3471,7 +3471,7 @@ def explode(self) -> "Series": values, counts = reshape.explode(np.asarray(self.array)) - result = Series(values, index=self.index.repeat(counts), name=self.name) + result = self._constructor(values, index=self.index.repeat(counts), name=self.name) return result def unstack(self, level=-1, fill_value=None): diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index cc881c3b13e67..9c64e38cbafd5 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -43,4 +43,9 @@ def test_asof(self): s = tm.SubclassedSeries({"A": [np.nan, np.nan, np.nan]}, index=rng) result = s.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedSeries) + + def test_explode(self): + s = tm.SubclassedSeries([[1, 2, 3], "foo", [], [3, 4]]) + result = s.explode() assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file From 40f1ba78c1764c8c78ce3c6a774867880e122b92 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 13:43:02 -0700 Subject: [PATCH 11/19] black formatting --- pandas/core/frame.py | 10 ++++++---- pandas/core/generic.py | 8 ++++++-- pandas/core/series.py | 4 +++- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/series/test_subclass.py | 3 ++- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 26c937d0eae53..1bc2c11132a0c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2590,9 +2590,9 @@ def memory_usage(self, index=True, deep=False) -> Series: index=self.columns, ) if index: - result = self._constructor_sliced(self.index.memory_usage(deep=deep), index=["Index"]).append( - result - ) + result = self._constructor_sliced( + self.index.memory_usage(deep=deep), index=["Index"] + ).append(result) return result def transpose(self, *args, copy: bool = False) -> "DataFrame": @@ -7871,7 +7871,9 @@ def count(self, axis=0, level=None, numeric_only=False): # GH13407 series_counts = notna(frame).sum(axis=axis) counts = series_counts.values - result = self._constructor_sliced(counts, index=frame._get_agg_axis(axis)) + result = self._constructor_sliced( + counts, index=frame._get_agg_axis(axis) + ) return result.astype("int64") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35d6be3022654..8cfd5aa249355 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6962,7 +6962,9 @@ def asof(self, where, subset=None): if where < start: if not is_series: - return self._constructor_sliced(index=self.columns, name=where, dtype=np.float64) + return self._constructor_sliced( + index=self.columns, name=where, dtype=np.float64 + ) return np.nan # It's always much faster to use a *while* loop here for @@ -6991,7 +6993,9 @@ def asof(self, where, subset=None): elif is_list: return self._constructor(np.nan, index=where, columns=self.columns) else: - return self._constructor_sliced(np.nan, index=self.columns, name=where[0]) + return self._constructor_sliced( + np.nan, index=self.columns, name=where[0] + ) locs = self.index.asof_locs(where, ~(nulls.values)) diff --git a/pandas/core/series.py b/pandas/core/series.py index b2e982b030696..9dcc6ab9a125e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3471,7 +3471,9 @@ def explode(self) -> "Series": values, counts = reshape.explode(np.asarray(self.array)) - result = self._constructor(values, index=self.index.repeat(counts), name=self.name) + result = self._constructor( + values, index=self.index.repeat(counts), name=self.name + ) return result def unstack(self, level=-1, fill_value=None): diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 4cee868892d6e..1e1df43eb0990 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -688,4 +688,4 @@ def test_asof(self): assert isinstance(result, tm.SubclassedSeries) result = df.asof("1989-12-31") - assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file + assert isinstance(result, tm.SubclassedSeries) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 9c64e38cbafd5..0e51984da82b7 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -2,6 +2,7 @@ import pandas as pd import numpy as np + class TestSeriesSubclassing: def test_indexing_sliced(self): s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd")) @@ -48,4 +49,4 @@ def test_asof(self): def test_explode(self): s = tm.SubclassedSeries([[1, 2, 3], "foo", [], [3, 4]]) result = s.explode() - assert isinstance(result, tm.SubclassedSeries) \ No newline at end of file + assert isinstance(result, tm.SubclassedSeries) From 979f3ccc09c5a68255fc74779bed5aae7c062d08 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sun, 26 Jan 2020 13:43:11 -0700 Subject: [PATCH 12/19] Added What's New entry --- doc/source/whatsnew/v1.1.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 920919755dc23..859b95a9e5e9d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -190,6 +190,9 @@ Reshaping - Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) - Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`) +- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`, + :meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`, + :meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame.asof()` not returning subclassed types. Sparse ^^^^^^ From c926c9f2351e979b12287570bf2c672f45e2f109 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sat, 15 Feb 2020 08:48:17 -0700 Subject: [PATCH 13/19] Parameterized idx tests --- pandas/tests/frame/test_subclass.py | 11 +++-------- pandas/tests/series/test_subclass.py | 5 +++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 1e1df43eb0990..3e665027fab49 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -622,16 +622,11 @@ def test_duplicated(self): result = df.duplicated() assert isinstance(result, tm.SubclassedSeries) - def test_idxmin(self): + @pytest.mark.parametrize("idx_method", ["idxmax", "idxmin"]) + def test_idx(self, idx_method): df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - result = df.idxmin() - assert isinstance(result, tm.SubclassedSeries) - - def test_idxmax(self): - - df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - result = df.idxmax() + result = getattr(df, idx_method)() assert isinstance(result, tm.SubclassedSeries) def test_dot(self): diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 0e51984da82b7..a596ed49c1df2 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -1,7 +1,8 @@ -import pandas._testing as tm -import pandas as pd import numpy as np +import pandas as pd +import pandas._testing as tm + class TestSeriesSubclassing: def test_indexing_sliced(self): From 570dc6d76706acbf283f1b5ff676b4ee635e274b Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Fri, 17 Apr 2020 20:44:45 -0700 Subject: [PATCH 14/19] Merge master --- doc/source/whatsnew/v1.1.0.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a797090a83444..989308896af6c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -624,7 +624,9 @@ Reshaping - Bug in :meth:`concat` where when passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`) - :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregating non-existant column (:issue:`32755`) - Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`) - +- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`, + :meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`, + :meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame. Sparse ^^^^^^ From e853d8049fea16c882285f4303cfa28b96c04bcd Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sat, 18 Apr 2020 10:32:43 -0700 Subject: [PATCH 15/19] Added skip_if_no_scipy to failing test for three build systems failing on scipy import. --- pandas/tests/frame/test_subclass.py | 3 +++ web/pandas_web.py | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 905d756a60394..397e605fd85dd 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm @@ -652,6 +654,7 @@ def test_memory_usage(self): result = df.memory_usage(index=False) assert isinstance(result, tm.SubclassedSeries) + @td.skip_if_no_scipy def test_corrwith(self): index = ["a", "b", "c", "d", "e"] columns = ["one", "two", "three", "four"] diff --git a/web/pandas_web.py b/web/pandas_web.py index e62deaa8cdc7f..7dd63175e69ac 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -34,13 +34,12 @@ import time import typing +import feedparser import jinja2 +import markdown import requests import yaml -import feedparser -import markdown - class Preprocessors: """ From 4e60f49b2a2861ad736ad571859c432735dd3bac Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sat, 18 Apr 2020 10:34:17 -0700 Subject: [PATCH 16/19] Add docs --- doc/source/whatsnew/v1.1.0.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 989308896af6c..a797090a83444 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -624,9 +624,7 @@ Reshaping - Bug in :meth:`concat` where when passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`) - :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregating non-existant column (:issue:`32755`) - Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`) -- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`, - :meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`, - :meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame. + Sparse ^^^^^^ From 5794ffc5f74b18d0fbf59ef34c947496dcd06d4c Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sat, 18 Apr 2020 11:20:28 -0700 Subject: [PATCH 17/19] Removed an error in merging master. --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 746a263a69d3b..b42f793c48c54 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8097,7 +8097,6 @@ def _count_level(self, level, axis=0, numeric_only=False): level_index = count_axis.levels[level]._shallow_copy(name=level_name) level_codes = ensure_int64(count_axis.codes[level]) counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis) - counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0) if axis == 1: result = self._constructor(counts, index=agg_axis, columns=level_index) From 5072f9ea884eca7545077e508ff144162e91be38 Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sat, 18 Apr 2020 11:56:49 -0700 Subject: [PATCH 18/19] Final commit for review --- doc/source/whatsnew/v1.1.0.rst | 5 ++++- web/pandas_web.py | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a797090a83444..c48e8e10c1007 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -624,7 +624,10 @@ Reshaping - Bug in :meth:`concat` where when passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`) - :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregating non-existant column (:issue:`32755`) - Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`) - +- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`, + :meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`, + :meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame.asof()` not + returning subclassed types. (:issue:`31331`) Sparse ^^^^^^ diff --git a/web/pandas_web.py b/web/pandas_web.py index 7dd63175e69ac..e62deaa8cdc7f 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -34,12 +34,13 @@ import time import typing -import feedparser import jinja2 -import markdown import requests import yaml +import feedparser +import markdown + class Preprocessors: """ From a25da2e91e691655e242c15955d084259c2dff8e Mon Sep 17 00:00:00 2001 From: "emiliano.jordan" Date: Sat, 9 May 2020 12:33:50 -0700 Subject: [PATCH 19/19] created all_reductions fixture --- pandas/conftest.py | 11 +++++++++++ pandas/tests/frame/test_subclass.py | 11 ++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index e1088dae3925a..94136f0fb6614 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -705,6 +705,17 @@ def all_boolean_reductions(request): return request.param +_all_reductions = _all_numeric_reductions + _all_boolean_reductions + + +@pytest.fixture(params=_all_reductions) +def all_reductions(request): + """ + Fixture for all (boolean + numeric) reduction names. + """ + return request.param + + @pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]) def all_compare_operators(request): """ diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 397e605fd85dd..72253f7780a71 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -562,18 +562,11 @@ def strech(row): assert not isinstance(result, tm.SubclassedDataFrame) tm.assert_series_equal(result, expected) - def test_subclassed_numeric_reductions(self, all_numeric_reductions): + def test_subclassed_reductions(self, all_reductions): # GH 25596 df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - result = getattr(df, all_numeric_reductions)() - assert isinstance(result, tm.SubclassedSeries) - - def test_subclassed_boolean_reductions(self, all_boolean_reductions): - # GH 25596 - - df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - result = getattr(df, all_boolean_reductions)() + result = getattr(df, all_reductions)() assert isinstance(result, tm.SubclassedSeries) def test_subclassed_count(self):