diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 79f78471922bc..d8e600f2b64e0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -748,10 +748,13 @@ Reshaping - Bug in :meth:`concat` where when passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`) - :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregating non-existant column (:issue:`32755`) - Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`) +- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`, + :meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`, + :meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame.asof()` not + returning subclassed types. (:issue:`31331`) - Bug in :func:`concat` was not allowing for concatenation of ``DataFrame`` and ``Series`` with duplicate keys (:issue:`33654`) - Bug in :func:`cut` raised an error when non-unique labels (:issue:`33141`) - Sparse ^^^^^^ - Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`) diff --git a/pandas/conftest.py b/pandas/conftest.py index 11bb16fc0a3a9..1e7f1b769c856 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -709,6 +709,17 @@ def all_boolean_reductions(request): return request.param +_all_reductions = _all_numeric_reductions + _all_boolean_reductions + + +@pytest.fixture(params=_all_reductions) +def all_reductions(request): + """ + Fixture for all (boolean + numeric) reduction names. + """ + return request.param + + @pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]) def all_compare_operators(request): """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 540524df44632..38be67188a5bb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1173,13 +1173,13 @@ def dot(self, other): np.dot(lvals, rvals), index=left.index, columns=other.columns ) elif isinstance(other, Series): - return Series(np.dot(lvals, rvals), index=left.index) + return self._constructor_sliced(np.dot(lvals, rvals), index=left.index) elif isinstance(rvals, (np.ndarray, Index)): result = np.dot(lvals, rvals) if result.ndim == 2: return self._constructor(result, index=left.index) else: - return Series(result, index=left.index) + return self._constructor_sliced(result, index=left.index) else: # pragma: no cover raise TypeError(f"unsupported type: {type(other)}") @@ -2533,14 +2533,14 @@ def memory_usage(self, index=True, deep=False) -> Series: >>> df['object'].astype('category').memory_usage(deep=True) 5216 """ - result = Series( + result = self._constructor_sliced( [c.memory_usage(index=False, deep=deep) for col, c in self.items()], index=self.columns, ) if index: - result = Series(self.index.memory_usage(deep=deep), index=["Index"]).append( - result - ) + result = self._constructor_sliced( + self.index.memory_usage(deep=deep), index=["Index"] + ).append(result) return result def transpose(self, *args, copy: bool = False) -> "DataFrame": @@ -5013,7 +5013,7 @@ def duplicated( from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT if self.empty: - return Series(dtype=bool) + return self._constructor_sliced(dtype=bool) def f(vals): labels, shape = algorithms.factorize( @@ -5045,7 +5045,7 @@ def f(vals): labels, shape = map(list, zip(*map(f, vals))) ids = get_group_index(labels, shape, sort=False, xnull=False) - return Series(duplicated_int64(ids, keep), index=self.index) + return self._constructor_sliced(duplicated_int64(ids, keep), index=self.index) # ---------------------------------------------------------------------- # Sorting @@ -8084,7 +8084,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series: def c(x): return nanops.nancorr(x[0], x[1], method=method) - correl = Series( + correl = self._constructor_sliced( map(c, zip(left.values.T, right.values.T)), index=left.columns ) @@ -8197,7 +8197,7 @@ def count(self, axis=0, level=None, numeric_only=False): # GH #423 if len(frame._get_axis(axis)) == 0: - result = Series(0, index=frame._get_agg_axis(axis)) + result = self._constructor_sliced(0, index=frame._get_agg_axis(axis)) else: if frame._is_mixed_type or frame._mgr.any_extension_types: # the or any_extension_types is really only hit for single- @@ -8207,7 +8207,9 @@ def count(self, axis=0, level=None, numeric_only=False): # GH13407 series_counts = notna(frame).sum(axis=axis) counts = series_counts.values - result = Series(counts, index=frame._get_agg_axis(axis)) + result = self._constructor_sliced( + counts, index=frame._get_agg_axis(axis) + ) return result.astype("int64") @@ -8250,9 +8252,9 @@ def _count_level(self, level, axis=0, numeric_only=False): counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis) if axis == 1: - result = DataFrame(counts, index=agg_axis, columns=level_index) + result = self._constructor(counts, index=agg_axis, columns=level_index) else: - result = DataFrame(counts, index=level_index, columns=agg_axis) + result = self._constructor(counts, index=level_index, columns=agg_axis) return result @@ -8523,7 +8525,7 @@ def idxmin(self, axis=0, skipna=True) -> Series: index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices] - return Series(result, index=self._get_agg_axis(axis)) + return self._constructor_sliced(result, index=self._get_agg_axis(axis)) def idxmax(self, axis=0, skipna=True) -> Series: """ @@ -8596,7 +8598,7 @@ def idxmax(self, axis=0, skipna=True) -> Series: index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices] - return Series(result, index=self._get_agg_axis(axis)) + return self._constructor_sliced(result, index=self._get_agg_axis(axis)) def _get_agg_axis(self, axis_num: int) -> Index: """ @@ -8940,7 +8942,7 @@ def isin(self, values) -> "DataFrame": "to be passed to DataFrame.isin(), " f"you passed a '{type(values).__name__}'" ) - return DataFrame( + return self._constructor( algorithms.isin(self.values.ravel(), values).reshape(self.shape), self.index, self.columns, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b550857252466..c3ac86e450428 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7067,9 +7067,9 @@ def asof(self, where, subset=None): if where < start: if not is_series: - from pandas import Series - - return Series(index=self.columns, name=where, dtype=np.float64) + return self._constructor_sliced( + index=self.columns, name=where, dtype=np.float64 + ) return np.nan # It's always much faster to use a *while* loop here for @@ -7096,13 +7096,11 @@ def asof(self, where, subset=None): if is_series: return self._constructor(np.nan, index=where, name=self.name) elif is_list: - from pandas import DataFrame - - return DataFrame(np.nan, index=where, columns=self.columns) + return self._constructor(np.nan, index=where, columns=self.columns) else: - from pandas import Series - - return Series(np.nan, index=self.columns, name=where[0]) + return self._constructor_sliced( + np.nan, index=self.columns, name=where[0] + ) locs = self.index.asof_locs(where, ~(nulls._values)) diff --git a/pandas/core/series.py b/pandas/core/series.py index b236825ac4dc8..73c6103b99bf9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3625,7 +3625,9 @@ def explode(self) -> "Series": values, counts = reshape.explode(np.asarray(self.array)) - result = Series(values, index=self.index.repeat(counts), name=self.name) + result = self._constructor( + values, index=self.index.repeat(counts), name=self.name + ) return result def unstack(self, level=-1, fill_value=None): diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 16bf651829a04..72253f7780a71 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm @@ -560,16 +562,123 @@ def strech(row): assert not isinstance(result, tm.SubclassedDataFrame) tm.assert_series_equal(result, expected) - def test_subclassed_numeric_reductions(self, all_numeric_reductions): + def test_subclassed_reductions(self, all_reductions): # GH 25596 df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - result = getattr(df, all_numeric_reductions)() + result = getattr(df, all_reductions)() assert isinstance(result, tm.SubclassedSeries) - def test_subclassed_boolean_reductions(self, all_boolean_reductions): - # GH 25596 + def test_subclassed_count(self): + + df = tm.SubclassedDataFrame( + { + "Person": ["John", "Myla", "Lewis", "John", "Myla"], + "Age": [24.0, np.nan, 21.0, 33, 26], + "Single": [False, True, True, True, False], + } + ) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]}) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + result = df.count(level=1) + assert isinstance(result, tm.SubclassedDataFrame) + + df = tm.SubclassedDataFrame() + result = df.count() + assert isinstance(result, tm.SubclassedSeries) + + def test_isin(self): + + df = tm.SubclassedDataFrame( + {"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"] + ) + result = df.isin([0, 2]) + assert isinstance(result, tm.SubclassedDataFrame) + + def test_duplicated(self): df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) - result = getattr(df, all_boolean_reductions)() + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame() + result = df.duplicated() + assert isinstance(result, tm.SubclassedSeries) + + @pytest.mark.parametrize("idx_method", ["idxmax", "idxmin"]) + def test_idx(self, idx_method): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = getattr(df, idx_method)() + assert isinstance(result, tm.SubclassedSeries) + + def test_dot(self): + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedSeries([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedSeries) + + df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + s = tm.SubclassedDataFrame([1, 1, 2, 1]) + result = df.dot(s) + assert isinstance(result, tm.SubclassedDataFrame) + + def test_memory_usage(self): + + df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) + result = df.memory_usage() + assert isinstance(result, tm.SubclassedSeries) + + result = df.memory_usage(index=False) + assert isinstance(result, tm.SubclassedSeries) + + @td.skip_if_no_scipy + def test_corrwith(self): + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] + df1 = tm.SubclassedDataFrame( + np.random.randn(5, 4), index=index, columns=columns + ) + df2 = tm.SubclassedDataFrame( + np.random.randn(4, 4), index=index[:4], columns=columns + ) + correls = df1.corrwith(df2, axis=1, drop=True, method="kendall") + + assert isinstance(correls, (tm.SubclassedSeries)) + + def test_asof(self): + + N = 3 + rng = pd.date_range("1/1/1990", periods=N, freq="53s") + df = tm.SubclassedDataFrame( + { + "A": [np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan], + "C": [np.nan, np.nan, np.nan], + }, + index=rng, + ) + + result = df.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedDataFrame) + + result = df.asof(rng[-2]) + assert isinstance(result, tm.SubclassedSeries) + + result = df.asof("1989-12-31") assert isinstance(result, tm.SubclassedSeries) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 73247bbf8b3d6..a596ed49c1df2 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -1,3 +1,6 @@ +import numpy as np + +import pandas as pd import pandas._testing as tm @@ -35,3 +38,16 @@ def test_subclass_empty_repr(self): with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): sub_series = tm.SubclassedSeries() assert "SubclassedSeries" in repr(sub_series) + + def test_asof(self): + N = 3 + rng = pd.date_range("1/1/1990", periods=N, freq="53s") + s = tm.SubclassedSeries({"A": [np.nan, np.nan, np.nan]}, index=rng) + + result = s.asof(rng[-2:]) + assert isinstance(result, tm.SubclassedSeries) + + def test_explode(self): + s = tm.SubclassedSeries([[1, 2, 3], "foo", [], [3, 4]]) + result = s.explode() + assert isinstance(result, tm.SubclassedSeries)