Skip to content

Commit 8f125f6

Browse files
author
Emiliano Jordan
authored
BUG: Increased support for subclassed types. (#31331)
1 parent a5db643 commit 8f125f6

File tree

7 files changed

+173
-32
lines changed

7 files changed

+173
-32
lines changed

doc/source/whatsnew/v1.1.0.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -781,11 +781,14 @@ Reshaping
781781
- Bug in :func:`concat` where passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`)
782782
- :meth:`DataFrame.agg` now provides a more descriptive ``SpecificationError`` message when attempting to aggregate a non-existent column (:issue:`32755`)
783783
- Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`)
784+
- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`,
785+
:meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`,
786+
:meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame.asof()` not
787+
returning subclassed types. (:issue:`31331`)
784788
- Bug in :func:`concat` where concatenation of ``DataFrame`` and ``Series`` with duplicate keys was not allowed (:issue:`33654`)
785789
- Bug in :func:`cut` where an error was raised when the labels were non-unique (:issue:`33141`)
786790
- Bug in :meth:`DataFrame.replace` where columns were cast to ``object`` dtype if items in ``to_replace`` were not in values (:issue:`32988`)
787791

788-
789792
Sparse
790793
^^^^^^
791794
- Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`)

pandas/conftest.py

+11
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,17 @@ def all_boolean_reductions(request):
709709
return request.param
710710

711711

712+
_all_reductions = _all_numeric_reductions + _all_boolean_reductions
713+
714+
715+
@pytest.fixture(params=_all_reductions)
716+
def all_reductions(request):
717+
"""
718+
Fixture for all (boolean + numeric) reduction names.
719+
"""
720+
return request.param
721+
722+
712723
@pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"])
713724
def all_compare_operators(request):
714725
"""

pandas/core/frame.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -1173,13 +1173,13 @@ def dot(self, other):
11731173
np.dot(lvals, rvals), index=left.index, columns=other.columns
11741174
)
11751175
elif isinstance(other, Series):
1176-
return Series(np.dot(lvals, rvals), index=left.index)
1176+
return self._constructor_sliced(np.dot(lvals, rvals), index=left.index)
11771177
elif isinstance(rvals, (np.ndarray, Index)):
11781178
result = np.dot(lvals, rvals)
11791179
if result.ndim == 2:
11801180
return self._constructor(result, index=left.index)
11811181
else:
1182-
return Series(result, index=left.index)
1182+
return self._constructor_sliced(result, index=left.index)
11831183
else: # pragma: no cover
11841184
raise TypeError(f"unsupported type: {type(other)}")
11851185

@@ -2533,14 +2533,14 @@ def memory_usage(self, index=True, deep=False) -> Series:
25332533
>>> df['object'].astype('category').memory_usage(deep=True)
25342534
5216
25352535
"""
2536-
result = Series(
2536+
result = self._constructor_sliced(
25372537
[c.memory_usage(index=False, deep=deep) for col, c in self.items()],
25382538
index=self.columns,
25392539
)
25402540
if index:
2541-
result = Series(self.index.memory_usage(deep=deep), index=["Index"]).append(
2542-
result
2543-
)
2541+
result = self._constructor_sliced(
2542+
self.index.memory_usage(deep=deep), index=["Index"]
2543+
).append(result)
25442544
return result
25452545

25462546
def transpose(self, *args, copy: bool = False) -> "DataFrame":
@@ -5013,7 +5013,7 @@ def duplicated(
50135013
from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT
50145014

50155015
if self.empty:
5016-
return Series(dtype=bool)
5016+
return self._constructor_sliced(dtype=bool)
50175017

50185018
def f(vals):
50195019
labels, shape = algorithms.factorize(
@@ -5045,7 +5045,7 @@ def f(vals):
50455045
labels, shape = map(list, zip(*map(f, vals)))
50465046

50475047
ids = get_group_index(labels, shape, sort=False, xnull=False)
5048-
return Series(duplicated_int64(ids, keep), index=self.index)
5048+
return self._constructor_sliced(duplicated_int64(ids, keep), index=self.index)
50495049

50505050
# ----------------------------------------------------------------------
50515051
# Sorting
@@ -8121,7 +8121,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series:
81218121
def c(x):
81228122
return nanops.nancorr(x[0], x[1], method=method)
81238123

8124-
correl = Series(
8124+
correl = self._constructor_sliced(
81258125
map(c, zip(left.values.T, right.values.T)), index=left.columns
81268126
)
81278127

@@ -8234,7 +8234,7 @@ def count(self, axis=0, level=None, numeric_only=False):
82348234

82358235
# GH #423
82368236
if len(frame._get_axis(axis)) == 0:
8237-
result = Series(0, index=frame._get_agg_axis(axis))
8237+
result = self._constructor_sliced(0, index=frame._get_agg_axis(axis))
82388238
else:
82398239
if frame._is_mixed_type or frame._mgr.any_extension_types:
82408240
# the or any_extension_types is really only hit for single-
@@ -8244,7 +8244,9 @@ def count(self, axis=0, level=None, numeric_only=False):
82448244
# GH13407
82458245
series_counts = notna(frame).sum(axis=axis)
82468246
counts = series_counts.values
8247-
result = Series(counts, index=frame._get_agg_axis(axis))
8247+
result = self._constructor_sliced(
8248+
counts, index=frame._get_agg_axis(axis)
8249+
)
82488250

82498251
return result.astype("int64")
82508252

@@ -8287,9 +8289,9 @@ def _count_level(self, level, axis=0, numeric_only=False):
82878289
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis)
82888290

82898291
if axis == 1:
8290-
result = DataFrame(counts, index=agg_axis, columns=level_index)
8292+
result = self._constructor(counts, index=agg_axis, columns=level_index)
82918293
else:
8292-
result = DataFrame(counts, index=level_index, columns=agg_axis)
8294+
result = self._constructor(counts, index=level_index, columns=agg_axis)
82938295

82948296
return result
82958297

@@ -8560,7 +8562,7 @@ def idxmin(self, axis=0, skipna=True) -> Series:
85608562

85618563
index = self._get_axis(axis)
85628564
result = [index[i] if i >= 0 else np.nan for i in indices]
8563-
return Series(result, index=self._get_agg_axis(axis))
8565+
return self._constructor_sliced(result, index=self._get_agg_axis(axis))
85648566

85658567
def idxmax(self, axis=0, skipna=True) -> Series:
85668568
"""
@@ -8633,7 +8635,7 @@ def idxmax(self, axis=0, skipna=True) -> Series:
86338635

86348636
index = self._get_axis(axis)
86358637
result = [index[i] if i >= 0 else np.nan for i in indices]
8636-
return Series(result, index=self._get_agg_axis(axis))
8638+
return self._constructor_sliced(result, index=self._get_agg_axis(axis))
86378639

86388640
def _get_agg_axis(self, axis_num: int) -> Index:
86398641
"""
@@ -8977,7 +8979,7 @@ def isin(self, values) -> "DataFrame":
89778979
"to be passed to DataFrame.isin(), "
89788980
f"you passed a '{type(values).__name__}'"
89798981
)
8980-
return DataFrame(
8982+
return self._constructor(
89818983
algorithms.isin(self.values.ravel(), values).reshape(self.shape),
89828984
self.index,
89838985
self.columns,

pandas/core/generic.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -7067,9 +7067,9 @@ def asof(self, where, subset=None):
70677067

70687068
if where < start:
70697069
if not is_series:
7070-
from pandas import Series
7071-
7072-
return Series(index=self.columns, name=where, dtype=np.float64)
7070+
return self._constructor_sliced(
7071+
index=self.columns, name=where, dtype=np.float64
7072+
)
70737073
return np.nan
70747074

70757075
# It's always much faster to use a *while* loop here for
@@ -7096,13 +7096,11 @@ def asof(self, where, subset=None):
70967096
if is_series:
70977097
return self._constructor(np.nan, index=where, name=self.name)
70987098
elif is_list:
7099-
from pandas import DataFrame
7100-
7101-
return DataFrame(np.nan, index=where, columns=self.columns)
7099+
return self._constructor(np.nan, index=where, columns=self.columns)
71027100
else:
7103-
from pandas import Series
7104-
7105-
return Series(np.nan, index=self.columns, name=where[0])
7101+
return self._constructor_sliced(
7102+
np.nan, index=self.columns, name=where[0]
7103+
)
71067104

71077105
locs = self.index.asof_locs(where, ~(nulls._values))
71087106

pandas/core/series.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3655,7 +3655,9 @@ def explode(self) -> "Series":
36553655

36563656
values, counts = reshape.explode(np.asarray(self.array))
36573657

3658-
result = Series(values, index=self.index.repeat(counts), name=self.name)
3658+
result = self._constructor(
3659+
values, index=self.index.repeat(counts), name=self.name
3660+
)
36593661
return result
36603662

36613663
def unstack(self, level=-1, fill_value=None):

pandas/tests/frame/test_subclass.py

+114-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
import pandas as pd
57
from pandas import DataFrame, Index, MultiIndex, Series
68
import pandas._testing as tm
@@ -560,16 +562,123 @@ def strech(row):
560562
assert not isinstance(result, tm.SubclassedDataFrame)
561563
tm.assert_series_equal(result, expected)
562564

563-
def test_subclassed_numeric_reductions(self, all_numeric_reductions):
565+
def test_subclassed_reductions(self, all_reductions):
564566
# GH 25596
565567

566568
df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
567-
result = getattr(df, all_numeric_reductions)()
569+
result = getattr(df, all_reductions)()
568570
assert isinstance(result, tm.SubclassedSeries)
569571

570-
def test_subclassed_boolean_reductions(self, all_boolean_reductions):
571-
# GH 25596
572+
def test_subclassed_count(self):
573+
574+
df = tm.SubclassedDataFrame(
575+
{
576+
"Person": ["John", "Myla", "Lewis", "John", "Myla"],
577+
"Age": [24.0, np.nan, 21.0, 33, 26],
578+
"Single": [False, True, True, True, False],
579+
}
580+
)
581+
result = df.count()
582+
assert isinstance(result, tm.SubclassedSeries)
583+
584+
df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]})
585+
result = df.count()
586+
assert isinstance(result, tm.SubclassedSeries)
587+
588+
df = tm.SubclassedDataFrame(
589+
[[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
590+
index=MultiIndex.from_tuples(
591+
list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
592+
),
593+
columns=MultiIndex.from_tuples(
594+
list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
595+
),
596+
)
597+
result = df.count(level=1)
598+
assert isinstance(result, tm.SubclassedDataFrame)
599+
600+
df = tm.SubclassedDataFrame()
601+
result = df.count()
602+
assert isinstance(result, tm.SubclassedSeries)
603+
604+
def test_isin(self):
605+
606+
df = tm.SubclassedDataFrame(
607+
{"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"]
608+
)
609+
result = df.isin([0, 2])
610+
assert isinstance(result, tm.SubclassedDataFrame)
611+
612+
def test_duplicated(self):
572613

573614
df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
574-
result = getattr(df, all_boolean_reductions)()
615+
result = df.duplicated()
616+
assert isinstance(result, tm.SubclassedSeries)
617+
618+
df = tm.SubclassedDataFrame()
619+
result = df.duplicated()
620+
assert isinstance(result, tm.SubclassedSeries)
621+
622+
@pytest.mark.parametrize("idx_method", ["idxmax", "idxmin"])
623+
def test_idx(self, idx_method):
624+
625+
df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
626+
result = getattr(df, idx_method)()
627+
assert isinstance(result, tm.SubclassedSeries)
628+
629+
def test_dot(self):
630+
631+
df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
632+
s = tm.SubclassedSeries([1, 1, 2, 1])
633+
result = df.dot(s)
634+
assert isinstance(result, tm.SubclassedSeries)
635+
636+
df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
637+
s = tm.SubclassedDataFrame([1, 1, 2, 1])
638+
result = df.dot(s)
639+
assert isinstance(result, tm.SubclassedDataFrame)
640+
641+
def test_memory_usage(self):
642+
643+
df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
644+
result = df.memory_usage()
645+
assert isinstance(result, tm.SubclassedSeries)
646+
647+
result = df.memory_usage(index=False)
648+
assert isinstance(result, tm.SubclassedSeries)
649+
650+
@td.skip_if_no_scipy
651+
def test_corrwith(self):
652+
index = ["a", "b", "c", "d", "e"]
653+
columns = ["one", "two", "three", "four"]
654+
df1 = tm.SubclassedDataFrame(
655+
np.random.randn(5, 4), index=index, columns=columns
656+
)
657+
df2 = tm.SubclassedDataFrame(
658+
np.random.randn(4, 4), index=index[:4], columns=columns
659+
)
660+
correls = df1.corrwith(df2, axis=1, drop=True, method="kendall")
661+
662+
assert isinstance(correls, (tm.SubclassedSeries))
663+
664+
def test_asof(self):
665+
666+
N = 3
667+
rng = pd.date_range("1/1/1990", periods=N, freq="53s")
668+
df = tm.SubclassedDataFrame(
669+
{
670+
"A": [np.nan, np.nan, np.nan],
671+
"B": [np.nan, np.nan, np.nan],
672+
"C": [np.nan, np.nan, np.nan],
673+
},
674+
index=rng,
675+
)
676+
677+
result = df.asof(rng[-2:])
678+
assert isinstance(result, tm.SubclassedDataFrame)
679+
680+
result = df.asof(rng[-2])
681+
assert isinstance(result, tm.SubclassedSeries)
682+
683+
result = df.asof("1989-12-31")
575684
assert isinstance(result, tm.SubclassedSeries)

pandas/tests/series/test_subclass.py

+16
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import numpy as np
2+
3+
import pandas as pd
14
import pandas._testing as tm
25

36

@@ -35,3 +38,16 @@ def test_subclass_empty_repr(self):
3538
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
3639
sub_series = tm.SubclassedSeries()
3740
assert "SubclassedSeries" in repr(sub_series)
41+
42+
def test_asof(self):
43+
N = 3
44+
rng = pd.date_range("1/1/1990", periods=N, freq="53s")
45+
s = tm.SubclassedSeries({"A": [np.nan, np.nan, np.nan]}, index=rng)
46+
47+
result = s.asof(rng[-2:])
48+
assert isinstance(result, tm.SubclassedSeries)
49+
50+
def test_explode(self):
51+
s = tm.SubclassedSeries([[1, 2, 3], "foo", [], [3, 4]])
52+
result = s.explode()
53+
assert isinstance(result, tm.SubclassedSeries)

0 commit comments

Comments
 (0)