Skip to content

BUG: Increased support for subclassed types. #31331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
May 10, 2020
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
bdafa80
Better subclassed type support in DataFrame.count()
Jan 26, 2020
43673b6
Subclassed type support in DataFrame.count()
Jan 26, 2020
7f594b5
Subclassed type support in DataFrame.duplicated()
Jan 26, 2020
a278d38
Subclassed type support in DataFrame.idxmin()
Jan 26, 2020
b9cf60a
Subclassed type support in DataFrame.idxmax()
Jan 26, 2020
99742a0
Subclassed type support in DataFrame.dot()
Jan 26, 2020
b117276
Subclassed type support in DataFrame.memory_usage()
Jan 26, 2020
20f9574
Subclassed type support in DataFrame.corrwith()
Jan 26, 2020
f0eaaa2
Better subclassed type support in DataFrame.asof()
Jan 26, 2020
0393c5d
Subclassed type support in Series.explode()
Jan 26, 2020
40f1ba7
black formatting
Jan 26, 2020
979f3cc
Added What's New entry
Jan 26, 2020
a02cbf3
Merge branch 'master' into subclass
Feb 15, 2020
c926c9f
Parameterized idx tests
Feb 15, 2020
f28852e
Merge remote-tracking branch 'upstream/master' into subclass
Feb 16, 2020
67ccc7f
Merge remote-tracking branch 'upstream/master' into subclass
Feb 17, 2020
a298deb
Merge branch 'master' into subclass
Apr 17, 2020
570dc6d
Merge master
Apr 18, 2020
e853d80
Added skip_if_no_scipy to failing test for three build systems failin…
Apr 18, 2020
4e60f49
Add docs
Apr 18, 2020
3f0d920
Merge remote-tracking branch 'upstream/master' into subclass
Apr 18, 2020
5794ffc
Removed an error in merging master.
Apr 18, 2020
5072f9e
Final commit for review
Apr 18, 2020
a25da2e
created all_reductions fixture
May 9, 2020
25b4dfb
Merge branch 'master' into subclass
May 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ Reshaping
- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
- Bug in :meth:`DataFrame.corrwith()`, :meth:`DataFrame.memory_usage()`, :meth:`DataFrame.dot()`,
  :meth:`DataFrame.idxmin()`, :meth:`DataFrame.idxmax()`, :meth:`DataFrame.duplicated()`, :meth:`DataFrame.isin()`,
  :meth:`DataFrame.count()`, :meth:`Series.explode()`, :meth:`Series.asof()` and :meth:`DataFrame.asof()` not returning subclassed types. (:issue:`31331`)

Sparse
^^^^^^
Expand Down
32 changes: 17 additions & 15 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,13 +1144,13 @@ def dot(self, other):
np.dot(lvals, rvals), index=left.index, columns=other.columns
)
elif isinstance(other, Series):
return Series(np.dot(lvals, rvals), index=left.index)
return self._constructor_sliced(np.dot(lvals, rvals), index=left.index)
elif isinstance(rvals, (np.ndarray, Index)):
result = np.dot(lvals, rvals)
if result.ndim == 2:
return self._constructor(result, index=left.index)
else:
return Series(result, index=left.index)
return self._constructor_sliced(result, index=left.index)
else: # pragma: no cover
raise TypeError(f"unsupported type: {type(other)}")

Expand Down Expand Up @@ -2585,14 +2585,14 @@ def memory_usage(self, index=True, deep=False) -> Series:
>>> df['object'].astype('category').memory_usage(deep=True)
5216
"""
result = Series(
result = self._constructor_sliced(
[c.memory_usage(index=False, deep=deep) for col, c in self.items()],
index=self.columns,
)
if index:
result = Series(self.index.memory_usage(deep=deep), index=["Index"]).append(
result
)
result = self._constructor_sliced(
self.index.memory_usage(deep=deep), index=["Index"]
).append(result)
return result

def transpose(self, *args, copy: bool = False) -> "DataFrame":
Expand Down Expand Up @@ -4883,7 +4883,7 @@ def duplicated(
from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT

if self.empty:
return Series(dtype=bool)
return self._constructor_sliced(dtype=bool)

def f(vals):
labels, shape = algorithms.factorize(
Expand Down Expand Up @@ -4915,7 +4915,7 @@ def f(vals):
labels, shape = map(list, zip(*map(f, vals)))

ids = get_group_index(labels, shape, sort=False, xnull=False)
return Series(duplicated_int64(ids, keep), index=self.index)
return self._constructor_sliced(duplicated_int64(ids, keep), index=self.index)

# ----------------------------------------------------------------------
# Sorting
Expand Down Expand Up @@ -7748,7 +7748,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series:
def c(x):
return nanops.nancorr(x[0], x[1], method=method)

correl = Series(
correl = self._constructor_sliced(
map(c, zip(left.values.T, right.values.T)), index=left.columns
)

Expand Down Expand Up @@ -7861,7 +7861,7 @@ def count(self, axis=0, level=None, numeric_only=False):

# GH #423
if len(frame._get_axis(axis)) == 0:
result = Series(0, index=frame._get_agg_axis(axis))
result = self._constructor_sliced(0, index=frame._get_agg_axis(axis))
else:
if frame._is_mixed_type or frame._data.any_extension_types:
# the or any_extension_types is really only hit for single-
Expand All @@ -7871,7 +7871,9 @@ def count(self, axis=0, level=None, numeric_only=False):
# GH13407
series_counts = notna(frame).sum(axis=axis)
counts = series_counts.values
result = Series(counts, index=frame._get_agg_axis(axis))
result = self._constructor_sliced(
counts, index=frame._get_agg_axis(axis)
)

return result.astype("int64")

Expand Down Expand Up @@ -7910,7 +7912,7 @@ def _count_level(self, level, axis=0, numeric_only=False):
level_codes = ensure_int64(count_axis.codes[level])
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0)

result = DataFrame(counts, index=level_index, columns=agg_axis)
result = self._constructor(counts, index=level_index, columns=agg_axis)

if axis == 1:
# Undo our earlier transpose
Expand Down Expand Up @@ -8110,7 +8112,7 @@ def idxmin(self, axis=0, skipna=True) -> Series:
indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna)
index = self._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
return Series(result, index=self._get_agg_axis(axis))
return self._constructor_sliced(result, index=self._get_agg_axis(axis))

def idxmax(self, axis=0, skipna=True) -> Series:
"""
Expand Down Expand Up @@ -8148,7 +8150,7 @@ def idxmax(self, axis=0, skipna=True) -> Series:
indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna)
index = self._get_axis(axis)
result = [index[i] if i >= 0 else np.nan for i in indices]
return Series(result, index=self._get_agg_axis(axis))
return self._constructor_sliced(result, index=self._get_agg_axis(axis))

def _get_agg_axis(self, axis_num):
"""
Expand Down Expand Up @@ -8506,7 +8508,7 @@ def isin(self, values) -> "DataFrame":
"to be passed to DataFrame.isin(), "
f"you passed a '{type(values).__name__}'"
)
return DataFrame(
return self._constructor(
algorithms.isin(self.values.ravel(), values).reshape(self.shape),
self.index,
self.columns,
Expand Down
16 changes: 7 additions & 9 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6962,9 +6962,9 @@ def asof(self, where, subset=None):

if where < start:
if not is_series:
from pandas import Series

return Series(index=self.columns, name=where, dtype=np.float64)
return self._constructor_sliced(
index=self.columns, name=where, dtype=np.float64
)
return np.nan

# It's always much faster to use a *while* loop here for
Expand All @@ -6991,13 +6991,11 @@ def asof(self, where, subset=None):
if is_series:
return self._constructor(np.nan, index=where, name=self.name)
elif is_list:
from pandas import DataFrame

return DataFrame(np.nan, index=where, columns=self.columns)
return self._constructor(np.nan, index=where, columns=self.columns)
else:
from pandas import Series

return Series(np.nan, index=self.columns, name=where[0])
return self._constructor_sliced(
np.nan, index=self.columns, name=where[0]
)

locs = self.index.asof_locs(where, ~(nulls.values))

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3471,7 +3471,9 @@ def explode(self) -> "Series":

values, counts = reshape.explode(np.asarray(self.array))

result = Series(values, index=self.index.repeat(counts), name=self.name)
result = self._constructor(
values, index=self.index.repeat(counts), name=self.name
)
return result

def unstack(self, level=-1, fill_value=None):
Expand Down
118 changes: 118 additions & 0 deletions pandas/tests/frame/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,3 +571,121 @@ def test_subclassed_boolean_reductions(self, all_boolean_reductions):
df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
result = getattr(df, all_boolean_reductions)()
assert isinstance(result, tm.SubclassedSeries)

def test_subclassed_count(self):

df = tm.SubclassedDataFrame(
{
"Person": ["John", "Myla", "Lewis", "John", "Myla"],
"Age": [24.0, np.nan, 21.0, 33, 26],
"Single": [False, True, True, True, False],
}
)
result = df.count()
assert isinstance(result, tm.SubclassedSeries)

df = tm.SubclassedDataFrame({"A": [1, 0, 3], "B": [0, 5, 6], "C": [7, 8, 0]})
result = df.count()
assert isinstance(result, tm.SubclassedSeries)

df = tm.SubclassedDataFrame(
[[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]],
index=MultiIndex.from_tuples(
list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]
),
columns=MultiIndex.from_tuples(
list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]
),
)
result = df.count(level=1)
assert isinstance(result, tm.SubclassedDataFrame)

df = tm.SubclassedDataFrame()
result = df.count()
assert isinstance(result, tm.SubclassedSeries)

def test_isin(self):

df = tm.SubclassedDataFrame(
{"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"]
)
result = df.isin([0, 2])
assert isinstance(result, tm.SubclassedDataFrame)

def test_duplicated(self):

df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
result = df.duplicated()
assert isinstance(result, tm.SubclassedSeries)

df = tm.SubclassedDataFrame()
result = df.duplicated()
assert isinstance(result, tm.SubclassedSeries)

def test_idxmin(self):

df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
result = df.idxmin()
assert isinstance(result, tm.SubclassedSeries)

def test_idxmax(self):

df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
result = df.idxmax()
assert isinstance(result, tm.SubclassedSeries)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it can be parametrized and shared with test_idxmin. If any other tests can be parametrized as well, please give it a try.


def test_dot(self):

df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
s = tm.SubclassedSeries([1, 1, 2, 1])
result = df.dot(s)
assert isinstance(result, tm.SubclassedSeries)

df = tm.SubclassedDataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
s = tm.SubclassedDataFrame([1, 1, 2, 1])
result = df.dot(s)
assert isinstance(result, tm.SubclassedDataFrame)

def test_memory_usage(self):

df = tm.SubclassedDataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
result = df.memory_usage()
assert isinstance(result, tm.SubclassedSeries)

result = df.memory_usage(index=False)
assert isinstance(result, tm.SubclassedSeries)

def test_corrwith(self):
index = ["a", "b", "c", "d", "e"]
columns = ["one", "two", "three", "four"]
df1 = tm.SubclassedDataFrame(
np.random.randn(5, 4), index=index, columns=columns
)
df2 = tm.SubclassedDataFrame(
np.random.randn(4, 4), index=index[:4], columns=columns
)
correls = df1.corrwith(df2, axis=1, drop=True, method="kendall")

assert isinstance(correls, (tm.SubclassedSeries))

def test_asof(self):

N = 3
rng = pd.date_range("1/1/1990", periods=N, freq="53s")
df = tm.SubclassedDataFrame(
{
"A": [np.nan, np.nan, np.nan],
"B": [np.nan, np.nan, np.nan],
"C": [np.nan, np.nan, np.nan],
},
index=rng,
)

result = df.asof(rng[-2:])
assert isinstance(result, tm.SubclassedDataFrame)

result = df.asof(rng[-2])
assert isinstance(result, tm.SubclassedSeries)

result = df.asof("1989-12-31")
assert isinstance(result, tm.SubclassedSeries)
15 changes: 15 additions & 0 deletions pandas/tests/series/test_subclass.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import pandas._testing as tm
import pandas as pd
import numpy as np


class TestSeriesSubclassing:
Expand Down Expand Up @@ -35,3 +37,16 @@ def test_subclass_empty_repr(self):
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
sub_series = tm.SubclassedSeries()
assert "SubclassedSeries" in repr(sub_series)

def test_asof(self):
N = 3
rng = pd.date_range("1/1/1990", periods=N, freq="53s")
s = tm.SubclassedSeries({"A": [np.nan, np.nan, np.nan]}, index=rng)

result = s.asof(rng[-2:])
assert isinstance(result, tm.SubclassedSeries)

def test_explode(self):
s = tm.SubclassedSeries([[1, 2, 3], "foo", [], [3, 4]])
result = s.explode()
assert isinstance(result, tm.SubclassedSeries)