-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: fix issue with concat creating SparseFrame if not all series are sparse. #18924
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
ba8dc29
05a0717
6fc6369
4116da7
2ee0391
44aa41a
1b9e976
3c0a4da
90a5003
126db41
6d01387
81aba2e
0768990
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,14 +89,16 @@ def _get_series_result_type(result, objs=None): | |
def _get_frame_result_type(result, objs): | ||
""" | ||
return appropriate class of DataFrame-like concat | ||
if any block is SparseBlock, return SparseDataFrame | ||
if all blocks are SparseBlock, return SparseDataFrame | ||
otherwise, return 1st obj | ||
""" | ||
if any(b.is_sparse for b in result.blocks): | ||
from pandas.core.sparse.api import SparseDataFrame | ||
|
||
from pandas.core.sparse.api import SparseDataFrame | ||
|
||
if result.blocks and all(b.is_sparse for b in result.blocks): | ||
return SparseDataFrame | ||
else: | ||
return objs[0] | ||
return next(obj for obj in objs if not type(obj) == SparseDataFrame) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use ABCSparseDataFrame There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually need to add this in pandas/core/dtypes/generic.py (and a test for it) use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm you can just return a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried to keep this as similar to the original as possible. For instance someone might have their own subclass defined of DataFrame. |
||
|
||
|
||
def _concat_compat(to_concat, axis=0): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# pylint: disable-msg=E1101,W0612 | ||
import pytest | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
@@ -317,37 +318,43 @@ def test_concat_axis1(self): | |
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
def test_concat_sparse_dense(self): | ||
sparse = self.dense1.to_sparse() | ||
|
||
@pytest.mark.parametrize('fill_value', [None, 0]) | ||
def test_concat_sparse_dense(self, fill_value): | ||
sparse = self.dense1.to_sparse(fill_value=fill_value) | ||
res = pd.concat([sparse, self.dense2]) | ||
exp = pd.concat([self.dense1, self.dense2]) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
res = pd.concat([self.dense2, sparse]) | ||
exp = pd.concat([self.dense2, self.dense1]) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
sparse = self.dense1.to_sparse(fill_value=0) | ||
|
||
res = pd.concat([sparse, self.dense2]) | ||
exp = pd.concat([self.dense1, self.dense2]) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
res = pd.concat([self.dense2, sparse]) | ||
exp = pd.concat([self.dense2, self.dense1]) | ||
|
||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
res = pd.concat([self.dense3, sparse], axis=1) | ||
exp = pd.concat([self.dense3, self.dense1], axis=1) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
# See GH18914 and #18686 for why this should be | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you just construct the dataframe and compare? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was trying to follow the style of this test. I can definitely do that but figured since the rest of the test was following that standard I'd keep it similar. |
||
# A DataFrame | ||
assert type(res) is pd.DataFrame | ||
# See GH16874 | ||
assert not res.isnull().empty | ||
assert not res[res.columns[0]].empty | ||
assert res.iloc[0, 0] == self.dense3.iloc[0, 0] | ||
|
||
for column in self.dense3.columns: | ||
tm.assert_series_equal(res[column], exp[column]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would be all for splitting this long test up a bit There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alright split it up a bit. Also added some new test cases since it wasn't covering all aspects like concating onself. |
||
|
||
tm.assert_frame_equal(res, exp) | ||
|
||
res = pd.concat([sparse, self.dense3], axis=1) | ||
exp = pd.concat([self.dense1, self.dense3], axis=1) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
assert type(res) is pd.DataFrame | ||
# See GH16874 | ||
assert not res.isnull().empty | ||
assert not res[res.columns[0]].empty | ||
assert res.iloc[0, 0] == sparse.iloc[0, 0] | ||
for column in self.dense3.columns: | ||
tm.assert_series_equal(res[column], exp[column]) | ||
tm.assert_frame_equal(res, exp) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could put this right before you actually use SparseDataFrame