Skip to content

BUG: empty Series concat has no effect #12846

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ API changes
- ``read_csv`` no longer allows a combination of strings and integers for the ``usecols`` parameter (:issue:`12678`)
- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
- Provide proper ``__name__`` and ``__qualname__`` attributes for generic functions (:issue:`12021`)
- ``pd.concat(ignore_index=True)`` now uses ``RangeIndex`` as default (:issue:`12695`)

.. _whatsnew_0181.apply_resample:

Expand Down Expand Up @@ -233,6 +234,7 @@ Bug Fixes


- Bug in ``concat`` where ``AttributeError`` was raised when input data contains tz-aware datetime and timedelta (:issue:`12620`)
- Bug in ``concat`` where empty ``Series`` were not handled properly (:issue:`11082`)


- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
Expand Down
60 changes: 29 additions & 31 deletions pandas/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pandas.core.internals import (items_overlap_with_suffix,
concatenate_block_managers)
from pandas.util.decorators import Appender, Substitution
from pandas.core.common import ABCSeries, isnull
from pandas.core.common import ABCSeries

import pandas.core.algorithms as algos
import pandas.core.common as com
Expand Down Expand Up @@ -906,13 +906,14 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
break

else:
# filter out the empties
# if we have not multi-index possibiltes
df = DataFrame([obj.shape for obj in objs]).sum(1)
non_empties = df[df != 0]
# filter out the empties if we have no multi-index possibilities
# note: keep empty Series, as they affect the result columns / name
non_empties = [obj for obj in objs
if sum(obj.shape) > 0 or isinstance(obj, Series)]

if (len(non_empties) and (keys is None and names is None and
levels is None and join_axes is None)):
objs = [objs[i] for i in non_empties.index]
objs = non_empties
sample = objs[0]

if sample is None:
Expand Down Expand Up @@ -979,7 +980,14 @@ def get_result(self):

# stack blocks
if self.axis == 0:
new_data = com._concat_compat([x._values for x in self.objs])
# concat only non-empty Series to keep dtype where possible
non_empties = [x for x in self.objs if len(x) > 0]
if len(non_empties) > 0:
values = [x._values for x in non_empties]
else:
values = [x._values for x in self.objs]
new_data = com._concat_compat(values)

name = com._consensus_name_attr(self.objs)
return (Series(new_data, index=self.new_axes[0],
name=name,
Expand All @@ -991,18 +999,6 @@ def get_result(self):
data = dict(zip(range(len(self.objs)), self.objs))
index, columns = self.new_axes
tmpdf = DataFrame(data, index=index)
# checks if the column variable already stores valid column
# names (because set via the 'key' argument in the 'concat'
# function call. If that's not the case, use the series names
# as column names
if (columns.equals(Index(np.arange(len(self.objs)))) and
not self.ignore_index):
columns = np.array([data[i].name
for i in range(len(data))],
dtype='object')
indexer = isnull(columns)
if indexer.any():
columns[indexer] = np.arange(len(indexer[indexer]))
tmpdf.columns = columns
return tmpdf.__finalize__(self, method='concat')

Expand Down Expand Up @@ -1082,32 +1078,34 @@ def _get_concat_axis(self):
if self.axis == 0:
indexes = [x.index for x in self.objs]
elif self.ignore_index:
idx = Index(np.arange(len(self.objs)))
idx.is_unique = True # arange is always unique
idx = com._default_index(len(self.objs))
return idx
elif self.keys is None:
names = []
for x in self.objs:
names = [None] * len(self.objs)
num = 0
has_names = False
for i, x in enumerate(self.objs):
if not isinstance(x, Series):
raise TypeError("Cannot concatenate type 'Series' "
"with object of type "
"%r" % type(x).__name__)
if x.name is not None:
names.append(x.name)
names[i] = x.name
has_names = True
else:
idx = Index(np.arange(len(self.objs)))
idx.is_unique = True
return idx

return Index(names)
names[i] = num
num += 1
if has_names:
return Index(names)
else:
return com._default_index(len(self.objs))
else:
return _ensure_index(self.keys)
else:
indexes = [x._data.axes[self.axis] for x in self.objs]

if self.ignore_index:
idx = Index(np.arange(sum(len(i) for i in indexes)))
idx.is_unique = True
idx = com._default_index(sum(len(i) for i in indexes))
return idx

if self.keys is None:
Expand Down
60 changes: 60 additions & 0 deletions pandas/tools/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1252,6 +1252,66 @@ def test_concat_period_series(self):
tm.assert_series_equal(result, expected)
self.assertEqual(result.dtype, 'object')

def test_concat_empty_series(self):
    # GH 11082: concatenating with an empty Series

    # axis=1, named empty Series: expected frame has an all-NaN 'y' column
    filled = pd.Series([1, 2, 3], name='x')
    empty = pd.Series(name='y')
    result = pd.concat([filled, empty], axis=1)
    expected = pd.DataFrame({'x': [1, 2, 3],
                             'y': [np.nan, np.nan, np.nan]})
    tm.assert_frame_equal(result, expected)

    # axis=0, named empty Series: the values of the non-empty input are
    # expected back, but the name will be reset
    filled = pd.Series([1, 2, 3], name='x')
    empty = pd.Series(name='y')
    result = pd.concat([filled, empty], axis=0)
    expected = pd.Series([1, 2, 3])
    tm.assert_series_equal(result, expected)

    # axis=1, unnamed empty Series: expected column label is 0
    filled = pd.Series([1, 2, 3], name='x')
    empty = pd.Series(name=None)
    result = pd.concat([filled, empty], axis=1)
    expected = pd.DataFrame({'x': [1, 2, 3],
                             0: [np.nan, np.nan, np.nan]},
                            columns=['x', 0])
    tm.assert_frame_equal(result, expected)

def test_default_index(self):
    # Series inputs combined with ignore_index=True
    left = pd.Series([1, 2, 3], name='x')
    right = pd.Series([4, 5, 6], name='y')
    result = pd.concat([left, right], axis=1, ignore_index=True)
    self.assertIsInstance(result.columns, pd.RangeIndex)
    expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
    # check_index_type=True / check_column_type=True make the comparison
    # verify that the result carries a RangeIndex (the default index)
    tm.assert_frame_equal(result, expected, check_index_type=True,
                          check_column_type=True)

    # Series inputs where none of them has a name
    left = pd.Series([1, 2, 3])
    right = pd.Series([4, 5, 6])
    result = pd.concat([left, right], axis=1, ignore_index=False)
    self.assertIsInstance(result.columns, pd.RangeIndex)
    expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
    expected.columns = pd.RangeIndex(2)
    tm.assert_frame_equal(result, expected, check_index_type=True,
                          check_column_type=True)

    # DataFrame inputs combined with ignore_index=True
    top = pd.DataFrame({'A': [1, 2], 'B': [5, 6]})
    bottom = pd.DataFrame({'A': [3, 4], 'B': [7, 8]})

    # stacked along the rows
    result = pd.concat([top, bottom], axis=0, ignore_index=True)
    expected = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]],
                            columns=['A', 'B'])
    tm.assert_frame_equal(result, expected, check_index_type=True,
                          check_column_type=True)

    # placed side by side along the columns
    result = pd.concat([top, bottom], axis=1, ignore_index=True)
    expected = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
    tm.assert_frame_equal(result, expected, check_index_type=True,
                          check_column_type=True)

def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
df1 = DataFrame({'col1': [0, 1], 'col_left': [
Expand Down