Skip to content

Commit c03f545

Browse files
sinhrks authored and jreback committed
BUG: empty Series concat has no effect
closes #11082 closes #12695 closes #12696 Author: sinhrks <[email protected]> Closes #12846 from sinhrks/concat_empty and squashes the following commits: 781b1fe [sinhrks] BUG: empty Series concat has no effect
1 parent ea9a5a8 commit c03f545

File tree

3 files changed

+91
-31
lines changed

3 files changed

+91
-31
lines changed

doc/source/whatsnew/v0.18.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ API changes
108108
- ``read_csv`` no longer allows a combination of strings and integers for the ``usecols`` parameter (:issue:`12678`)
109109
- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
110110
- Provide a proper ``__name__`` and ``__qualname__`` attributes for generic functions (:issue:`12021`)
111+
- ``pd.concat(ignore_index=True)`` now uses ``RangeIndex`` as default (:issue:`12695`)
111112

112113
.. _whatsnew_0181.apply_resample:
113114

@@ -234,6 +235,7 @@ Bug Fixes
234235

235236

236237
- Bug in ``concat`` raises ``AttributeError`` when input data contains tz-aware datetime and timedelta (:issue:`12620`)
238+
- Bug in ``concat`` where empty ``Series`` were not handled properly (:issue:`11082`)
237239

238240

239241
- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)

pandas/tools/merge.py

+29-31
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pandas.core.internals import (items_overlap_with_suffix,
1616
concatenate_block_managers)
1717
from pandas.util.decorators import Appender, Substitution
18-
from pandas.core.common import ABCSeries, isnull
18+
from pandas.core.common import ABCSeries
1919

2020
import pandas.core.algorithms as algos
2121
import pandas.core.common as com
@@ -906,13 +906,14 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
906906
break
907907

908908
else:
909-
# filter out the empties
910-
# if we have not multi-index possibiltes
911-
df = DataFrame([obj.shape for obj in objs]).sum(1)
912-
non_empties = df[df != 0]
909+
# filter out the empties if we have no multi-index possibilities
910+
# note: keep empty Series, as they affect the result columns / name
911+
non_empties = [obj for obj in objs
912+
if sum(obj.shape) > 0 or isinstance(obj, Series)]
913+
913914
if (len(non_empties) and (keys is None and names is None and
914915
levels is None and join_axes is None)):
915-
objs = [objs[i] for i in non_empties.index]
916+
objs = non_empties
916917
sample = objs[0]
917918

918919
if sample is None:
@@ -979,7 +980,14 @@ def get_result(self):
979980

980981
# stack blocks
981982
if self.axis == 0:
982-
new_data = com._concat_compat([x._values for x in self.objs])
983+
# concat only the non-empty Series to preserve dtype as much as possible
984+
non_empties = [x for x in self.objs if len(x) > 0]
985+
if len(non_empties) > 0:
986+
values = [x._values for x in non_empties]
987+
else:
988+
values = [x._values for x in self.objs]
989+
new_data = com._concat_compat(values)
990+
983991
name = com._consensus_name_attr(self.objs)
984992
return (Series(new_data, index=self.new_axes[0],
985993
name=name,
@@ -991,18 +999,6 @@ def get_result(self):
991999
data = dict(zip(range(len(self.objs)), self.objs))
9921000
index, columns = self.new_axes
9931001
tmpdf = DataFrame(data, index=index)
994-
# checks if the column variable already stores valid column
995-
# names (because set via the 'key' argument in the 'concat'
996-
# function call. If that's not the case, use the series names
997-
# as column names
998-
if (columns.equals(Index(np.arange(len(self.objs)))) and
999-
not self.ignore_index):
1000-
columns = np.array([data[i].name
1001-
for i in range(len(data))],
1002-
dtype='object')
1003-
indexer = isnull(columns)
1004-
if indexer.any():
1005-
columns[indexer] = np.arange(len(indexer[indexer]))
10061002
tmpdf.columns = columns
10071003
return tmpdf.__finalize__(self, method='concat')
10081004

@@ -1082,32 +1078,34 @@ def _get_concat_axis(self):
10821078
if self.axis == 0:
10831079
indexes = [x.index for x in self.objs]
10841080
elif self.ignore_index:
1085-
idx = Index(np.arange(len(self.objs)))
1086-
idx.is_unique = True # arange is always unique
1081+
idx = com._default_index(len(self.objs))
10871082
return idx
10881083
elif self.keys is None:
1089-
names = []
1090-
for x in self.objs:
1084+
names = [None] * len(self.objs)
1085+
num = 0
1086+
has_names = False
1087+
for i, x in enumerate(self.objs):
10911088
if not isinstance(x, Series):
10921089
raise TypeError("Cannot concatenate type 'Series' "
10931090
"with object of type "
10941091
"%r" % type(x).__name__)
10951092
if x.name is not None:
1096-
names.append(x.name)
1093+
names[i] = x.name
1094+
has_names = True
10971095
else:
1098-
idx = Index(np.arange(len(self.objs)))
1099-
idx.is_unique = True
1100-
return idx
1101-
1102-
return Index(names)
1096+
names[i] = num
1097+
num += 1
1098+
if has_names:
1099+
return Index(names)
1100+
else:
1101+
return com._default_index(len(self.objs))
11031102
else:
11041103
return _ensure_index(self.keys)
11051104
else:
11061105
indexes = [x._data.axes[self.axis] for x in self.objs]
11071106

11081107
if self.ignore_index:
1109-
idx = Index(np.arange(sum(len(i) for i in indexes)))
1110-
idx.is_unique = True
1108+
idx = com._default_index(sum(len(i) for i in indexes))
11111109
return idx
11121110

11131111
if self.keys is None:

pandas/tools/tests/test_merge.py

+60
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,66 @@ def test_concat_period_series(self):
12521252
tm.assert_series_equal(result, expected)
12531253
self.assertEqual(result.dtype, 'object')
12541254

1255+
def test_concat_empty_series(self):
1256+
# GH 11082
1257+
s1 = pd.Series([1, 2, 3], name='x')
1258+
s2 = pd.Series(name='y')
1259+
res = pd.concat([s1, s2], axis=1)
1260+
exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]})
1261+
tm.assert_frame_equal(res, exp)
1262+
1263+
s1 = pd.Series([1, 2, 3], name='x')
1264+
s2 = pd.Series(name='y')
1265+
res = pd.concat([s1, s2], axis=0)
1266+
# name will be reset
1267+
exp = pd.Series([1, 2, 3])
1268+
tm.assert_series_equal(res, exp)
1269+
1270+
# empty Series with no name
1271+
s1 = pd.Series([1, 2, 3], name='x')
1272+
s2 = pd.Series(name=None)
1273+
res = pd.concat([s1, s2], axis=1)
1274+
exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
1275+
columns=['x', 0])
1276+
tm.assert_frame_equal(res, exp)
1277+
1278+
def test_default_index(self):
1279+
# is_series and ignore_index
1280+
s1 = pd.Series([1, 2, 3], name='x')
1281+
s2 = pd.Series([4, 5, 6], name='y')
1282+
res = pd.concat([s1, s2], axis=1, ignore_index=True)
1283+
self.assertIsInstance(res.columns, pd.RangeIndex)
1284+
exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
1285+
# use check_index_type=True to check that the result has
1286+
# RangeIndex (default index)
1287+
tm.assert_frame_equal(res, exp, check_index_type=True,
1288+
check_column_type=True)
1289+
1290+
# is_series and all inputs have no names
1291+
s1 = pd.Series([1, 2, 3])
1292+
s2 = pd.Series([4, 5, 6])
1293+
res = pd.concat([s1, s2], axis=1, ignore_index=False)
1294+
self.assertIsInstance(res.columns, pd.RangeIndex)
1295+
exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
1296+
exp.columns = pd.RangeIndex(2)
1297+
tm.assert_frame_equal(res, exp, check_index_type=True,
1298+
check_column_type=True)
1299+
1300+
# is_dataframe and ignore_index
1301+
df1 = pd.DataFrame({'A': [1, 2], 'B': [5, 6]})
1302+
df2 = pd.DataFrame({'A': [3, 4], 'B': [7, 8]})
1303+
1304+
res = pd.concat([df1, df2], axis=0, ignore_index=True)
1305+
exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]],
1306+
columns=['A', 'B'])
1307+
tm.assert_frame_equal(res, exp, check_index_type=True,
1308+
check_column_type=True)
1309+
1310+
res = pd.concat([df1, df2], axis=1, ignore_index=True)
1311+
exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
1312+
tm.assert_frame_equal(res, exp, check_index_type=True,
1313+
check_column_type=True)
1314+
12551315
def test_indicator(self):
12561316
# PR #10054. xref #7412 and closes #8790.
12571317
df1 = DataFrame({'col1': [0, 1], 'col_left': [

0 commit comments

Comments
 (0)