BUG: empty Series concat has no effect

sinhrks · jreback · commit c03f5456676e · 2016-04-10T10:20:13.000-04:00
closes #11082 closes #12695 closes #12696 Author: sinhrks <sinhrks@gmail.com> Closes #12846 from sinhrks/concat_empty and squashes the following commits: 781b1fe [sinhrks] BUG: empty Series concat has no effect
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -108,6 +108,7 @@ API changes
 - ``read_csv`` no longer allows a combination of strings and integers for the ``usecols`` parameter (:issue:`12678`)
 - ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
 - Provide a proper ``__name__`` and ``__qualname__`` attributes for generic functions (:issue:`12021`)
+- ``pd.concat(ignore_index=True)`` now uses ``RangeIndex`` as default (:issue:`12695`)
 
 .. _whatsnew_0181.apply_resample:
 
@@ -234,6 +235,7 @@ Bug Fixes
 
 
 - Bug in ``concat`` raises ``AttributeError`` when input data contains tz-aware datetime and timedelta (:issue:`12620`)
+- Bug in ``concat`` where an empty ``Series`` was not handled properly (:issue:`11082`)
 
 
 - Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
@@ -15,7 +15,7 @@
 from pandas.core.internals import (items_overlap_with_suffix,
                                    concatenate_block_managers)
 from pandas.util.decorators import Appender, Substitution
-from pandas.core.common import ABCSeries, isnull
+from pandas.core.common import ABCSeries
 
 import pandas.core.algorithms as algos
 import pandas.core.common as com
@@ -906,13 +906,14 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
                     break
 
         else:
-            # filter out the empties
-            # if we have not multi-index possibiltes
-            df = DataFrame([obj.shape for obj in objs]).sum(1)
-            non_empties = df[df != 0]
+            # filter out the empties if we have no multi-index possibilities
+            # note: keep empty Series, as they affect the result columns / name
+            non_empties = [obj for obj in objs
+                           if sum(obj.shape) > 0 or isinstance(obj, Series)]
+
             if (len(non_empties) and (keys is None and names is None and
                                       levels is None and join_axes is None)):
-                objs = [objs[i] for i in non_empties.index]
+                objs = non_empties
                 sample = objs[0]
 
         if sample is None:
@@ -979,7 +980,14 @@ def get_result(self):
 
             # stack blocks
             if self.axis == 0:
-                new_data = com._concat_compat([x._values for x in self.objs])
+                # concat only non-empty Series to keep dtype if possible
+                non_empties = [x for x in self.objs if len(x) > 0]
+                if len(non_empties) > 0:
+                    values = [x._values for x in non_empties]
+                else:
+                    values = [x._values for x in self.objs]
+                new_data = com._concat_compat(values)
+
                 name = com._consensus_name_attr(self.objs)
                 return (Series(new_data, index=self.new_axes[0],
                                name=name,
@@ -991,18 +999,6 @@ def get_result(self):
                 data = dict(zip(range(len(self.objs)), self.objs))
                 index, columns = self.new_axes
                 tmpdf = DataFrame(data, index=index)
-                # checks if the column variable already stores valid column
-                # names (because set via the 'key' argument in the 'concat'
-                # function call. If that's not the case, use the series names
-                # as column names
-                if (columns.equals(Index(np.arange(len(self.objs)))) and
-                        not self.ignore_index):
-                    columns = np.array([data[i].name
-                                        for i in range(len(data))],
-                                       dtype='object')
-                    indexer = isnull(columns)
-                    if indexer.any():
-                        columns[indexer] = np.arange(len(indexer[indexer]))
                 tmpdf.columns = columns
                 return tmpdf.__finalize__(self, method='concat')
 
@@ -1082,32 +1078,34 @@ def _get_concat_axis(self):
             if self.axis == 0:
                 indexes = [x.index for x in self.objs]
             elif self.ignore_index:
-                idx = Index(np.arange(len(self.objs)))
-                idx.is_unique = True  # arange is always unique
+                idx = com._default_index(len(self.objs))
                 return idx
             elif self.keys is None:
-                names = []
-                for x in self.objs:
+                names = [None] * len(self.objs)
+                num = 0
+                has_names = False
+                for i, x in enumerate(self.objs):
                     if not isinstance(x, Series):
                         raise TypeError("Cannot concatenate type 'Series' "
                                         "with object of type "
                                         "%r" % type(x).__name__)
                     if x.name is not None:
-                        names.append(x.name)
+                        names[i] = x.name
+                        has_names = True
                     else:
-                        idx = Index(np.arange(len(self.objs)))
-                        idx.is_unique = True
-                        return idx
-
-                return Index(names)
+                        names[i] = num
+                        num += 1
+                if has_names:
+                    return Index(names)
+                else:
+                    return com._default_index(len(self.objs))
             else:
                 return _ensure_index(self.keys)
         else:
             indexes = [x._data.axes[self.axis] for x in self.objs]
 
         if self.ignore_index:
-            idx = Index(np.arange(sum(len(i) for i in indexes)))
-            idx.is_unique = True
+            idx = com._default_index(sum(len(i) for i in indexes))
             return idx
 
         if self.keys is None:
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
@@ -1252,6 +1252,66 @@ def test_concat_period_series(self):
         tm.assert_series_equal(result, expected)
         self.assertEqual(result.dtype, 'object')
 
+    def test_concat_empty_series(self):
+        # GH 11082
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series(name='y')
+        res = pd.concat([s1, s2], axis=1)
+        exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]})
+        tm.assert_frame_equal(res, exp)
+
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series(name='y')
+        res = pd.concat([s1, s2], axis=0)
+        # name will be reset
+        exp = pd.Series([1, 2, 3])
+        tm.assert_series_equal(res, exp)
+
+        # empty Series with no name
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series(name=None)
+        res = pd.concat([s1, s2], axis=1)
+        exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
+                           columns=['x', 0])
+        tm.assert_frame_equal(res, exp)
+
+    def test_default_index(self):
+        # is_series and ignore_index
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series([4, 5, 6], name='y')
+        res = pd.concat([s1, s2], axis=1, ignore_index=True)
+        self.assertIsInstance(res.columns, pd.RangeIndex)
+        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
+        # use check_index_type=True to check that the result has a
+        # RangeIndex (default index)
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
+        # is_series and all inputs have no names
+        s1 = pd.Series([1, 2, 3])
+        s2 = pd.Series([4, 5, 6])
+        res = pd.concat([s1, s2], axis=1, ignore_index=False)
+        self.assertIsInstance(res.columns, pd.RangeIndex)
+        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
+        exp.columns = pd.RangeIndex(2)
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
+        # is_dataframe and ignore_index
+        df1 = pd.DataFrame({'A': [1, 2], 'B': [5, 6]})
+        df2 = pd.DataFrame({'A': [3, 4], 'B': [7, 8]})
+
+        res = pd.concat([df1, df2], axis=0, ignore_index=True)
+        exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]],
+                           columns=['A', 'B'])
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
+        res = pd.concat([df1, df2], axis=1, ignore_index=True)
+        exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
     def test_indicator(self):
         # PR #10054. xref #7412 and closes #8790.
         df1 = DataFrame({'col1': [0, 1], 'col_left': [