Skip to content

Commit 207efc2

Browse files
committed
Merge pull request pandas-dev#10723 from IamGianluca/issue_10698_fix
BUG: concat of Series w/o names pandas-dev#10698
2 parents 582eb17 + fa29a13 commit 207efc2

File tree

4 files changed

+71
-6
lines changed

4 files changed

+71
-6
lines changed

doc/source/merging.rst

+18-1
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,24 @@ Passing ``ignore_index=True`` will drop all name references.
352352
More concatenating with group keys
353353
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
354354

355-
Let's consider a variation on the first example presented:
355+
A fairly common use of the ``keys`` argument is to override the column names when creating a new DataFrame based on existing Series.
356+
Notice how the default behaviour is to let the resulting DataFrame inherit the parent Series' names, when these exist.
357+
358+
.. ipython:: python
359+
360+
s3 = pd.Series([0, 1, 2, 3], name='foo')
361+
s4 = pd.Series([0, 1, 2, 3])
362+
s5 = pd.Series([0, 1, 4, 5])
363+
364+
pd.concat([s3, s4, s5], axis=1)
365+
366+
Through the ``keys`` argument we can override the existing column names.
367+
368+
.. ipython:: python
369+
370+
pd.concat([s3, s4, s5], axis=1, keys=['red','blue','yellow'])
371+
372+
Let's now consider a variation on the very first example presented:
356373

357374
.. ipython:: python
358375

doc/source/whatsnew/v0.17.0.txt

+24
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,30 @@ Other enhancements
231231

232232
- ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`).
233233

234+
- ``concat`` will now use existing Series names if provided (:issue:`10698`).
235+
236+
.. ipython:: python
237+
238+
foo = pd.Series([1,2], name='foo')
239+
bar = pd.Series([1,2])
240+
baz = pd.Series([4,5])
241+
242+
Previous Behavior:
243+
244+
.. code-block:: python
245+
246+
In [1]: pd.concat([foo, bar, baz], 1)
247+
Out[1]:
248+
0 1 2
249+
0 1 1 4
250+
1 2 2 5
251+
252+
New Behavior:
253+
254+
.. ipython:: python
255+
256+
pd.concat([foo, bar, baz], 1)
257+
234258
.. _whatsnew_0170.api:
235259

236260
.. _whatsnew_0170.api_breaking:

pandas/tools/merge.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
import numpy as np
6-
from pandas.compat import range, long, lrange, lzip, zip, map, filter
6+
from pandas.compat import range, lrange, lzip, zip, map, filter
77
import pandas.compat as compat
88
from pandas.core.categorical import Categorical
99
from pandas.core.frame import DataFrame, _merge_doc
@@ -15,7 +15,7 @@
1515
from pandas.core.internals import (items_overlap_with_suffix,
1616
concatenate_block_managers)
1717
from pandas.util.decorators import Appender, Substitution
18-
from pandas.core.common import ABCSeries
18+
from pandas.core.common import ABCSeries, isnull
1919

2020
import pandas.core.common as com
2121

@@ -912,8 +912,14 @@ def get_result(self):
912912
data = dict(zip(range(len(self.objs)), self.objs))
913913
index, columns = self.new_axes
914914
tmpdf = DataFrame(data, index=index)
915-
if columns is not None:
916-
tmpdf.columns = columns
915+
# check whether the columns variable already stores valid column names (because they were set via the 'keys' argument
916+
# in the 'concat' function call). If that's not the case, use the Series names as column names
917+
if columns.equals(Index(np.arange(len(self.objs)))) and not self.ignore_index:
918+
columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object')
919+
indexer = isnull(columns)
920+
if indexer.any():
921+
columns[indexer] = np.arange(len(indexer[indexer]))
922+
tmpdf.columns = columns
917923
return tmpdf.__finalize__(self, method='concat')
918924

919925
# combine block managers

pandas/tools/tests/test_merge.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,24 @@ def test_concat_dataframe_keys_bug(self):
18791879
self.assertEqual(list(result.columns), [('t1', 'value'),
18801880
('t2', 'value')])
18811881

1882+
def test_concat_series_partial_columns_names(self):
1883+
# GH10698
1884+
foo = pd.Series([1,2], name='foo')
1885+
bar = pd.Series([1,2])
1886+
baz = pd.Series([4,5])
1887+
1888+
result = pd.concat([foo, bar, baz], axis=1)
1889+
expected = DataFrame({'foo' : [1,2], 0 : [1,2], 1 : [4,5]}, columns=['foo',0,1])
1890+
tm.assert_frame_equal(result, expected)
1891+
1892+
result = pd.concat([foo, bar, baz], axis=1, keys=['red','blue','yellow'])
1893+
expected = DataFrame({'red' : [1,2], 'blue' : [1,2], 'yellow' : [4,5]}, columns=['red','blue','yellow'])
1894+
tm.assert_frame_equal(result, expected)
1895+
1896+
result = pd.concat([foo, bar, baz], axis=1, ignore_index=True)
1897+
expected = DataFrame({0 : [1,2], 1 : [1,2], 2 : [4,5]})
1898+
tm.assert_frame_equal(result, expected)
1899+
18821900
def test_concat_dict(self):
18831901
frames = {'foo': DataFrame(np.random.randn(4, 3)),
18841902
'bar': DataFrame(np.random.randn(4, 3)),
@@ -2412,7 +2430,7 @@ def test_concat_series_axis1(self):
24122430

24132431
s2.name = None
24142432
result = concat([s, s2], axis=1)
2415-
self.assertTrue(np.array_equal(result.columns, lrange(2)))
2433+
self.assertTrue(np.array_equal(result.columns, Index(['A', 0], dtype='object')))
24162434

24172435
# must reindex, #2603
24182436
s = Series(randn(3), index=['c', 'a', 'b'], name='A')

0 commit comments

Comments
 (0)