Skip to content

Commit 0323336

Browse files
sinhrks authored and jorisvandenbossche committed
BUG: concat/append misc fixes (#13660)
closes #13626 closes #7795
1 parent 59524af commit 0323336

File tree

17 files changed

+734
-252
lines changed

17 files changed

+734
-252
lines changed

doc/source/whatsnew/v0.19.0.txt

+7
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,13 @@ Bug Fixes
13561356
- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
13571357
- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`)
13581358

1359+
- Bug in ``pd.concat`` and ``.append`` which may coerce ``datetime64`` and ``timedelta`` to ``object`` dtype containing Python built-in ``datetime`` or ``timedelta`` rather than ``Timestamp`` or ``Timedelta`` (:issue:`13626`)
1360+
- Bug in ``PeriodIndex.append`` which may raise ``AttributeError`` when the result is ``object`` dtype (:issue:`13221`)
1361+
- Bug in ``CategoricalIndex.append`` which may accept a normal ``list`` (:issue:`13626`)
1362+
- Bug in ``pd.concat`` and ``.append`` where data with the same timezone gets reset to UTC (:issue:`7795`)
1363+
- Bug in ``Series`` and ``DataFrame`` ``.append`` raising ``AmbiguousTimeError`` if the data contains a datetime near a DST boundary (:issue:`13626`)
1364+
1365+
13591366
- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`)
13601367
- Bug in ``DataFrame.describe()`` raising ``ValueError`` with only boolean columns (:issue:`13898`)
13611368
- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)

pandas/core/frame.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -4384,14 +4384,20 @@ def append(self, other, ignore_index=False, verify_integrity=False):
43844384
raise TypeError('Can only append a Series if ignore_index=True'
43854385
' or if the Series has a name')
43864386

4387-
index = None if other.name is None else [other.name]
4387+
if other.name is None:
4388+
index = None
4389+
else:
4390+
# other must have the same index name as self, otherwise
4391+
# index name will be reset
4392+
index = Index([other.name], name=self.index.name)
4393+
43884394
combined_columns = self.columns.tolist() + self.columns.union(
43894395
other.index).difference(self.columns).tolist()
43904396
other = other.reindex(combined_columns, copy=False)
43914397
other = DataFrame(other.values.reshape((1, len(other))),
4392-
index=index, columns=combined_columns)
4398+
index=index,
4399+
columns=combined_columns)
43934400
other = other._convert(datetime=True, timedelta=True)
4394-
43954401
if not self.columns.equals(combined_columns):
43964402
self = self.reindex(columns=combined_columns)
43974403
elif isinstance(other, list) and not isinstance(other[0], DataFrame):

pandas/core/series.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -289,16 +289,18 @@ def _set_axis(self, axis, labels, fastpath=False):
289289

290290
is_all_dates = labels.is_all_dates
291291
if is_all_dates:
292-
293292
if not isinstance(labels,
294293
(DatetimeIndex, PeriodIndex, TimedeltaIndex)):
295294
try:
296295
labels = DatetimeIndex(labels)
297296
# need to set here because we changed the index
298297
if fastpath:
299298
self._data.set_axis(axis, labels)
300-
except tslib.OutOfBoundsDatetime:
299+
except (tslib.OutOfBoundsDatetime, ValueError):
300+
# labels may exceed datetime bounds,
301+
# or not be a DatetimeIndex
301302
pass
303+
302304
self._set_subtyp(is_all_dates)
303305

304306
object.__setattr__(self, '_index', labels)

pandas/indexes/base.py

+26-49
Original file line numberDiff line numberDiff line change
@@ -1392,15 +1392,19 @@ def __getitem__(self, key):
13921392
else:
13931393
return result
13941394

1395-
def _ensure_compat_append(self, other):
1395+
def append(self, other):
13961396
"""
1397-
prepare the append
1397+
Append a collection of Index options together
1398+
1399+
Parameters
1400+
----------
1401+
other : Index or list/tuple of indices
13981402
13991403
Returns
14001404
-------
1401-
list of to_concat, name of result Index
1405+
appended : Index
14021406
"""
1403-
name = self.name
1407+
14041408
to_concat = [self]
14051409

14061410
if isinstance(other, (list, tuple)):
@@ -1409,46 +1413,29 @@ def _ensure_compat_append(self, other):
14091413
to_concat.append(other)
14101414

14111415
for obj in to_concat:
1412-
if (isinstance(obj, Index) and obj.name != name and
1413-
obj.name is not None):
1414-
name = None
1415-
break
1416+
if not isinstance(obj, Index):
1417+
raise TypeError('all inputs must be Index')
14161418

1417-
to_concat = self._ensure_compat_concat(to_concat)
1418-
to_concat = [x._values if isinstance(x, Index) else x
1419-
for x in to_concat]
1420-
return to_concat, name
1419+
names = set([obj.name for obj in to_concat])
1420+
name = None if len(names) > 1 else self.name
14211421

1422-
def append(self, other):
1423-
"""
1424-
Append a collection of Index options together
1422+
typs = _concat.get_dtype_kinds(to_concat)
14251423

1426-
Parameters
1427-
----------
1428-
other : Index or list/tuple of indices
1424+
if 'category' in typs:
1425+
# if any of the to_concat is category
1426+
from pandas.indexes.category import CategoricalIndex
1427+
return CategoricalIndex._append_same_dtype(self, to_concat, name)
14291428

1430-
Returns
1431-
-------
1432-
appended : Index
1433-
"""
1434-
to_concat, name = self._ensure_compat_append(other)
1435-
attribs = self._get_attributes_dict()
1436-
attribs['name'] = name
1437-
return self._shallow_copy_with_infer(
1438-
np.concatenate(to_concat), **attribs)
1439-
1440-
@staticmethod
1441-
def _ensure_compat_concat(indexes):
1442-
from pandas.tseries.api import (DatetimeIndex, PeriodIndex,
1443-
TimedeltaIndex)
1444-
klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex
1445-
1446-
is_ts = [isinstance(idx, klasses) for idx in indexes]
1429+
if len(typs) == 1:
1430+
return self._append_same_dtype(to_concat, name=name)
1431+
return _concat._concat_index_asobject(to_concat, name=name)
14471432

1448-
if any(is_ts) and not all(is_ts):
1449-
return [_maybe_box(idx) for idx in indexes]
1450-
1451-
return indexes
1433+
def _append_same_dtype(self, to_concat, name):
1434+
"""
1435+
Concatenate to_concat which has the same class
1436+
"""
1437+
# must be overridden in specific classes
1438+
return _concat._concat_index_asobject(to_concat, name)
14521439

14531440
_index_shared_docs['take'] = """
14541441
return a new %(klass)s of the values selected by the indices
@@ -3634,16 +3621,6 @@ def _ensure_has_len(seq):
36343621
return seq
36353622

36363623

3637-
def _maybe_box(idx):
3638-
from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex
3639-
klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex
3640-
3641-
if isinstance(idx, klasses):
3642-
return idx.asobject
3643-
3644-
return idx
3645-
3646-
36473624
def _trim_front(strings):
36483625
"""
36493626
Trims zeros and decimal points

pandas/indexes/category.py

+6-15
Original file line numberDiff line numberDiff line change
@@ -569,26 +569,17 @@ def insert(self, loc, item):
569569
codes = np.concatenate((codes[:loc], code, codes[loc:]))
570570
return self._create_from_codes(codes)
571571

572-
def append(self, other):
572+
def _append_same_dtype(self, to_concat, name):
573573
"""
574-
Append a collection of CategoricalIndex options together
575-
576-
Parameters
577-
----------
578-
other : Index or list/tuple of indices
579-
580-
Returns
581-
-------
582-
appended : Index
583-
584-
Raises
585-
------
574+
Concatenate to_concat which has the same class
586575
ValueError if other is not in the categories
587576
"""
588-
to_concat, name = self._ensure_compat_append(other)
589577
to_concat = [self._is_dtype_compat(c) for c in to_concat]
590578
codes = np.concatenate([c.codes for c in to_concat])
591-
return self._create_from_codes(codes, name=name)
579+
result = self._create_from_codes(codes, name=name)
580+
# if name is None, _create_from_codes sets self.name
581+
result.name = name
582+
return result
592583

593584
@classmethod
594585
def _add_comparison_methods(cls):

pandas/tests/indexes/test_category.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -271,12 +271,12 @@ def test_append(self):
271271
lambda: ci.append(ci.values.reorder_categories(list('abc'))))
272272

273273
# with objects
274-
result = ci.append(['c', 'a'])
274+
result = ci.append(Index(['c', 'a']))
275275
expected = CategoricalIndex(list('aabbcaca'), categories=categories)
276276
tm.assert_index_equal(result, expected, exact=True)
277277

278278
# invalid objects
279-
self.assertRaises(TypeError, lambda: ci.append(['a', 'd']))
279+
self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd'])))
280280

281281
def test_insert(self):
282282

pandas/tests/indexes/test_multi.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import re
77
import warnings
88

9-
from pandas import (DataFrame, date_range, MultiIndex, Index, CategoricalIndex,
10-
compat)
9+
from pandas import (DataFrame, date_range, period_range, MultiIndex, Index,
10+
CategoricalIndex, compat)
1111
from pandas.core.common import PerformanceWarning
1212
from pandas.indexes.base import InvalidIndexError
1313
from pandas.compat import range, lrange, u, PY3, long, lzip
@@ -769,6 +769,40 @@ def test_append(self):
769769
result = self.index.append([])
770770
self.assertTrue(result.equals(self.index))
771771

772+
def test_append_mixed_dtypes(self):
773+
# GH 13660
774+
dti = date_range('2011-01-01', freq='M', periods=3,)
775+
dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern')
776+
pi = period_range('2011-01', freq='M', periods=3)
777+
778+
mi = MultiIndex.from_arrays([[1, 2, 3],
779+
[1.1, np.nan, 3.3],
780+
['a', 'b', 'c'],
781+
dti, dti_tz, pi])
782+
self.assertEqual(mi.nlevels, 6)
783+
784+
res = mi.append(mi)
785+
exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3],
786+
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
787+
['a', 'b', 'c', 'a', 'b', 'c'],
788+
dti.append(dti),
789+
dti_tz.append(dti_tz),
790+
pi.append(pi)])
791+
tm.assert_index_equal(res, exp)
792+
793+
other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'],
794+
['x', 'y', 'z'], ['x', 'y', 'z'],
795+
['x', 'y', 'z'], ['x', 'y', 'z']])
796+
797+
res = mi.append(other)
798+
exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'],
799+
[1.1, np.nan, 3.3, 'x', 'y', 'z'],
800+
['a', 'b', 'c', 'x', 'y', 'z'],
801+
dti.append(pd.Index(['x', 'y', 'z'])),
802+
dti_tz.append(pd.Index(['x', 'y', 'z'])),
803+
pi.append(pd.Index(['x', 'y', 'z']))])
804+
tm.assert_index_equal(res, exp)
805+
772806
def test_get_level_values(self):
773807
result = self.index.get_level_values(0)
774808
expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'],

pandas/tests/types/test_concat.py

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import nose
4+
import pandas as pd
5+
import pandas.types.concat as _concat
6+
import pandas.util.testing as tm
7+
8+
9+
class TestConcatCompat(tm.TestCase):
10+
11+
_multiprocess_can_split_ = True
12+
13+
def check_concat(self, to_concat, exp):
14+
for klass in [pd.Index, pd.Series]:
15+
to_concat_klass = [klass(c) for c in to_concat]
16+
res = _concat.get_dtype_kinds(to_concat_klass)
17+
self.assertEqual(res, set(exp))
18+
19+
def test_get_dtype_kinds(self):
20+
to_concat = [['a'], [1, 2]]
21+
self.check_concat(to_concat, ['i', 'object'])
22+
23+
to_concat = [[3, 4], [1, 2]]
24+
self.check_concat(to_concat, ['i'])
25+
26+
to_concat = [[3, 4], [1, 2.1]]
27+
self.check_concat(to_concat, ['i', 'f'])
28+
29+
def test_get_dtype_kinds_datetimelike(self):
30+
to_concat = [pd.DatetimeIndex(['2011-01-01']),
31+
pd.DatetimeIndex(['2011-01-02'])]
32+
self.check_concat(to_concat, ['datetime'])
33+
34+
to_concat = [pd.TimedeltaIndex(['1 days']),
35+
pd.TimedeltaIndex(['2 days'])]
36+
self.check_concat(to_concat, ['timedelta'])
37+
38+
def test_get_dtype_kinds_datetimelike_object(self):
39+
to_concat = [pd.DatetimeIndex(['2011-01-01']),
40+
pd.DatetimeIndex(['2011-01-02'], tz='US/Eastern')]
41+
self.check_concat(to_concat,
42+
['datetime', 'datetime64[ns, US/Eastern]'])
43+
44+
to_concat = [pd.DatetimeIndex(['2011-01-01'], tz='Asia/Tokyo'),
45+
pd.DatetimeIndex(['2011-01-02'], tz='US/Eastern')]
46+
self.check_concat(to_concat,
47+
['datetime64[ns, Asia/Tokyo]',
48+
'datetime64[ns, US/Eastern]'])
49+
50+
# timedelta has single type
51+
to_concat = [pd.TimedeltaIndex(['1 days']),
52+
pd.TimedeltaIndex(['2 hours'])]
53+
self.check_concat(to_concat, ['timedelta'])
54+
55+
to_concat = [pd.DatetimeIndex(['2011-01-01'], tz='Asia/Tokyo'),
56+
pd.TimedeltaIndex(['1 days'])]
57+
self.check_concat(to_concat,
58+
['datetime64[ns, Asia/Tokyo]', 'timedelta'])
59+
60+
def test_get_dtype_kinds_period(self):
61+
# because we don't have Period dtype (yet),
62+
# Series results in object dtype
63+
to_concat = [pd.PeriodIndex(['2011-01'], freq='M'),
64+
pd.PeriodIndex(['2011-01'], freq='M')]
65+
res = _concat.get_dtype_kinds(to_concat)
66+
self.assertEqual(res, set(['period[M]']))
67+
68+
to_concat = [pd.Series([pd.Period('2011-01', freq='M')]),
69+
pd.Series([pd.Period('2011-02', freq='M')])]
70+
res = _concat.get_dtype_kinds(to_concat)
71+
self.assertEqual(res, set(['object']))
72+
73+
to_concat = [pd.PeriodIndex(['2011-01'], freq='M'),
74+
pd.PeriodIndex(['2011-01'], freq='D')]
75+
res = _concat.get_dtype_kinds(to_concat)
76+
self.assertEqual(res, set(['period[M]', 'period[D]']))
77+
78+
to_concat = [pd.Series([pd.Period('2011-01', freq='M')]),
79+
pd.Series([pd.Period('2011-02', freq='D')])]
80+
res = _concat.get_dtype_kinds(to_concat)
81+
self.assertEqual(res, set(['object']))
82+
83+
84+
if __name__ == '__main__':
85+
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
86+
exit=False)

pandas/tools/pivot.py

+6
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,9 @@ def _normalize(table, normalize, margins):
523523
column_margin = table.loc[:, 'All'].drop('All')
524524
index_margin = table.loc['All', :].drop('All')
525525
table = table.drop('All', axis=1).drop('All')
526+
# to keep index and columns names
527+
table_index_names = table.index.names
528+
table_columns_names = table.columns.names
526529

527530
# Normalize core
528531
table = _normalize(table, normalize=normalize, margins=False)
@@ -550,6 +553,9 @@ def _normalize(table, normalize, margins):
550553
else:
551554
raise ValueError("Not a valid normalize argument")
552555

556+
table.index.names = table_index_names
557+
table.columns.names = table_columns_names
558+
553559
else:
554560
raise ValueError("Not a valid margins argument")
555561

0 commit comments

Comments
 (0)