diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 777bc01e71833..ca5f3dfc2a8f2 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1356,6 +1356,13 @@ Bug Fixes - Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) - Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) +- Bug in ``pd.concat`` and ``.append`` may coerce ``datetime64`` and ``timedelta`` to ``object`` dtype containing python built-in ``datetime`` or ``timedelta`` rather than ``Timestamp`` or ``Timedelta`` (:issue:`13626`) +- Bug in ``PeriodIndex.append`` may raise ``AttributeError`` when the result is ``object`` dtype (:issue:`13221`) +- Bug in ``CategoricalIndex.append`` may accept normal ``list`` (:issue:`13626`) +- Bug in ``pd.concat`` and ``.append`` with the same timezone getting reset to UTC (:issue:`7795`) +- Bug in ``Series`` and ``DataFrame`` ``.append`` raising ``AmbiguousTimeError`` if data contains datetime near DST boundary (:issue:`13626`) + + - Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`) - Bug in ``DataFrame.describe()`` raising ``ValueError`` with only boolean columns (:issue:`13898`) - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 46a1d22a4114b..ac3e5d2aabef7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4384,14 +4384,20 @@ def append(self, other, ignore_index=False, verify_integrity=False): raise TypeError('Can only append a Series if ignore_index=True' ' or if the Series has a name') - index = None if other.name is None else [other.name] + if other.name is None: + index = None + else: + # other must have the same index name as self, otherwise + # 
index name will be reset + index = Index([other.name], name=self.index.name) + combined_columns = self.columns.tolist() + self.columns.union( other.index).difference(self.columns).tolist() other = other.reindex(combined_columns, copy=False) other = DataFrame(other.values.reshape((1, len(other))), - index=index, columns=combined_columns) + index=index, + columns=combined_columns) other = other._convert(datetime=True, timedelta=True) - if not self.columns.equals(combined_columns): self = self.reindex(columns=combined_columns) elif isinstance(other, list) and not isinstance(other[0], DataFrame): diff --git a/pandas/core/series.py b/pandas/core/series.py index 01d6f6f078d17..8379c8bcdcae8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -289,7 +289,6 @@ def _set_axis(self, axis, labels, fastpath=False): is_all_dates = labels.is_all_dates if is_all_dates: - if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): try: @@ -297,8 +296,11 @@ def _set_axis(self, axis, labels, fastpath=False): # need to set here becuase we changed the index if fastpath: self._data.set_axis(axis, labels) - except tslib.OutOfBoundsDatetime: + except (tslib.OutOfBoundsDatetime, ValueError): + # labels may exceed datetime bounds, + # or not be a DatetimeIndex pass + self._set_subtyp(is_all_dates) object.__setattr__(self, '_index', labels) diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 15cd2064624d9..d6b6d01b1e444 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1392,15 +1392,19 @@ def __getitem__(self, key): else: return result - def _ensure_compat_append(self, other): + def append(self, other): """ - prepare the append + Append a collection of Index options together + + Parameters + ---------- + other : Index or list/tuple of indices Returns ------- - list of to_concat, name of result Index + appended : Index """ - name = self.name + to_concat = [self] if isinstance(other, (list, tuple)): @@ -1409,46 +1413,29 @@ def 
_ensure_compat_append(self, other): to_concat.append(other) for obj in to_concat: - if (isinstance(obj, Index) and obj.name != name and - obj.name is not None): - name = None - break + if not isinstance(obj, Index): + raise TypeError('all inputs must be Index') - to_concat = self._ensure_compat_concat(to_concat) - to_concat = [x._values if isinstance(x, Index) else x - for x in to_concat] - return to_concat, name + names = set([obj.name for obj in to_concat]) + name = None if len(names) > 1 else self.name - def append(self, other): - """ - Append a collection of Index options together + typs = _concat.get_dtype_kinds(to_concat) - Parameters - ---------- - other : Index or list/tuple of indices + if 'category' in typs: + # if any of the to_concat is category + from pandas.indexes.category import CategoricalIndex + return CategoricalIndex._append_same_dtype(self, to_concat, name) - Returns - ------- - appended : Index - """ - to_concat, name = self._ensure_compat_append(other) - attribs = self._get_attributes_dict() - attribs['name'] = name - return self._shallow_copy_with_infer( - np.concatenate(to_concat), **attribs) - - @staticmethod - def _ensure_compat_concat(indexes): - from pandas.tseries.api import (DatetimeIndex, PeriodIndex, - TimedeltaIndex) - klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex - - is_ts = [isinstance(idx, klasses) for idx in indexes] + if len(typs) == 1: + return self._append_same_dtype(to_concat, name=name) + return _concat._concat_index_asobject(to_concat, name=name) - if any(is_ts) and not all(is_ts): - return [_maybe_box(idx) for idx in indexes] - - return indexes + def _append_same_dtype(self, to_concat, name): + """ + Concatenate to_concat which has the same class + """ + # must be overridden in specific classes + return _concat._concat_index_asobject(to_concat, name) _index_shared_docs['take'] = """ return a new %(klass)s of the values selected by the indices @@ -3634,16 +3621,6 @@ def 
_maybe_box(idx): - from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex - klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex - - if isinstance(idx, klasses): - return idx.asobject - - return idx - - def _trim_front(strings): """ Trims zeros and decimal points diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 251886ebdd974..1666d8f7bc078 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -569,26 +569,17 @@ def insert(self, loc, item): codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) - def append(self, other): + def _append_same_dtype(self, to_concat, name): """ - Append a collection of CategoricalIndex options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : Index - - Raises - ------ + Concatenate to_concat which has the same class ValueError if other is not in the categories """ - to_concat, name = self._ensure_compat_append(other) to_concat = [self._is_dtype_compat(c) for c in to_concat] codes = np.concatenate([c.codes for c in to_concat]) - return self._create_from_codes(codes, name=name) + result = self._create_from_codes(codes, name=name) + # if name is None, _create_from_codes sets self.name + result.name = name + return result @classmethod def _add_comparison_methods(cls): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index b0e50491b8e9d..cb8452479f616 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -271,12 +271,12 @@ def test_append(self): lambda: ci.append(ci.values.reorder_categories(list('abc')))) # with objects - result = ci.append(['c', 'a']) + result = ci.append(Index(['c', 'a'])) expected = CategoricalIndex(list('aabbcaca'), categories=categories) tm.assert_index_equal(result, expected, exact=True) # invalid objects - self.assertRaises(TypeError, lambda: ci.append(['a', 'd'])) + 
self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd']))) def test_insert(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 675193e1538b2..d49ac40631d37 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -6,8 +6,8 @@ import re import warnings -from pandas import (DataFrame, date_range, MultiIndex, Index, CategoricalIndex, - compat) +from pandas import (DataFrame, date_range, period_range, MultiIndex, Index, + CategoricalIndex, compat) from pandas.core.common import PerformanceWarning from pandas.indexes.base import InvalidIndexError from pandas.compat import range, lrange, u, PY3, long, lzip @@ -769,6 +769,40 @@ def test_append(self): result = self.index.append([]) self.assertTrue(result.equals(self.index)) + def test_append_mixed_dtypes(self): + # GH 13660 + dti = date_range('2011-01-01', freq='M', periods=3,) + dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') + pi = period_range('2011-01', freq='M', periods=3) + + mi = MultiIndex.from_arrays([[1, 2, 3], + [1.1, np.nan, 3.3], + ['a', 'b', 'c'], + dti, dti_tz, pi]) + self.assertEqual(mi.nlevels, 6) + + res = mi.append(mi) + exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ['a', 'b', 'c', 'a', 'b', 'c'], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi)]) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], + ['x', 'y', 'z'], ['x', 'y', 'z'], + ['x', 'y', 'z'], ['x', 'y', 'z']]) + + res = mi.append(other) + exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], + [1.1, np.nan, 3.3, 'x', 'y', 'z'], + ['a', 'b', 'c', 'x', 'y', 'z'], + dti.append(pd.Index(['x', 'y', 'z'])), + dti_tz.append(pd.Index(['x', 'y', 'z'])), + pi.append(pd.Index(['x', 'y', 'z']))]) + tm.assert_index_equal(res, exp) + def test_get_level_values(self): result = self.index.get_level_values(0) expected = Index(['foo', 'foo', 
'bar', 'baz', 'qux', 'qux'], diff --git a/pandas/tests/types/test_concat.py b/pandas/tests/types/test_concat.py new file mode 100644 index 0000000000000..6403dcb5a5350 --- /dev/null +++ b/pandas/tests/types/test_concat.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +import nose +import pandas as pd +import pandas.types.concat as _concat +import pandas.util.testing as tm + + +class TestConcatCompat(tm.TestCase): + + _multiprocess_can_split_ = True + + def check_concat(self, to_concat, exp): + for klass in [pd.Index, pd.Series]: + to_concat_klass = [klass(c) for c in to_concat] + res = _concat.get_dtype_kinds(to_concat_klass) + self.assertEqual(res, set(exp)) + + def test_get_dtype_kinds(self): + to_concat = [['a'], [1, 2]] + self.check_concat(to_concat, ['i', 'object']) + + to_concat = [[3, 4], [1, 2]] + self.check_concat(to_concat, ['i']) + + to_concat = [[3, 4], [1, 2.1]] + self.check_concat(to_concat, ['i', 'f']) + + def test_get_dtype_kinds_datetimelike(self): + to_concat = [pd.DatetimeIndex(['2011-01-01']), + pd.DatetimeIndex(['2011-01-02'])] + self.check_concat(to_concat, ['datetime']) + + to_concat = [pd.TimedeltaIndex(['1 days']), + pd.TimedeltaIndex(['2 days'])] + self.check_concat(to_concat, ['timedelta']) + + def test_get_dtype_kinds_datetimelike_object(self): + to_concat = [pd.DatetimeIndex(['2011-01-01']), + pd.DatetimeIndex(['2011-01-02'], tz='US/Eastern')] + self.check_concat(to_concat, + ['datetime', 'datetime64[ns, US/Eastern]']) + + to_concat = [pd.DatetimeIndex(['2011-01-01'], tz='Asia/Tokyo'), + pd.DatetimeIndex(['2011-01-02'], tz='US/Eastern')] + self.check_concat(to_concat, + ['datetime64[ns, Asia/Tokyo]', + 'datetime64[ns, US/Eastern]']) + + # timedelta has single type + to_concat = [pd.TimedeltaIndex(['1 days']), + pd.TimedeltaIndex(['2 hours'])] + self.check_concat(to_concat, ['timedelta']) + + to_concat = [pd.DatetimeIndex(['2011-01-01'], tz='Asia/Tokyo'), + pd.TimedeltaIndex(['1 days'])] + self.check_concat(to_concat, + ['datetime64[ns, 
Asia/Tokyo]', 'timedelta']) + + def test_get_dtype_kinds_period(self): + # because we don't have Period dtype (yet), + # Series results in object dtype + to_concat = [pd.PeriodIndex(['2011-01'], freq='M'), + pd.PeriodIndex(['2011-01'], freq='M')] + res = _concat.get_dtype_kinds(to_concat) + self.assertEqual(res, set(['period[M]'])) + + to_concat = [pd.Series([pd.Period('2011-01', freq='M')]), + pd.Series([pd.Period('2011-02', freq='M')])] + res = _concat.get_dtype_kinds(to_concat) + self.assertEqual(res, set(['object'])) + + to_concat = [pd.PeriodIndex(['2011-01'], freq='M'), + pd.PeriodIndex(['2011-01'], freq='D')] + res = _concat.get_dtype_kinds(to_concat) + self.assertEqual(res, set(['period[M]', 'period[D]'])) + + to_concat = [pd.Series([pd.Period('2011-01', freq='M')]), + pd.Series([pd.Period('2011-02', freq='D')])] + res = _concat.get_dtype_kinds(to_concat) + self.assertEqual(res, set(['object'])) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 3e2b7c3af460e..94b464f6fca6c 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -523,6 +523,9 @@ def _normalize(table, normalize, margins): column_margin = table.loc[:, 'All'].drop('All') index_margin = table.loc['All', :].drop('All') table = table.drop('All', axis=1).drop('All') + # to keep index and columns names + table_index_names = table.index.names + table_columns_names = table.columns.names # Normalize core table = _normalize(table, normalize=normalize, margins=False) @@ -550,6 +553,9 @@ def _normalize(table, normalize, margins): else: raise ValueError("Not a valid normalize argument") + table.index.names = table_index_names + table.columns.names = table_columns_names + else: raise ValueError("Not a valid margins argument") diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py index 17ccfb27d4b42..102f21bcdc535 100644 --- 
a/pandas/tools/tests/test_concat.py +++ b/pandas/tools/tests/test_concat.py @@ -4,7 +4,7 @@ from numpy.random import randn from datetime import datetime -from pandas.compat import StringIO +from pandas.compat import StringIO, iteritems import pandas as pd from pandas import (DataFrame, concat, read_csv, isnull, Series, date_range, @@ -27,6 +27,430 @@ def setUp(self): self.mixed_frame['foo'] = 'bar' +class TestConcatAppendCommon(ConcatenateBase): + + """ + Test common dtype coercion rules between concat and append. + """ + + def setUp(self): + + dt_data = [pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-01-02'), + pd.Timestamp('2011-01-03')] + tz_data = [pd.Timestamp('2011-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-02', tz='US/Eastern'), + pd.Timestamp('2011-01-03', tz='US/Eastern')] + + td_data = [pd.Timedelta('1 days'), + pd.Timedelta('2 days'), + pd.Timedelta('3 days')] + + period_data = [pd.Period('2011-01', freq='M'), + pd.Period('2011-02', freq='M'), + pd.Period('2011-03', freq='M')] + + self.data = {'bool': [True, False, True], + 'int64': [1, 2, 3], + 'float64': [1.1, np.nan, 3.3], + 'category': pd.Categorical(['X', 'Y', 'Z']), + 'object': ['a', 'b', 'c'], + 'datetime64[ns]': dt_data, + 'datetime64[ns, US/Eastern]': tz_data, + 'timedelta64[ns]': td_data, + 'period[M]': period_data} + + def _check_expected_dtype(self, obj, label): + """ + Check whether obj has expected dtype depending on label + considering not-supported dtypes + """ + if isinstance(obj, pd.Index): + if label == 'bool': + self.assertEqual(obj.dtype, 'object') + else: + self.assertEqual(obj.dtype, label) + elif isinstance(obj, pd.Series): + if label.startswith('period'): + self.assertEqual(obj.dtype, 'object') + else: + self.assertEqual(obj.dtype, label) + else: + raise ValueError + + def test_dtypes(self): + # to confirm test case covers intended dtypes + for typ, vals in iteritems(self.data): + self._check_expected_dtype(pd.Index(vals), typ) + 
self._check_expected_dtype(pd.Series(vals), typ) + + def test_concatlike_same_dtypes(self): + # GH 13660 + for typ1, vals1 in iteritems(self.data): + + vals2 = vals1 + vals3 = vals1 + + if typ1 == 'category': + exp_data = pd.Categorical(list(vals1) + list(vals2)) + exp_data3 = pd.Categorical(list(vals1) + list(vals2) + + list(vals3)) + else: + exp_data = vals1 + vals2 + exp_data3 = vals1 + vals2 + vals3 + + # ----- Index ----- # + + # index.append + res = pd.Index(vals1).append(pd.Index(vals2)) + exp = pd.Index(exp_data) + tm.assert_index_equal(res, exp) + + # 3 elements + res = pd.Index(vals1).append([pd.Index(vals2), pd.Index(vals3)]) + exp = pd.Index(exp_data3) + tm.assert_index_equal(res, exp) + + # index.append name mismatch + i1 = pd.Index(vals1, name='x') + i2 = pd.Index(vals2, name='y') + res = i1.append(i2) + exp = pd.Index(exp_data) + tm.assert_index_equal(res, exp) + + # index.append name match + i1 = pd.Index(vals1, name='x') + i2 = pd.Index(vals2, name='x') + res = i1.append(i2) + exp = pd.Index(exp_data, name='x') + tm.assert_index_equal(res, exp) + + # cannot append non-index + with tm.assertRaisesRegexp(TypeError, 'all inputs must be Index'): + pd.Index(vals1).append(vals2) + + with tm.assertRaisesRegexp(TypeError, 'all inputs must be Index'): + pd.Index(vals1).append([pd.Index(vals2), vals3]) + + # ----- Series ----- # + + # series.append + res = pd.Series(vals1).append(pd.Series(vals2), + ignore_index=True) + exp = pd.Series(exp_data) + tm.assert_series_equal(res, exp, check_index_type=True) + + # concat + res = pd.concat([pd.Series(vals1), pd.Series(vals2)], + ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # 3 elements + res = pd.Series(vals1).append([pd.Series(vals2), pd.Series(vals3)], + ignore_index=True) + exp = pd.Series(exp_data3) + tm.assert_series_equal(res, exp) + + res = pd.concat([pd.Series(vals1), pd.Series(vals2), + pd.Series(vals3)], ignore_index=True) + tm.assert_series_equal(res, exp) + + # name 
mismatch + s1 = pd.Series(vals1, name='x') + s2 = pd.Series(vals2, name='y') + res = s1.append(s2, ignore_index=True) + exp = pd.Series(exp_data) + tm.assert_series_equal(res, exp, check_index_type=True) + + res = pd.concat([s1, s2], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # name match + s1 = pd.Series(vals1, name='x') + s2 = pd.Series(vals2, name='x') + res = s1.append(s2, ignore_index=True) + exp = pd.Series(exp_data, name='x') + tm.assert_series_equal(res, exp, check_index_type=True) + + res = pd.concat([s1, s2], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # cannot append non-index + msg = "cannot concatenate a non-NDFrame object" + with tm.assertRaisesRegexp(TypeError, msg): + pd.Series(vals1).append(vals2) + + with tm.assertRaisesRegexp(TypeError, msg): + pd.Series(vals1).append([pd.Series(vals2), vals3]) + + with tm.assertRaisesRegexp(TypeError, msg): + pd.concat([pd.Series(vals1), vals2]) + + with tm.assertRaisesRegexp(TypeError, msg): + pd.concat([pd.Series(vals1), pd.Series(vals2), vals3]) + + def test_concatlike_dtypes_coercion(self): + # GH 13660 + for typ1, vals1 in iteritems(self.data): + for typ2, vals2 in iteritems(self.data): + + vals3 = vals2 + + # basically infer + exp_index_dtype = None + exp_series_dtype = None + + if typ1 == typ2: + # same dtype is tested in test_concatlike_same_dtypes + continue + elif typ1 == 'category' or typ2 == 'category': + # ToDo: suspicious + continue + + # specify expected dtype + if typ1 == 'bool' and typ2 in ('int64', 'float64'): + # series coerces to numeric based on numpy rule + # index doesn't because bool is object dtype + exp_series_dtype = typ2 + elif typ2 == 'bool' and typ1 in ('int64', 'float64'): + exp_series_dtype = typ1 + elif (typ1 == 'datetime64[ns, US/Eastern]' or + typ2 == 'datetime64[ns, US/Eastern]' or + typ1 == 'timedelta64[ns]' or + typ2 == 'timedelta64[ns]'): + exp_index_dtype = object + exp_series_dtype = object + + 
exp_data = vals1 + vals2 + exp_data3 = vals1 + vals2 + vals3 + + # ----- Index ----- # + + # index.append + res = pd.Index(vals1).append(pd.Index(vals2)) + exp = pd.Index(exp_data, dtype=exp_index_dtype) + tm.assert_index_equal(res, exp) + + # 3 elements + res = pd.Index(vals1).append([pd.Index(vals2), + pd.Index(vals3)]) + exp = pd.Index(exp_data3, dtype=exp_index_dtype) + tm.assert_index_equal(res, exp) + + # ----- Series ----- # + + # series.append + res = pd.Series(vals1).append(pd.Series(vals2), + ignore_index=True) + exp = pd.Series(exp_data, dtype=exp_series_dtype) + tm.assert_series_equal(res, exp, check_index_type=True) + + # concat + res = pd.concat([pd.Series(vals1), pd.Series(vals2)], + ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # 3 elements + res = pd.Series(vals1).append([pd.Series(vals2), + pd.Series(vals3)], + ignore_index=True) + exp = pd.Series(exp_data3, dtype=exp_series_dtype) + tm.assert_series_equal(res, exp) + + res = pd.concat([pd.Series(vals1), pd.Series(vals2), + pd.Series(vals3)], ignore_index=True) + tm.assert_series_equal(res, exp) + + def test_concatlike_common_coerce_to_pandas_object(self): + # GH 13626 + # result must be Timestamp/Timedelta, not datetime.datetime/timedelta + dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02']) + tdi = pd.TimedeltaIndex(['1 days', '2 days']) + + exp = pd.Index([pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-01-02'), + pd.Timedelta('1 days'), + pd.Timedelta('2 days')]) + + res = dti.append(tdi) + tm.assert_index_equal(res, exp) + tm.assertIsInstance(res[0], pd.Timestamp) + tm.assertIsInstance(res[-1], pd.Timedelta) + + dts = pd.Series(dti) + tds = pd.Series(tdi) + res = dts.append(tds) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + tm.assertIsInstance(res.iloc[0], pd.Timestamp) + tm.assertIsInstance(res.iloc[-1], pd.Timedelta) + + res = pd.concat([dts, tds]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + 
tm.assertIsInstance(res.iloc[0], pd.Timestamp) + tm.assertIsInstance(res.iloc[-1], pd.Timedelta) + + def test_concatlike_datetimetz(self): + # GH 7795 + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo']: + dti1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) + dti2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'], tz=tz) + + exp = pd.DatetimeIndex(['2011-01-01', '2011-01-02', + '2012-01-01', '2012-01-02'], tz=tz) + + res = dti1.append(dti2) + tm.assert_index_equal(res, exp) + + dts1 = pd.Series(dti1) + dts2 = pd.Series(dti2) + res = dts1.append(dts2) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_datetimetz_short(self): + # GH 7795 + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo', 'EST5EDT']: + + ix1 = pd.DatetimeIndex(start='2014-07-15', end='2014-07-17', + freq='D', tz=tz) + ix2 = pd.DatetimeIndex(['2014-07-11', '2014-07-21'], tz=tz) + df1 = pd.DataFrame(0, index=ix1, columns=['A', 'B']) + df2 = pd.DataFrame(0, index=ix2, columns=['A', 'B']) + + exp_idx = pd.DatetimeIndex(['2014-07-15', '2014-07-16', + '2014-07-17', '2014-07-11', + '2014-07-21'], tz=tz) + exp = pd.DataFrame(0, index=exp_idx, columns=['A', 'B']) + + tm.assert_frame_equal(df1.append(df2), exp) + tm.assert_frame_equal(pd.concat([df1, df2]), exp) + + def test_concatlike_datetimetz_to_object(self): + # GH 13660 + + # different tz coerces to object + for tz in ['UTC', 'US/Eastern', 'Asia/Tokyo']: + dti1 = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], tz=tz) + dti2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02']) + + exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2011-01-02', tz=tz), + pd.Timestamp('2012-01-01'), + pd.Timestamp('2012-01-02')], dtype=object) + + res = dti1.append(dti2) + tm.assert_index_equal(res, exp) + + dts1 = pd.Series(dti1) + dts2 = pd.Series(dti2) + res = dts1.append(dts2) + tm.assert_series_equal(res, pd.Series(exp, 
index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + # different tz + dti3 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'], + tz='US/Pacific') + + exp = pd.Index([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2011-01-02', tz=tz), + pd.Timestamp('2012-01-01', tz='US/Pacific'), + pd.Timestamp('2012-01-02', tz='US/Pacific')], + dtype=object) + + res = dti1.append(dti3) + # tm.assert_index_equal(res, exp) + + dts1 = pd.Series(dti1) + dts3 = pd.Series(dti3) + res = dts1.append(dts3) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts3]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period(self): + # GH 13660 + pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') + pi2 = pd.PeriodIndex(['2012-01', '2012-02'], freq='M') + + exp = pd.PeriodIndex(['2011-01', '2011-02', '2012-01', + '2012-02'], freq='M') + + res = pi1.append(pi2) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + ps2 = pd.Series(pi2) + res = ps1.append(ps2) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, ps2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period_diff_freq_to_object(self): + # GH 13221 + pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') + pi2 = pd.PeriodIndex(['2012-01-01', '2012-02-01'], freq='D') + + exp = pd.Index([pd.Period('2011-01', freq='M'), + pd.Period('2011-02', freq='M'), + pd.Period('2012-01-01', freq='D'), + pd.Period('2012-02-01', freq='D')], dtype=object) + + res = pi1.append(pi2) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + ps2 = pd.Series(pi2) + res = ps1.append(ps2) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, ps2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def 
test_concatlike_common_period_mixed_dt_to_object(self): + # GH 13221 + # different datetimelike + pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') + tdi = pd.TimedeltaIndex(['1 days', '2 days']) + exp = pd.Index([pd.Period('2011-01', freq='M'), + pd.Period('2011-02', freq='M'), + pd.Timedelta('1 days'), + pd.Timedelta('2 days')], dtype=object) + + res = pi1.append(tdi) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + tds = pd.Series(tdi) + res = ps1.append(tds) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, tds]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + # inverse + exp = pd.Index([pd.Timedelta('1 days'), + pd.Timedelta('2 days'), + pd.Period('2011-01', freq='M'), + pd.Period('2011-02', freq='M')], dtype=object) + + res = tdi.append(pi1) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + tds = pd.Series(tdi) + res = tds.append(ps1) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([tds, ps1]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + class TestAppend(ConcatenateBase): def test_append(self): diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index cda2343fbb842..75c6db23b4bc7 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -895,7 +895,9 @@ def test_crosstab_margins(self): all_cols = result['All', ''] exp_cols = df.groupby(['a']).size().astype('i8') - exp_cols = exp_cols.append(Series([len(df)], index=['All'])) + # to keep index.name + exp_margin = Series([len(df)], index=Index(['All'], name='a')) + exp_cols = exp_cols.append(exp_margin) exp_cols.name = ('All', '') tm.assert_series_equal(all_cols, exp_cols) @@ -1084,7 +1086,6 @@ def test_crosstab_normalize(self): dtype='object'), columns=pd.Index([3, 4, 'All'], name='b')) - tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index', margins=True), row_normal_margins) 
tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns', diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index f0c6e334925c4..45e2a2d6c0720 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -26,6 +26,7 @@ from pandas.core.index import Index from pandas.indexes.base import _index_shared_docs from pandas.util.decorators import Appender, cache_readonly +import pandas.types.concat as _concat import pandas.tseries.frequencies as frequencies import pandas.algos as _algos @@ -795,6 +796,23 @@ def summary(self, name=None): result = result.replace("'", "") return result + def _append_same_dtype(self, to_concat, name): + """ + Concatenate to_concat which has the same class + """ + attribs = self._get_attributes_dict() + attribs['name'] = name + + if not isinstance(self, ABCPeriodIndex): + # reset freq + attribs['freq'] = None + + if getattr(self, 'tz', None) is not None: + return _concat._concat_datetimetz(to_concat, name) + else: + new_data = np.concatenate([c.asi8 for c in to_concat]) + return self._simple_new(new_data, **attribs) + def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index f78574521ffeb..ee0e88b993f55 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1008,36 +1008,6 @@ def union_many(self, others): this.offset = to_offset(this.inferred_freq) return this - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : Index - """ - name = self.name - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - for obj in to_concat: - if isinstance(obj, Index) and obj.name != name: - name = None - break - - to_concat = self._ensure_compat_concat(to_concat) - to_concat, factory = 
_process_concat_data(to_concat, name) - - return factory(to_concat) - def join(self, other, how='left', level=None, return_indexers=False): """ See Index.join @@ -2180,56 +2150,3 @@ def _use_cached_range(offset, _normalized, start, end): def _time_to_micros(time): seconds = time.hour * 60 * 60 + 60 * time.minute + time.second return 1000000 * seconds + time.microsecond - - -def _process_concat_data(to_concat, name): - klass = Index - kwargs = {} - concat = np.concatenate - - all_dti = True - need_utc_convert = False - has_naive = False - tz = None - - for x in to_concat: - if not isinstance(x, DatetimeIndex): - all_dti = False - else: - if tz is None: - tz = x.tz - - if x.tz is None: - has_naive = True - - if x.tz != tz: - need_utc_convert = True - tz = 'UTC' - - if all_dti: - need_obj_convert = False - if has_naive and tz is not None: - need_obj_convert = True - - if need_obj_convert: - to_concat = [x.asobject.values for x in to_concat] - - else: - if need_utc_convert: - to_concat = [x.tz_convert('UTC').values for x in to_concat] - else: - to_concat = [x.values for x in to_concat] - - # well, technically not a "class" anymore...oh well - klass = DatetimeIndex._simple_new - kwargs = {'tz': tz} - concat = _concat._concat_compat - else: - for i, x in enumerate(to_concat): - if isinstance(x, DatetimeIndex): - to_concat[i] = x.asobject.values - elif isinstance(x, Index): - to_concat[i] = x.values - - factory_func = lambda x: klass(concat(x), name=name, **kwargs) - return to_concat, factory_func diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 7fb0f19b04486..363f2419889d1 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -974,45 +974,6 @@ def _format_native_types(self, na_rep=u('NaT'), date_format=None, values = np.array([formatter(dt) for dt in values]) return values - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns 
- ------- - appended : Index - """ - name = self.name - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - for obj in to_concat: - if isinstance(obj, Index) and obj.name != name: - name = None - break - - to_concat = self._ensure_compat_concat(to_concat) - - if isinstance(to_concat[0], PeriodIndex): - if len(set([x.freq for x in to_concat])) > 1: - # box - to_concat = [x.asobject.values for x in to_concat] - else: - cat_values = np.concatenate([x._values for x in to_concat]) - return PeriodIndex(cat_values, freq=self.freq, name=name) - - to_concat = [x._values if isinstance(x, Index) else x - for x in to_concat] - return Index(com._concat_compat(to_concat), name=name) - def __setstate__(self, state): """Necessary for making this object picklable""" diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index a17eda3ac4288..7c7cac83aef53 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -505,34 +505,6 @@ def union(self, other): result.freq = to_offset(result.inferred_freq) return result - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : Index - """ - name = self.name - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - for obj in to_concat: - if isinstance(obj, Index) and obj.name != name: - name = None - break - - to_concat = self._ensure_compat_concat(to_concat) - return Index(_concat._concat_compat(to_concat), name=name) - def join(self, other, how='left', level=None, return_indexers=False): """ See Index.join diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 7ec0d09c20841..a7a015f273320 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -1296,27 
+1296,59 @@ def test_append_aware(self): tz='US/Eastern') rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='US/Eastern') - ts1 = Series(np.random.randn(len(rng1)), index=rng1) - ts2 = Series(np.random.randn(len(rng2)), index=rng2) + ts1 = Series([1], index=rng1) + ts2 = Series([2], index=rng2) ts_result = ts1.append(ts2) + + exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], + tz='US/Eastern') + exp = Series([1, 2], index=exp_index) + self.assert_series_equal(ts_result, exp) self.assertEqual(ts_result.index.tz, rng1.tz) rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC') rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='UTC') - ts1 = Series(np.random.randn(len(rng1)), index=rng1) - ts2 = Series(np.random.randn(len(rng2)), index=rng2) + ts1 = Series([1], index=rng1) + ts2 = Series([2], index=rng2) ts_result = ts1.append(ts2) + + exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'], + tz='UTC') + exp = Series([1, 2], index=exp_index) + self.assert_series_equal(ts_result, exp) utc = rng1.tz self.assertEqual(utc, ts_result.index.tz) + # GH 7795 + # different tz coerces to object dtype, not UTC rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='US/Eastern') rng2 = date_range('1/1/2011 02:00', periods=1, freq='H', tz='US/Central') - ts1 = Series(np.random.randn(len(rng1)), index=rng1) - ts2 = Series(np.random.randn(len(rng2)), index=rng2) + ts1 = Series([1], index=rng1) + ts2 = Series([2], index=rng2) ts_result = ts1.append(ts2) - self.assertEqual(utc, ts_result.index.tz) + exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'), + Timestamp('1/1/2011 02:00', tz='US/Central')]) + exp = Series([1, 2], index=exp_index) + self.assert_series_equal(ts_result, exp) + + def test_append_dst(self): + rng1 = date_range('1/1/2016 01:00', periods=3, freq='H', + tz='US/Eastern') + rng2 = date_range('8/1/2016 01:00', periods=3, freq='H', + tz='US/Eastern') + ts1 = Series([1, 2, 3], index=rng1) + ts2 
= Series([10, 11, 12], index=rng2) + ts_result = ts1.append(ts2) + + exp_index = DatetimeIndex(['2016-01-01 01:00', '2016-01-01 02:00', + '2016-01-01 03:00', '2016-08-01 01:00', + '2016-08-01 02:00', '2016-08-01 03:00'], + tz='US/Eastern') + exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) + tm.assert_series_equal(ts_result, exp) + self.assertEqual(ts_result.index.tz, rng1.tz) def test_append_aware_naive(self): rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') diff --git a/pandas/types/concat.py b/pandas/types/concat.py index a7fd692cfb9cf..29a0fe7d9f8d0 100644 --- a/pandas/types/concat.py +++ b/pandas/types/concat.py @@ -12,11 +12,14 @@ is_datetimetz, is_datetime64_dtype, is_timedelta64_dtype, + is_period_dtype, is_object_dtype, is_bool_dtype, is_dtype_equal, _NS_DTYPE, _TD_DTYPE) +from pandas.types.generic import (ABCDatetimeIndex, ABCTimedeltaIndex, + ABCPeriodIndex) def get_dtype_kinds(l): @@ -39,7 +42,9 @@ def get_dtype_kinds(l): elif is_sparse(arr): typ = 'sparse' elif is_datetimetz(arr): - typ = 'datetimetz' + # if to_concat contains different tz, + # the result must be object dtype + typ = str(arr.dtype) elif is_datetime64_dtype(dtype): typ = 'datetime' elif is_timedelta64_dtype(dtype): @@ -48,6 +53,8 @@ def get_dtype_kinds(l): typ = 'object' elif is_bool_dtype(dtype): typ = 'bool' + elif is_period_dtype(dtype): + typ = str(arr.dtype) else: typ = dtype.kind typs.add(typ) @@ -127,7 +134,10 @@ def is_nonempty(x): typs = get_dtype_kinds(to_concat) # these are mandated to handle empties as well - if 'datetime' in typs or 'datetimetz' in typs or 'timedelta' in typs: + _contains_datetime = any(typ.startswith('datetime') for typ in typs) + _contains_period = any(typ.startswith('period') for typ in typs) + + if _contains_datetime or 'timedelta' in typs or _contains_period: return _concat_datetime(to_concat, axis=axis, typs=typs) elif 'sparse' in typs: @@ -319,12 +329,13 @@ def convert_to_pydatetime(x, axis): x = x.asobject.values else: shape = x.shape - 
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel()) + x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), + box=True) x = x.reshape(shape) elif x.dtype == _TD_DTYPE: shape = x.shape - x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel()) + x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True) x = x.reshape(shape) if axis == 1: @@ -336,34 +347,71 @@ def convert_to_pydatetime(x, axis): # must be single dtype if len(typs) == 1: + _contains_datetime = any(typ.startswith('datetime') for typ in typs) + _contains_period = any(typ.startswith('period') for typ in typs) - if 'datetimetz' in typs: - # datetime with no tz should be stored as "datetime" in typs, - # thus no need to care - - # we require ALL of the same tz for datetimetz - tzs = set([str(x.tz) for x in to_concat]) - if len(tzs) == 1: - from pandas.tseries.index import DatetimeIndex - new_values = np.concatenate([x.tz_localize(None).asi8 - for x in to_concat]) - return DatetimeIndex(new_values, tz=list(tzs)[0]) + if _contains_datetime: - elif 'datetime' in typs: - new_values = np.concatenate([x.view(np.int64) for x in to_concat], - axis=axis) - return new_values.view(_NS_DTYPE) + if 'datetime' in typs: + new_values = np.concatenate([x.view(np.int64) for x in + to_concat], axis=axis) + return new_values.view(_NS_DTYPE) + else: + # when to_concat has different tz, len(typs) > 1. 
+ # thus no need to care + return _concat_datetimetz(to_concat) elif 'timedelta' in typs: new_values = np.concatenate([x.view(np.int64) for x in to_concat], axis=axis) return new_values.view(_TD_DTYPE) + elif _contains_period: + # PeriodIndex must be handled by PeriodIndex, + # Thus can't meet this condition ATM + # Must be changed when we adding PeriodDtype + raise NotImplementedError + # need to coerce to object to_concat = [convert_to_pydatetime(x, axis) for x in to_concat] return np.concatenate(to_concat, axis=axis) +def _concat_datetimetz(to_concat, name=None): + """ + concat DatetimeIndex with the same tz + all inputs must be DatetimeIndex + it is used in DatetimeIndex.append also + """ + # do not pass tz to set because tzlocal cannot be hashed + if len(set([str(x.dtype) for x in to_concat])) != 1: + raise ValueError('to_concat must have the same tz') + tz = to_concat[0].tz + # no need to localize because internal repr will not be changed + new_values = np.concatenate([x.asi8 for x in to_concat]) + return to_concat[0]._simple_new(new_values, tz=tz, name=name) + + +def _concat_index_asobject(to_concat, name=None): + """ + concat all inputs as object. DatetimeIndex, TimedeltaIndex and + PeriodIndex are converted to object dtype before concatenation + """ + + klasses = ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex + to_concat = [x.asobject if isinstance(x, klasses) else x + for x in to_concat] + + from pandas import Index + self = to_concat[0] + attribs = self._get_attributes_dict() + attribs['name'] = name + + to_concat = [x._values if isinstance(x, Index) else x + for x in to_concat] + return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) + + def _concat_sparse(to_concat, axis=0, typs=None): """ provide concatenation of an sparse/dense array of arrays each of which is a