From d80afa0e32d0ca74b1d60d27d61b3a22de55147a Mon Sep 17 00:00:00 2001 From: Jake Zimmer Date: Fri, 30 Nov 2018 17:35:50 -0500 Subject: [PATCH 01/21] BUG: Fix concat series loss of timezone --- pandas/core/dtypes/concat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f482f7e1927b7..b7bb3421579dc 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -193,7 +193,8 @@ def _concat_categorical(to_concat, axis=0): def _concat_asobject(to_concat): to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x).ravel() for x in to_concat] + else np.asarray(x).ravel() if not is_datetimetz(x) + else np.asarray(x.astype(object)) for x in to_concat] res = _concat_compat(to_concat) if axis == 1: return res.reshape(1, len(res)) From 159c4e62c74a5b3da4ad275c54befb4046bcf48a Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Sat, 1 Dec 2018 11:22:13 -0500 Subject: [PATCH 02/21] Fixed naming error for is_datetimetz since this function is no longer being imported in concat.py --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f648b996c11d9..25221e3a9eb2a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -193,7 +193,7 @@ def _concat_categorical(to_concat, axis=0): def _concat_asobject(to_concat): to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x).ravel() if not is_datetimetz(x) + else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) else np.asarray(x.astype(object)) for x in to_concat] res = _concat_compat(to_concat) if axis == 1: From 2450097327cc22c88cd53066f9a359aa74b4f925 Mon Sep 17 00:00:00 2001 From: Jake Zimmer Date: Sat, 1 Dec 2018 12:26:20 -0500 Subject: [PATCH 03/21] Attempted to use _concat_compat to rectify the timezone bug --- pandas/core/dtypes/concat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b7bb3421579dc..e0745edb399ae 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -193,7 +193,6 @@ def _concat_categorical(to_concat, axis=0): def _concat_asobject(to_concat): to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x).ravel() if not is_datetimetz(x) else np.asarray(x.astype(object)) for x in to_concat] res = _concat_compat(to_concat) if axis == 1: @@ -213,7 +212,7 @@ def _concat_asobject(to_concat): # when all categories are identical first = to_concat[0] if all(first.is_dtype_equal(other) for other in to_concat[1:]): - return union_categoricals(categoricals) + return _concat_compat(categoricals) return _concat_asobject(to_concat) From 7f9dd5219ab7ea2e425b71ba82542d4ce4a82344 Mon Sep 17 00:00:00 2001 From: Jake Zimmer Date: Sat, 1 Dec 2018 12:57:20 -0500 Subject: [PATCH 04/21] Attempt to fix tz error with concat compat instead of union --- pandas/core/dtypes/concat.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0eafb2a71f509..599e5ef4b5001 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, is_bool_dtype, is_categorical_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, is_dtype_equal, + is_datetime64_dtype, is_datetimetz, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_interval_dtype, is_object_dtype, is_period_dtype, is_sparse, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( @@ -212,7 +212,9 @@ def _concat_asobject(to_concat): # when all categories are identical first = to_concat[0] if all(first.is_dtype_equal(other) for other in to_concat[1:]): - return _concat_compat(categoricals) + return (union_categoricals(categoricals) + if not is_datetime64_dtype(categoricals[0]) + else _concat_compat(categoricals)) return _concat_asobject(to_concat) @@ -373,8 +375,10 @@ def _maybe_unwrap(x): if sort_categories: categories = categories.sort_values() - new_codes = [_recode_for_categories(c.codes, c.categories, categories) - for c in to_union] + new_codes = [] + for c in to_union: + new_codes.append(_recode_for_categories(c.codes, c.categories, + categories)) new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message From 6cb202277015f92cdc630363c0cd272ce3a49512 Mon Sep 17 00:00:00 2001 From: Jake Zimmer Date: Sat, 1 Dec 2018 13:57:33 -0500 Subject: [PATCH 05/21] changing behavior to be based on tz --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 599e5ef4b5001..b7ad3cc1459b1 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -213,7 +213,7 @@ def _concat_asobject(to_concat): first = to_concat[0] if all(first.is_dtype_equal(other) for other in to_concat[1:]): return (union_categoricals(categoricals) - if not is_datetime64_dtype(categoricals[0]) + if not is_datetime64tz_dtype(categoricals[0]) else _concat_compat(categoricals)) return _concat_asobject(to_concat) From a4da44973728eeb3742734654e500bcd225839d6 Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Sat, 1 Dec 2018 21:18:10 -0500 Subject: [PATCH 06/21] Attempting to fix differing dimensions bug --- pandas/core/dtypes/concat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b7ad3cc1459b1..21d7ff0ba46ee 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -192,8 +192,6 @@ def _concat_categorical(to_concat, axis=0): """ def _concat_asobject(to_concat): - to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x.astype(object)) for x in to_concat] res = _concat_compat(to_concat) if axis == 1: return res.reshape(1, len(res)) From fe83e6d597e0a8c2c958a8ecba1f423b9b8403d2 Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Sat, 1 Dec 2018 22:19:35 -0500 Subject: [PATCH 07/21] Another attempt to fix dimensions bug --- pandas/core/dtypes/concat.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 21d7ff0ba46ee..3d6a69cf31f4e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, is_bool_dtype, is_categorical_dtype, - is_datetime64_dtype, is_datetimetz, is_datetime64tz_dtype, is_dtype_equal, + is_datetime64_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_interval_dtype, is_object_dtype, is_period_dtype, is_sparse, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( @@ -192,6 +192,7 @@ def _concat_categorical(to_concat, axis=0): """ def _concat_asobject(to_concat): + to_concat = [x.get_values() for x in to_concat] res = _concat_compat(to_concat) if axis == 1: return res.reshape(1, len(res)) @@ -210,9 +211,7 @@ def _concat_asobject(to_concat): # when all categories are identical first = to_concat[0] if all(first.is_dtype_equal(other) for other in to_concat[1:]): - return (union_categoricals(categoricals) - if not is_datetime64tz_dtype(categoricals[0]) - else _concat_compat(categoricals)) + return union_categoricals(categoricals) return _concat_asobject(to_concat) @@ -373,10 +372,8 @@ def _maybe_unwrap(x): if sort_categories: categories = categories.sort_values() - new_codes = [] - for c in to_union: - new_codes.append(_recode_for_categories(c.codes, c.categories, - categories)) + new_codes = [_recode_for_categories(c.codes, c.categories, categories) + for c in to_union] new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message From 2cbb53381473c5577a47c706d74c366fd1de85c8 Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Sat, 1 Dec 2018 22:49:11 -0500 Subject: [PATCH 08/21] Just trying to test different versions here --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 3d6a69cf31f4e..75eb1684df90d 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -192,7 +192,7 @@ def _concat_categorical(to_concat, axis=0): """ def _concat_asobject(to_concat): - to_concat = [x.get_values() for x in to_concat] + to_concat = [np.asarray(x.astype(object)) for x in to_concat] res = _concat_compat(to_concat) if axis == 1: return res.reshape(1, len(res)) From f527dccbb4380586d86b9485f5552c9bd284f83e Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Sun, 2 Dec 2018 10:25:30 -0500 Subject: [PATCH 09/21] Trying to fix dimensions bug now that Travis CI is passing but others aren't --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 75eb1684df90d..6bd2a1ce483d4 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -211,7 +211,7 @@ def _concat_asobject(to_concat): # when all categories are identical first = to_concat[0] if all(first.is_dtype_equal(other) for other in to_concat[1:]): - return union_categoricals(categoricals) + return _concat_compat(categoricals) return _concat_asobject(to_concat) From 583ce4958aef67094e9cefeac50e2acd97f20963 Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Sun, 2 Dec 2018 11:31:24 -0500 Subject: [PATCH 10/21] tests failed so changing it back to when travis ci succeeded --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 6bd2a1ce483d4..75eb1684df90d 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -211,7 +211,7 @@ def _concat_asobject(to_concat): # when all categories are identical first = to_concat[0] if all(first.is_dtype_equal(other) for other in to_concat[1:]): - return _concat_compat(categoricals) + return union_categoricals(categoricals) return _concat_asobject(to_concat) From 01a2c10b654d56875c3f21e5d9cf5a6e0b09ceed Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Sun, 2 Dec 2018 17:49:37 -0500 Subject: [PATCH 11/21] Changing it back because we're trying to figure out if concat_compat actually takes care of the if checks --- pandas/core/dtypes/concat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 75eb1684df90d..25221e3a9eb2a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -192,7 +192,9 @@ def _concat_categorical(to_concat, axis=0): """ def _concat_asobject(to_concat): - to_concat = [np.asarray(x.astype(object)) for x in to_concat] + to_concat = [x.get_values() if is_categorical_dtype(x.dtype) + else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) + else np.asarray(x.astype(object)) for x in to_concat] res = _concat_compat(to_concat) if axis == 1: return res.reshape(1, len(res)) From 683dccf9c92040d68397f13e0d4b4d59a9531ce0 Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Mon, 3 Dec 2018 00:15:07 -0500 Subject: [PATCH 12/21] Reverting back to version when all tests passed --- pandas/core/dtypes/concat.py | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 25221e3a9eb2a..6ed0bb9065a71 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -23,7 +23,6 @@ def get_dtype_kinds(l): Parameters ---------- l : list of arrays - Returns ------- a set of kinds that exist in this list of arrays @@ -107,12 +106,10 @@ def _concat_compat(to_concat, axis=0): 'normalized' dtypes (in that for example, if it's object, then it is a non-datetimelike and provide a combined dtype for the resulting array that preserves the overall dtype if possible) - Parameters ---------- to_concat : array of arrays axis : axis to provide concatenation - Returns ------- a single array, preserving the combined dtypes @@ -177,14 +174,12 @@ def is_nonempty(x): def _concat_categorical(to_concat, axis=0): """Concatenate an object/categorical array of arrays, each of which is a single dtype - Parameters ---------- to_concat : array of arrays axis : int Axis to provide concatenation in the current implementation this is always 0, e.g. we only have 1D categoricals - Returns ------- Categorical @@ -222,9 +217,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): """ Combine list-like of Categorical-like, unioning categories. All categories must have the same dtype. - .. versionadded:: 0.19.0 - Parameters ---------- to_union : list-like of Categorical, CategoricalIndex, @@ -235,13 +228,10 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): ignore_order : boolean, default False If true, the ordered attribute of the Categoricals will be ignored. Results in an unordered categorical. - .. versionadded:: 0.20.0 - Returns ------- result : Categorical - Raises ------ TypeError @@ -251,69 +241,52 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): - sort_categories=True and Categoricals are ordered ValueError Empty list of categoricals passed - Notes ----- - To learn more about categories, see `link `__ - Examples -------- - >>> from pandas.api.types import union_categoricals - If you want to combine categoricals that do not necessarily have the same categories, `union_categoricals` will combine a list-like of categoricals. The new categories will be the union of the categories being combined. - >>> a = pd.Categorical(["b", "c"]) >>> b = pd.Categorical(["a", "b"]) >>> union_categoricals([a, b]) [b, c, a, b] Categories (3, object): [b, c, a] - By default, the resulting categories will be ordered as they appear in the `categories` of the data. If you want the categories to be lexsorted, use `sort_categories=True` argument. - >>> union_categoricals([a, b], sort_categories=True) [b, c, a, b] Categories (3, object): [a, b, c] - `union_categoricals` also works with the case of combining two categoricals of the same categories and order information (e.g. what you could also `append` for). - >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "a"], ordered=True) >>> union_categoricals([a, b]) [a, b, a, b, a] Categories (2, object): [a < b] - Raises `TypeError` because the categories are ordered and not identical. - >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "c"], ordered=True) >>> union_categoricals([a, b]) TypeError: to union ordered Categoricals, all categories must be the same - New in version 0.20.0 - Ordered categoricals with different categories or orderings can be combined by using the `ignore_ordered=True` argument. - >>> a = pd.Categorical(["a", "b", "c"], ordered=True) >>> b = pd.Categorical(["c", "b", "a"], ordered=True) >>> union_categoricals([a, b], ignore_order=True) [a, b, c, c, b, a] Categories (3, object): [a, b, c] - `union_categoricals` also works with a `CategoricalIndex`, or `Series` containing categorical data, but note that the resulting array will always be a plain `Categorical` - >>> a = pd.Series(["b", "c"], dtype='category') >>> b = pd.Series(["a", "b"], dtype='category') >>> union_categoricals([a, b]) @@ -404,13 +377,11 @@ def _concat_datetime(to_concat, axis=0, typs=None): """ provide concatenation of an datetimelike array of arrays each of which is a single M8[ns], datetimet64[ns, tz] or m8[ns] dtype - Parameters ---------- to_concat : array of arrays axis : axis to provide concatenation typs : set of to_concat dtypes - Returns ------- a single array, preserving the combined dtypes @@ -510,13 +481,11 @@ def _concat_sparse(to_concat, axis=0, typs=None): """ provide concatenation of an sparse/dense array of arrays each of which is a single dtype - Parameters ---------- to_concat : array of arrays axis : axis to provide concatenation typs : set of to_concat dtypes - Returns ------- a single array, preserving the combined dtypes @@ -580,4 +549,4 @@ def _concat_rangeindex_same_dtype(indexes): # Here all "indexes" had 0 length, i.e. were empty. # In this case return an empty range index. - return RangeIndex(0, 0) + return RangeIndex(0, 0) \ No newline at end of file From 857c6beeeabd1f16e68de6f81875341fed5d5d25 Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Mon, 3 Dec 2018 15:48:35 -0500 Subject: [PATCH 13/21] Restored blank lines --- pandas/core/dtypes/concat.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 6ed0bb9065a71..d5b4fa044fb2a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -23,6 +23,7 @@ def get_dtype_kinds(l): Parameters ---------- l : list of arrays + Returns ------- a set of kinds that exist in this list of arrays @@ -106,10 +107,12 @@ def _concat_compat(to_concat, axis=0): 'normalized' dtypes (in that for example, if it's object, then it is a non-datetimelike and provide a combined dtype for the resulting array that preserves the overall dtype if possible) + Parameters ---------- to_concat : array of arrays axis : axis to provide concatenation + Returns ------- a single array, preserving the combined dtypes @@ -174,12 +177,14 @@ def is_nonempty(x): def _concat_categorical(to_concat, axis=0): """Concatenate an object/categorical array of arrays, each of which is a single dtype + Parameters ---------- to_concat : array of arrays axis : int Axis to provide concatenation in the current implementation this is always 0, e.g. we only have 1D categoricals + Returns ------- Categorical @@ -217,7 +222,9 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): """ Combine list-like of Categorical-like, unioning categories. All categories must have the same dtype. + .. versionadded:: 0.19.0 + Parameters ---------- to_union : list-like of Categorical, CategoricalIndex, @@ -228,10 +235,13 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): ignore_order : boolean, default False If true, the ordered attribute of the Categoricals will be ignored. Results in an unordered categorical. + .. versionadded:: 0.20.0 + Returns ------- result : Categorical + Raises ------ TypeError @@ -241,52 +251,69 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): - sort_categories=True and Categoricals are ordered ValueError Empty list of categoricals passed + Notes ----- + To learn more about categories, see `link `__ + Examples -------- + >>> from pandas.api.types import union_categoricals + If you want to combine categoricals that do not necessarily have the same categories, `union_categoricals` will combine a list-like of categoricals. The new categories will be the union of the categories being combined. + >>> a = pd.Categorical(["b", "c"]) >>> b = pd.Categorical(["a", "b"]) >>> union_categoricals([a, b]) [b, c, a, b] Categories (3, object): [b, c, a] + By default, the resulting categories will be ordered as they appear in the `categories` of the data. If you want the categories to be lexsorted, use `sort_categories=True` argument. + >>> union_categoricals([a, b], sort_categories=True) [b, c, a, b] Categories (3, object): [a, b, c] + `union_categoricals` also works with the case of combining two categoricals of the same categories and order information (e.g. what you could also `append` for). + >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "a"], ordered=True) >>> union_categoricals([a, b]) [a, b, a, b, a] Categories (2, object): [a < b] + Raises `TypeError` because the categories are ordered and not identical. + >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "c"], ordered=True) >>> union_categoricals([a, b]) TypeError: to union ordered Categoricals, all categories must be the same + New in version 0.20.0 + Ordered categoricals with different categories or orderings can be combined by using the `ignore_ordered=True` argument. + >>> a = pd.Categorical(["a", "b", "c"], ordered=True) >>> b = pd.Categorical(["c", "b", "a"], ordered=True) >>> union_categoricals([a, b], ignore_order=True) [a, b, c, c, b, a] Categories (3, object): [a, b, c] + `union_categoricals` also works with a `CategoricalIndex`, or `Series` containing categorical data, but note that the resulting array will always be a plain `Categorical` + >>> a = pd.Series(["b", "c"], dtype='category') >>> b = pd.Series(["a", "b"], dtype='category') >>> union_categoricals([a, b]) @@ -377,11 +404,13 @@ def _concat_datetime(to_concat, axis=0, typs=None): """ provide concatenation of an datetimelike array of arrays each of which is a single M8[ns], datetimet64[ns, tz] or m8[ns] dtype + Parameters ---------- to_concat : array of arrays axis : axis to provide concatenation typs : set of to_concat dtypes + Returns ------- a single array, preserving the combined dtypes @@ -481,11 +510,13 @@ def _concat_sparse(to_concat, axis=0, typs=None): """ provide concatenation of an sparse/dense array of arrays each of which is a single dtype + Parameters ---------- to_concat : array of arrays axis : axis to provide concatenation typs : set of to_concat dtypes + Returns ------- a single array, preserving the combined dtypes From 64da4c0bf6dca7dd33b3feded8550847376c353f Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Mon, 3 Dec 2018 16:45:30 -0500 Subject: [PATCH 14/21] Added test case for the new tz output --- pandas/tests/reshape/test_concat.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 4113fb7f0f11e..13388853dcb6f 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2552,3 +2552,15 @@ def test_concat_series_name_npscalar_tuple(s1name, s2name): result = pd.concat([s1, s2]) expected = pd.Series({'a': 1, 'b': 2, 'c': 5, 'd': 6}) tm.assert_series_equal(result, expected) + +def test_concat_categorical_tz(): + # GH-23816 + a = pd.Series(pd.date_range('2017-01-01', periods=2, tz='US/Pacific')) + b = pd.Series(['a', 'b'], dtype='category') + result = pd.concat([a, b], ignore_index=True) + expected = pd.Series([ + pd.Timestamp('2017-01-01', tz="US/Pacific"), + pd.Timestamp('2017-01-02', tz="US/Pacific"), + 'a', 'b' + ]) + tm.assert_series_equal(result, expected) From 9e699e42b6bc01a2d689d1e3a23a3dbcb79163fe Mon Sep 17 00:00:00 2001 From: Evangeline Liu Date: Mon, 3 Dec 2018 18:23:08 -0500 Subject: [PATCH 15/21] Fixed style issues --- pandas/core/dtypes/concat.py | 5 +++-- pandas/tests/reshape/test_concat.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d5b4fa044fb2a..f3c4af8e7a678 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -516,7 +516,7 @@ def _concat_sparse(to_concat, axis=0, typs=None): to_concat : array of arrays axis : axis to provide concatenation typs : set of to_concat dtypes - + Returns ------- a single array, preserving the combined dtypes @@ -580,4 +580,5 @@ def _concat_rangeindex_same_dtype(indexes): # Here all "indexes" had 0 length, i.e. were empty. # In this case return an empty range index. - return RangeIndex(0, 0) \ No newline at end of file + return RangeIndex(0, 0) + \ No newline at end of file diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 13388853dcb6f..bb002f151b455 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2553,6 +2553,7 @@ def test_concat_series_name_npscalar_tuple(s1name, s2name): expected = pd.Series({'a': 1, 'b': 2, 'c': 5, 'd': 6}) tm.assert_series_equal(result, expected) + def test_concat_categorical_tz(): # GH-23816 a = pd.Series(pd.date_range('2017-01-01', periods=2, tz='US/Pacific')) From 64182c537062415359c508622b5edfb86fd023ef Mon Sep 17 00:00:00 2001 From: Jake Zimmer Date: Mon, 3 Dec 2018 18:56:31 -0500 Subject: [PATCH 16/21] Fixed the whitespace issue in linting --- pandas/core/dtypes/concat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f3c4af8e7a678..25221e3a9eb2a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -257,7 +257,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): To learn more about categories, see `link `__ - + Examples -------- @@ -581,4 +581,3 @@ def _concat_rangeindex_same_dtype(indexes): # Here all "indexes" had 0 length, i.e. were empty. # In this case return an empty range index. return RangeIndex(0, 0) - \ No newline at end of file From c7dcdb445506a0a00fbd9363d6551b0a44d5c259 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 3 Dec 2018 22:37:42 -0500 Subject: [PATCH 17/21] fix up --- pandas/core/dtypes/concat.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 25221e3a9eb2a..0df0c01dbd47a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -191,16 +191,6 @@ def _concat_categorical(to_concat, axis=0): A single array, preserving the combined dtypes """ - def _concat_asobject(to_concat): - to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) - else np.asarray(x.astype(object)) for x in to_concat] - res = _concat_compat(to_concat) - if axis == 1: - return res.reshape(1, len(res)) - else: - return res - # we could have object blocks and categoricals here # if we only have a single categoricals then combine everything # else its a non-compat categorical @@ -215,7 +205,14 @@ def _concat_asobject(to_concat): if all(first.is_dtype_equal(other) for other in to_concat[1:]): return union_categoricals(categoricals) - return _concat_asobject(to_concat) + # extract the categoricals & coerce to object if needed + to_concat = [x.get_values() if is_categorical_dtype(x.dtype) + else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) + else np.asarray(x.astype(object)) for x in to_concat] + result = _concat_compat(to_concat) + if axis == 1: + result = result.reshape(1, len(result)) + return result def union_categoricals(to_union, sort_categories=False, ignore_order=False): From 165689e863a0f1700796da8265209745904cb51e Mon Sep 17 00:00:00 2001 From: Jake Zimmer Date: Tue, 4 Dec 2018 14:08:30 -0500 Subject: [PATCH 18/21] updated whatsnew (v0.24.0) to reflect changes --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 93ac9caa42e3e..c9587600a87d3 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1541,6 +1541,7 @@ Reshaping - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) - Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`) - Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`) +- Bug in :func:`pandas.concat` when joining series datetimetz with series category would lose timezone (:issue:`23816`) .. _whatsnew_0240.bug_fixes.sparse: From 634c7365c710e1540de84b29ca35542d06820b10 Mon Sep 17 00:00:00 2001 From: jakezimmer Date: Wed, 5 Dec 2018 02:14:13 -0500 Subject: [PATCH 19/21] no changes since @jreback's fix up commit attempting to rerun the tests --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0df0c01dbd47a..d0a9334882cef 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -207,7 +207,7 @@ def _concat_categorical(to_concat, axis=0): # extract the categoricals & coerce to object if needed to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) + else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) else np.asarray(x.astype(object)) for x in to_concat] result = _concat_compat(to_concat) if axis == 1: From 0b86ef91c522ca084e122b1ed26fd33beeccee95 Mon Sep 17 00:00:00 2001 From: jakezimmer Date: Wed, 5 Dec 2018 13:36:03 -0500 Subject: [PATCH 20/21] Update v0.24.0.rst added double backticks on Series --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4581a455c8c0e..e6a08362a70f7 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1545,7 +1545,7 @@ Reshaping - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) - Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`) - Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`) -- Bug in :func:`pandas.concat` when joining series datetimetz with series category would lose timezone (:issue:`23816`) +- Bug in :func:`pandas.concat` when joining ``Series`` datetimetz with ``Series`` category would lose timezone (:issue:`23816`) - Bug in :meth:`DataFrame.join` when joining on partial MultiIndex would drop names (:issue:`20452`). .. _whatsnew_0240.bug_fixes.sparse: From 1867b3ad6514a1de18cb5ce7fdfe2cbbb95f57c6 Mon Sep 17 00:00:00 2001 From: jakezimmer Date: Wed, 5 Dec 2018 17:33:37 -0500 Subject: [PATCH 21/21] removed trailing whitespace --- pandas/core/dtypes/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d0a9334882cef..0df0c01dbd47a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -207,7 +207,7 @@ def _concat_categorical(to_concat, axis=0): # extract the categoricals & coerce to object if needed to_concat = [x.get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) + else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) else np.asarray(x.astype(object)) for x in to_concat] result = _concat_compat(to_concat) if axis == 1: