diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8d6e403783fc9..d3a93a576e55f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -568,7 +568,9 @@ def _get_axes(N, K, index=index, columns=columns): # if we don't have a dtype specified, then try to convert objects # on the entire block; this is to convert if we have datetimelike's # embedded in an object type - if dtype is None and is_object_dtype(values): + if dtype is None and is_object_dtype(values) and values.shape[0] == 1: + # only do this inference for single-column DataFrame, otherwise + # create_block_manager_from_blocks will raise a ValueError values = maybe_infer_to_datetimelike(values) return create_block_manager_from_blocks([values], [columns, index]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dde671993a56b..e048306c32e23 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -22,8 +22,9 @@ is_bool_dtype, is_numeric_dtype, is_datetime64_any_dtype, - is_timedelta64_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_timedelta64_dtype, is_list_like, is_dict_like, is_re_compilable, @@ -31,10 +32,12 @@ is_object_dtype, is_extension_array_dtype, pandas_dtype) -from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask +from pandas.core.dtypes.cast import ( + maybe_promote, maybe_upcast_putmask, maybe_infer_to_datetimelike) from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame +from pandas.core.dtypes.generic import ( + ABCSeries, ABCPanel, ABCDataFrame, ABCDatetimeIndex) from pandas.core.base import PandasObject, SelectionMixin from pandas.core.index import (Index, MultiIndex, ensure_index, @@ -683,12 +686,20 @@ def transpose(self, *args, **kwargs): new_axes = self._construct_axes_dict_from(self, [self._get_axis(x) for x in axes_names]) - new_values = self.values.transpose(axes_numbers) + values = 
self.values + if isinstance(values, ABCDatetimeIndex): + # we must cast to numpy array otherwise transpose raises ValueError + values = np.array(values.astype(np.object)).reshape(self.shape) + + new_values = values.transpose(axes_numbers) if kwargs.pop('copy', None) or (len(args) and args[-1]): new_values = new_values.copy() nv.validate_transpose_for_generic(self, kwargs) - return self._constructor(new_values, **new_axes).__finalize__(self) + result = self._constructor(new_values, **new_axes) + + result = maybe_restore_dtypes(result, self) + return result.__finalize__(self) def swapaxes(self, axis1, axis2, copy=True): """ @@ -10753,6 +10764,35 @@ def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, return set_function_name(logical_func, name, cls) +def maybe_restore_dtypes(result, orig): + # GH#23730 + if orig.ndim != 2: + return result + + if orig.size == 0: + # ensure both orig.dtypes and result.dtypes have length >= 1 + return result + + if ((result.dtypes == np.object_).all() and + not (orig.dtypes == np.object_).any()): + # the transpose was lossy + if (orig.dtypes == orig.dtypes[0]).all(): + if is_datetime64tz_dtype(orig.dtypes[0]): + tz = orig.dtypes[0].tz + for col in result.columns: + result[col] = maybe_infer_to_datetimelike(result[col]) + if (is_datetime64_dtype(result[col]) and + isna(result[col]).all()): + # all-NaT gets inferred as tz-naive + result[col] = pd.DatetimeIndex(result[col], tz=tz) + + else: + # TODO: consider doing something useful in this case? 
+ pass + + return result + + # install the indexes for _name, _indexer in indexing.get_indexers_list(): NDFrame._create_indexer(_name, _indexer) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c3762d9819153..376271492744d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -15,8 +15,9 @@ find_common_type, infer_dtype_from_scalar, maybe_convert_objects, maybe_promote) from pandas.core.dtypes.common import ( - _NS_DTYPE, is_datetimelike_v_numeric, is_extension_array_dtype, - is_extension_type, is_numeric_v_string_like, is_scalar) + _NS_DTYPE, is_datetime64tz_dtype, is_datetimelike_v_numeric, + is_extension_array_dtype, is_extension_type, is_numeric_v_string_like, + is_scalar) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries from pandas.core.dtypes.missing import isna @@ -773,6 +774,9 @@ def _interleave(self): dtype = dtype.subtype elif is_extension_array_dtype(dtype): dtype = 'object' + elif is_datetime64tz_dtype(dtype): + # TODO: avoid this conversion by allowing 2D DatetimeArray + dtype = 'object' result = np.empty(self.shape, dtype=dtype) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index 2714b68fa6ff4..567d5c236b989 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -163,6 +163,19 @@ def box_df_fail(request): return request.param +@pytest.fixture(params=[(pd.Index, False), + (pd.Series, False), + (pd.DataFrame, False), + (pd.DataFrame, True)], + ids=lambda x: x[0].__name__ + '-' + str(x[1])) +def box_with_transpose(request): + """ + Fixture similar to `box` but testing both transpose cases for DataFrame + """ + # GH#23620 + return request.param + + @pytest.fixture(params=[(pd.Index, False), (pd.Series, False), (pd.DataFrame, False), @@ -178,6 +191,20 @@ def box_transpose_fail(request): return request.param 
+@pytest.fixture(params=[(pd.Index, False), + (pd.Series, False), + (pd.DataFrame, False), + (pd.DataFrame, True), + (tm.to_array, False)], + ids=id_func) +def box_T_with_array(request): + """ + Like `box`, but specific to datetime64 for also testing pandas Array, + and both transpose cases for DataFrame + """ + return request.param + + @pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func) def box_with_array(request): diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 873c7c92cbaf6..f3c2eb22d35fb 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -650,62 +650,62 @@ class TestDatetime64Arithmetic(object): # Addition/Subtraction of timedelta-like def test_dt64arr_add_timedeltalike_scalar(self, tz_naive_fixture, - two_hours, box_with_array): + two_hours, box_T_with_array): # GH#22005, GH#22163 check DataFrame doesn't raise TypeError + box, transpose = box_T_with_array tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) result = rng + two_hours tm.assert_equal(result, expected) def test_dt64arr_iadd_timedeltalike_scalar(self, tz_naive_fixture, - two_hours, box_with_array): + two_hours, box_T_with_array): + box, transpose = box_T_with_array tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = 
tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) rng += two_hours tm.assert_equal(rng, expected) def test_dt64arr_sub_timedeltalike_scalar(self, tz_naive_fixture, - two_hours, box_with_array): + two_hours, box_T_with_array): + box, transpose = box_T_with_array tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) result = rng - two_hours tm.assert_equal(result, expected) def test_dt64arr_isub_timedeltalike_scalar(self, tz_naive_fixture, - two_hours, box_with_array): + two_hours, box_T_with_array): + box, transpose = box_T_with_array tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) rng -= two_hours tm.assert_equal(rng, expected) @@ -735,18 +735,18 @@ def test_dt64arr_add_td64_scalar(self, box_with_array): result = np.timedelta64(5, 'ms') + dtarr tm.assert_equal(result, expected) - def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture): + def test_dt64arr_add_sub_td64_nat(self, box_T_with_array, + tz_naive_fixture): # GH#23320 special handling for timedelta64("NaT") + box, transpose = box_T_with_array tz = 
tz_naive_fixture dti = pd.date_range("1994-04-01", periods=9, tz=tz, freq="QS") other = np.timedelta64("NaT") expected = pd.DatetimeIndex(["NaT"] * 9, tz=tz) - # FIXME: fails with transpose=True due to tz-aware DataFrame - # transpose bug - obj = tm.box_expected(dti, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + obj = tm.box_expected(dti, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) result = obj + other tm.assert_equal(result, expected) @@ -759,9 +759,6 @@ def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture): def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array): - if box_with_array is pd.DataFrame: - pytest.xfail("FIXME: ValueError with transpose; " - "alignment error without") tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) @@ -823,39 +820,40 @@ def test_dt64arr_sub_datetime64_not_ns(self, box_with_array): result = dt64 - dtarr tm.assert_equal(result, -expected) - def test_dt64arr_sub_timestamp(self, box_with_array): + def test_dt64arr_sub_timestamp(self, box_T_with_array): + box, transpose = box_T_with_array + ser = pd.date_range('2014-03-17', periods=2, freq='D', tz='US/Eastern') ts = ser[0] - # FIXME: transpose raises ValueError - ser = tm.box_expected(ser, box_with_array, transpose=False) + ser = tm.box_expected(ser, box, transpose=transpose) delta_series = pd.Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')]) - expected = tm.box_expected(delta_series, box_with_array, - transpose=False) + expected = tm.box_expected(delta_series, box, transpose=transpose) tm.assert_equal(ser - ts, expected) tm.assert_equal(ts - ser, -expected) - def test_dt64arr_sub_NaT(self, box_with_array): + def test_dt64arr_sub_NaT(self, box_T_with_array): # GH#18808 + box, transpose = box_T_with_array + dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) - ser = tm.box_expected(dti, box_with_array, 
transpose=False) + ser = tm.box_expected(dti, box, transpose=transpose) result = ser - pd.NaT expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') - # FIXME: raises ValueError with transpose - expected = tm.box_expected(expected, box_with_array, transpose=False) + expected = tm.box_expected(expected, box, transpose=transpose) tm.assert_equal(result, expected) dti_tz = dti.tz_localize('Asia/Tokyo') - ser_tz = tm.box_expected(dti_tz, box_with_array, transpose=False) + ser_tz = tm.box_expected(dti_tz, box, transpose=transpose) result = ser_tz - pd.NaT expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') - expected = tm.box_expected(expected, box_with_array, transpose=False) + expected = tm.box_expected(expected, box, transpose=transpose) tm.assert_equal(result, expected) # ------------------------------------------------------------- @@ -875,9 +873,6 @@ def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): def test_dt64arr_aware_sub_dt64ndarray_raises(self, tz_aware_fixture, box_with_array): - if box_with_array is pd.DataFrame: - pytest.xfail("FIXME: ValueError with transpose; " - "alignment error without") tz = tz_aware_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) @@ -895,9 +890,6 @@ def test_dt64arr_aware_sub_dt64ndarray_raises(self, tz_aware_fixture, def test_dt64arr_add_dt64ndarray_raises(self, tz_naive_fixture, box_with_array): - if box_with_array is pd.DataFrame: - pytest.xfail("FIXME: ValueError with transpose; " - "alignment error without") tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) @@ -1030,8 +1022,9 @@ def test_dt64arr_add_sub_tick_DateOffset_smoke(self, cls_name, offset_cls(5) + ser ser - offset_cls(5) - def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): + def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_T_with_array): # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype + box, transpose = box_T_with_array tz = tz_aware_fixture if tz == 
'US/Pacific': dates = date_range('2012-11-01', periods=3, tz=tz) @@ -1043,9 +1036,8 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', '2010-11-01 07:00'], freq='H', tz=tz) - # FIXME: these raise ValueError with transpose=True - dates = tm.box_expected(dates, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + dates = tm.box_expected(dates, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) # TODO: parametrize over the scalar being added? radd? sub? offset = dates + pd.offsets.Hour(5) @@ -2074,45 +2066,45 @@ def test_dt64_with_DateOffsets(box_with_array, normalize, cls_and_kwargs): offset - vec -def test_datetime64_with_DateOffset(box_with_array): +def test_datetime64_with_DateOffset(box_T_with_array): # GH#10699 - if box_with_array is tm.to_array: + box, transpose = box_T_with_array + if box is tm.to_array: pytest.xfail("DateOffset.apply_index uses _shallow_copy") s = date_range('2000-01-01', '2000-01-31', name='a') - s = tm.box_expected(s, box_with_array) + s = tm.box_expected(s, box, transpose=transpose) result = s + pd.DateOffset(years=1) result2 = pd.DateOffset(years=1) + s exp = date_range('2001-01-01', '2001-01-31', name='a') - exp = tm.box_expected(exp, box_with_array) + exp = tm.box_expected(exp, box, transpose=transpose) tm.assert_equal(result, exp) tm.assert_equal(result2, exp) result = s - pd.DateOffset(years=1) exp = date_range('1999-01-01', '1999-01-31', name='a') - exp = tm.box_expected(exp, box_with_array) + exp = tm.box_expected(exp, box, transpose=transpose) tm.assert_equal(result, exp) s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), Timestamp('2000-02-15', tz='US/Central')], name='a') - # FIXME: ValueError with tzaware DataFrame transpose - s = tm.box_expected(s, box_with_array, transpose=False) + s = tm.box_expected(s, box, 
transpose=transpose) result = s + pd.offsets.Day() result2 = pd.offsets.Day() + s exp = DatetimeIndex([Timestamp('2000-01-16 00:15:00', tz='US/Central'), Timestamp('2000-02-16', tz='US/Central')], name='a') - exp = tm.box_expected(exp, box_with_array, transpose=False) + exp = tm.box_expected(exp, box, transpose=transpose) tm.assert_equal(result, exp) tm.assert_equal(result2, exp) s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), Timestamp('2000-02-15', tz='US/Central')], name='a') - s = tm.box_expected(s, box_with_array, transpose=False) + s = tm.box_expected(s, box, transpose=transpose) result = s + pd.offsets.MonthEnd() result2 = pd.offsets.MonthEnd() + s exp = DatetimeIndex([Timestamp('2000-01-31 00:15:00', tz='US/Central'), Timestamp('2000-02-29', tz='US/Central')], name='a') - exp = tm.box_expected(exp, box_with_array, transpose=False) + exp = tm.box_expected(exp, box, transpose=transpose) tm.assert_equal(result, exp) tm.assert_equal(result2, exp) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 07c48554c65b8..de0cfdf6a5460 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -415,19 +415,19 @@ def test_td64arr_sub_timestamp_raises(self, box_with_array): with pytest.raises(TypeError, match=msg): idx - Timestamp('2011-01-01') - def test_td64arr_add_timestamp(self, box_with_array, tz_naive_fixture): + def test_td64arr_add_timestamp(self, box_T_with_array, tz_naive_fixture): # GH#23215 # TODO: parametrize over scalar datetime types? 
+ box, transpose = box_T_with_array + tz = tz_naive_fixture other = Timestamp('2011-01-01', tz=tz) idx = TimedeltaIndex(['1 day', '2 day']) expected = DatetimeIndex(['2011-01-02', '2011-01-03'], tz=tz) - # FIXME: fails with transpose=True because of tz-aware DataFrame - # transpose bug - idx = tm.box_expected(idx, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + idx = tm.box_expected(idx, box, transpose=transpose) + expected = tm.box_expected(expected, box, transpose=transpose) result = idx + other tm.assert_equal(result, expected) @@ -438,8 +438,7 @@ def test_td64arr_add_timestamp(self, box_with_array, tz_naive_fixture): def test_td64arr_add_sub_timestamp(self, box_with_array): # GH#11925 if box_with_array is tm.to_array: - pytest.xfail("DatetimeArray.__sub__ returns ndarray instead " - "of TimedeltaArray") + pytest.xfail("Timestamp doesnt recognize TimedeltaArray _typ") ts = Timestamp('2012-01-01') # TODO: parametrize over types of datetime scalar? diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index a53b01466c7a4..beb046bd6bd06 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -939,3 +939,55 @@ def test_unstack_fill_frame_object(): index=list('xyz') ) assert_frame_equal(result, expected) + + +def test_transpose_dt64tz(): + # GH#23730 transposing a DataFrame with a single datetime64tz column should + # not raise ValueError + + dti = pd.date_range('1977-04-15', periods=3, freq='MS', tz='US/Hawaii') + + # For reasons unknown this error shows up differently depending on how the + # DataFrame was constructed, so we do this several different ways. 
+ + df1 = dti.to_series(keep_tz=True).to_frame() + df2 = pd.DataFrame(dti, index=dti) + df3 = pd.Series(dti, index=dti).to_frame() + + tm.assert_frame_equal(df1, df2) + tm.assert_frame_equal(df2, df3) + + for frame in [df1, df2, df3]: + frame.T + tm.assert_frame_equal(frame.T.T, frame) + + # Now going the other direction, we have to manually construct the + # transposed dataframe + df = pd.DataFrame(np.arange(9).reshape(3, 3)) + df[0] = dti[0] + df[1] = dti[1] + df[2] = dti[2] + + df.T + tm.assert_frame_equal(df.T.T, df) + + +def test_transpose_dt64tz_mixed_tz(): + # GH#23730 transposing two datetimetz columns with different tzs + dti = pd.date_range('1977-04-15', periods=3, freq='MS', tz='US/Hawaii') + dti2 = pd.date_range('1977-04-15', periods=3, freq='MS', tz='UTC') + + df = pd.DataFrame({"A": dti, "B": dti2}, columns=["A", "B"]) + df.T + tm.assert_frame_equal(df.T.T, df.astype(object)) + + +def test_transpose_dt64tz_mixed(): + # GH#23730 transposing with datetimetz column and numeric column, + # did not raise before but covering our bases + + dti = pd.date_range('1977-04-15', periods=3, freq='MS', tz='US/Hawaii') + df = pd.DataFrame({"A": dti, "B": [3, 4, 5]}, columns=["A", "B"]) + + df.T + tm.assert_frame_equal(df.T.T, df.astype(object))