From 8e2bbee1fbae0cc57ec1d13ec7e41a924ab0ad8b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 20 Aug 2019 17:56:57 -0700 Subject: [PATCH 1/4] BUG: retain extension dtypes in transpose --- pandas/core/generic.py | 13 ++++++++++++- pandas/tests/arithmetic/test_datetime64.py | 12 ++++-------- pandas/tests/arithmetic/test_period.py | 20 ++++++++------------ pandas/tests/frame/test_operators.py | 16 ++++++++++++++++ 4 files changed, 40 insertions(+), 21 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fac5e0f085fc6..cccf8215de196 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -725,7 +725,18 @@ def transpose(self, *args, **kwargs): new_values = new_values.copy() nv.validate_transpose(tuple(), kwargs) - return self._constructor(new_values, **new_axes).__finalize__(self) + result = self._constructor(new_values, **new_axes).__finalize__(self) + + if len(self.columns) and (self.dtypes == self.dtypes.iloc[0]).all(): + # FIXME: self.dtypes[0] can fail in tests + if is_extension_array_dtype(self.dtypes.iloc[0]): + # Retain ExtensionArray dtypes through transpose; + # TODO: this can be made cleaner if/when (N, 1) EA are allowed + dtype = self.dtypes[0] + for col in result.columns: + result[col] = result[col].astype(dtype) + + return result def swapaxes(self, axis1, axis2, copy=True): """ diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 5931cd93cc8c5..a7e43bfa25819 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -153,12 +153,10 @@ def test_dt64arr_nat_comparison(self, tz_naive_fixture, box_with_array): ts = pd.Timestamp.now(tz) ser = pd.Series([ts, pd.NaT]) - # FIXME: Can't transpose because that loses the tz dtype on - # the NaT column - obj = tm.box_expected(ser, box, transpose=False) + obj = tm.box_expected(ser, box) expected = pd.Series([True, False], dtype=np.bool_) - expected = tm.box_expected(expected, xbox, transpose=False) + expected = tm.box_expected(expected, xbox) result = obj == ts tm.assert_equal(result, expected) @@ -879,10 +877,8 @@ def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture): other = np.timedelta64("NaT") expected = pd.DatetimeIndex(["NaT"] * 9, tz=tz) - # FIXME: fails with transpose=True due to tz-aware DataFrame - # transpose bug - obj = tm.box_expected(dti, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + obj = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = obj + other tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index ed693d873efb8..f0019f58f0432 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -755,18 +755,16 @@ def test_pi_sub_isub_offset(self): rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) - def test_pi_add_offset_n_gt1(self, box_transpose_fail): + def test_pi_add_offset_n_gt1(self, box): # GH#23215 # add offset to PeriodIndex with freq.n > 1 - box, transpose = box_transpose_fail - per = pd.Period("2016-01", freq="2M") pi = pd.PeriodIndex([per]) expected = pd.PeriodIndex(["2016-03"], freq="2M") - pi = tm.box_expected(pi, box, transpose=transpose) - expected = tm.box_expected(expected, box, transpose=transpose) + pi = tm.box_expected(pi, box) + expected = tm.box_expected(expected, box) result = pi + per.freq tm.assert_equal(result, expected) @@ -780,9 +778,8 @@ def test_pi_add_offset_n_gt1_not_divisible(self, box_with_array): pi = pd.PeriodIndex(["2016-01"], freq="2M") expected = pd.PeriodIndex(["2016-04"], freq="2M") - # FIXME: with transposing these tests fail - pi = tm.box_expected(pi, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + pi = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = pi + to_offset("3M") tm.assert_equal(result, expected) @@ -984,16 +981,15 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): with pytest.raises(IncompatibleFrequency, match=msg): rng -= other - def test_parr_add_sub_td64_nat(self, box_transpose_fail): + def test_parr_add_sub_td64_nat(self, box): # GH#23320 special handling for timedelta64("NaT") - box, transpose = box_transpose_fail pi = pd.period_range("1994-04-01", periods=9, freq="19D") other = np.timedelta64("NaT") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") - obj = tm.box_expected(pi, box, transpose=transpose) - expected = tm.box_expected(expected, box, transpose=transpose) + obj = tm.box_expected(pi, box) + expected = tm.box_expected(expected, box) result = obj + other tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index bffdf17a49750..bfee6a18f51b8 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -844,6 +844,22 @@ def test_no_warning(self, all_arithmetic_operators): class TestTranspose: + @pytest.mark.parametrize( + "ser", + [ + pd.date_range("2016-04-05 04:30", periods=3, tz="UTC"), + pd.period_range("1994", freq="A", periods=3), + pd.period_range("1969", freq="9s", periods=1), + pd.date_range("2016-04-05 04:30", periods=3).astype("category"), + pd.date_range("2016-04-05 04:30", periods=3, tz="UTC").astype("category"), + ], + ) + def test_transpose_retains_extension_dtype(self, ser): + # case with more than 1 column, must have same dtype + df = pd.DataFrame({"a": ser, "b": ser}) + result = df.T + assert (result.dtypes == ser.dtype).all() + def test_transpose_tzaware_1col_single_tz(self): # GH#26825 dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") From fa775bde83b0c8727a0c1746150bdd2f4381ef4e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 17:31:28 -0700 Subject: [PATCH 2/4] use is_homogeneous_type --- pandas/core/generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cccf8215de196..7898d375cc376 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -727,12 +727,11 @@ def transpose(self, *args, **kwargs): nv.validate_transpose(tuple(), kwargs) result = self._constructor(new_values, **new_axes).__finalize__(self) - if len(self.columns) and (self.dtypes == self.dtypes.iloc[0]).all(): - # FIXME: self.dtypes[0] can fail in tests + if self.ndim == 2 and self._is_homogeneous_type and len(self.columns): if is_extension_array_dtype(self.dtypes.iloc[0]): # Retain ExtensionArray dtypes through transpose; # TODO: this can be made cleaner if/when (N, 1) EA are allowed - dtype = self.dtypes[0] + dtype = self.dtypes.iloc[0] for col in result.columns: result[col] = result[col].astype(dtype) From 29c245309c748c12d647b2afb4792e8a8c9abe3c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Sep 2019 12:35:06 -0700 Subject: [PATCH 3/4] single assignment --- pandas/core/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8be56264a742c..28a20895e5f18 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -732,8 +732,7 @@ def transpose(self, *args, **kwargs): # Retain ExtensionArray dtypes through transpose; # TODO: this can be made cleaner if/when (N, 1) EA are allowed dtype = self.dtypes.iloc[0] - for col in result.columns: - result[col] = result[col].astype(dtype) + result = result.astype(dtype) return result From 80c4ee92ad4e834727a3abab8ddef29c9d4b46fb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 28 Nov 2019 12:57:24 -0800 Subject: [PATCH 4/4] un-xfail fixed test --- pandas/tests/arithmetic/test_period.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index f0019f58f0432..30992747a11f5 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1007,10 +1007,8 @@ def test_parr_add_sub_td64_nat(self, box): TimedeltaArray._from_sequence(["NaT"] * 9), ], ) - def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other): - # FIXME: DataFrame fails because when when operating column-wise - # timedelta64 entries become NaT and are treated like datetimes - box = box_df_fail + def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): + box = box_with_array pi = pd.period_range("1994-04-01", periods=9, freq="19D") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")