From c9130f8be5a5c3c0c151546b1b81c1d1782e4e54 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Jun 2019 21:04:40 -0700 Subject: [PATCH 01/16] BUG: fix tzaware dataframe transpose bug --- pandas/core/groupby/generic.py | 27 ++++--- pandas/core/internals/construction.py | 24 +++++- pandas/tests/arithmetic/test_datetime64.py | 93 ++++++++-------------- pandas/tests/frame/test_constructors.py | 66 +++++++++++++++ pandas/tests/frame/test_dtypes.py | 6 +- pandas/tests/groupby/test_function.py | 6 +- pandas/tests/groupby/test_groupby.py | 10 ++- 7 files changed, 154 insertions(+), 78 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 57d14cb4c15d7..19da750d6d053 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -288,7 +288,6 @@ def _decide_output_index(self, output, labels): def _wrap_applied_output(self, keys, values, not_indexed_same=False): from pandas.core.index import _all_indexes_same - from pandas.core.tools.numeric import to_numeric if len(keys) == 0: return DataFrame(index=keys) @@ -360,7 +359,6 @@ def first_not_none(values): # provide a reduction (Frame -> Series) if groups are # unique if self.squeeze: - # assign the name to this series if singular_series: values[0].name = keys[0] @@ -434,15 +432,22 @@ def first_not_none(values): # if we have date/time like in the original, then coerce dates # as we are stacking can easily have object dtypes here so = self._selected_obj - if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()): - result = result.apply( - lambda x: to_numeric(x, errors='ignore')) - date_cols = self._selected_obj.select_dtypes( - include=['datetime', 'timedelta']).columns - date_cols = date_cols.intersection(result.columns) - result[date_cols] = (result[date_cols] - ._convert(datetime=True, - coerce=True)) + if so.ndim == 2 and so.dtypes.apply(is_datetimelike).any(): + ocols = [idx for idx in range(len(result.columns)) + if result.dtypes[idx] == object] + for cidx in ocols: + # TODO: get maybe_convert_objects working here + # TODO: should we use skipna=True? + cvals = result.iloc[:, cidx] + exdtype = lib.infer_dtype(cvals.values, + skipna=False) + if exdtype == 'integer': + result.iloc[:, cidx] = cvals.astype(int) + if exdtype in ['float', 'mixed-integer-float']: + result.iloc[:, cidx] = cvals.astype(float) + if exdtype == 'datetime': + # TODO: what about z-aware? + result.iloc[:, cidx] = cvals.astype('M8[ns]') else: result = result._convert(datetime=True) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 863b9f7fb16d7..cebeb025e1cdd 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -160,7 +160,29 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): # on the entire block; this is to convert if we have datetimelike's # embedded in an object type if dtype is None and is_object_dtype(values): - values = maybe_infer_to_datetimelike(values) + + if values.ndim == 2 and values.shape[0] != 1: + # kludge to transpose and separate blocks + # unnecessary if we ever allow 2D DatetimeArray + + dvals_list = [maybe_infer_to_datetimelike(values[n, :]) + for n in range(len(values))] + for n in range(len(dvals_list)): + if isinstance(dvals_list[n], np.ndarray): + dvals_list[n] = dvals_list[n].reshape(1, -1) + + from pandas.core.internals.blocks import make_block + + # TODO: What about re-joining object columns? + bdvals = [make_block(dvals_list[n], placement=[n]) + for n in range(len(dvals_list))] + return create_block_manager_from_blocks(bdvals, + [columns, index]) + + else: + dvals = maybe_infer_to_datetimelike(values) + + values = dvals return create_block_manager_from_blocks([values], [columns, index]) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index afd29852fea7e..cc9940a2c5524 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -438,8 +438,7 @@ def test_dti_cmp_null_scalar_inequality(self, tz_naive_fixture, other, # GH#19301 tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=2, tz=tz) - # FIXME: ValueError with transpose - dtarr = tm.box_expected(dti, box_with_array, transpose=False) + dtarr = tm.box_expected(dti, box_with_array) msg = 'Invalid comparison between' with pytest.raises(TypeError, match=msg): dtarr < other @@ -592,15 +591,15 @@ def test_comparison_tzawareness_compat(self, op, box_with_array): dr = pd.date_range('2016-01-01', periods=6) dz = dr.tz_localize('US/Pacific') - # FIXME: ValueError with transpose - dr = tm.box_expected(dr, box_with_array, transpose=False) - dz = tm.box_expected(dz, box_with_array, transpose=False) + dr = tm.box_expected(dr, box_with_array) + dz = tm.box_expected(dz, box_with_array) msg = 'Cannot compare tz-naive and tz-aware' with pytest.raises(TypeError, match=msg): op(dr, dz) if box_with_array is not pd.DataFrame: - # DataFrame op is invalid until transpose bug is fixed + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities with pytest.raises(TypeError, match=msg): op(dr, list(dz)) with pytest.raises(TypeError, match=msg): @@ -609,7 +608,8 @@ def test_comparison_tzawareness_compat(self, op, box_with_array): with pytest.raises(TypeError, match=msg): op(dz, dr) if box_with_array is not pd.DataFrame: - # DataFrame op is invalid until transpose bug is fixed + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities with pytest.raises(TypeError, match=msg): op(dz, list(dr)) with pytest.raises(TypeError, match=msg): @@ -620,8 +620,8 @@ def test_comparison_tzawareness_compat(self, op, box_with_array): assert_all(dr == dr) assert_all(dz == dz) if box_with_array is not pd.DataFrame: - # DataFrame doesn't align the lists correctly unless we transpose, - # which we cannot do at the moment + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities assert (dr == list(dr)).all() assert (dz == list(dz)).all() @@ -652,8 +652,7 @@ def test_scalar_comparison_tzawareness(self, op, other, tz_aware_fixture, tz = tz_aware_fixture dti = pd.date_range('2016-01-01', periods=2, tz=tz) - # FIXME: ValueError with transpose - dtarr = tm.box_expected(dti, box_with_array, transpose=False) + dtarr = tm.box_expected(dti, box_with_array) msg = 'Cannot compare tz-naive and tz-aware' with pytest.raises(TypeError, match=msg): op(dtarr, other) @@ -715,17 +714,16 @@ def test_dt64arr_cmp_scalar_invalid(self, other, tz_naive_fixture, xbox = box_with_array if box_with_array is not pd.Index else np.ndarray rng = date_range('1/1/2000', periods=10, tz=tz) - # FIXME: ValueError with transpose - rng = tm.box_expected(rng, box_with_array, transpose=False) + rng = tm.box_expected(rng, box_with_array) result = rng == other expected = np.array([False] * 10) - expected = tm.box_expected(expected, xbox, transpose=False) + expected = tm.box_expected(expected, xbox) tm.assert_equal(result, expected) result = rng != other expected = np.array([True] * 10) - expected = tm.box_expected(expected, xbox, transpose=False) + expected = tm.box_expected(expected, xbox) tm.assert_equal(result, expected) msg = 'Invalid comparison between' with pytest.raises(TypeError, match=msg): @@ -816,9 +814,8 @@ def test_dt64arr_add_timedeltalike_scalar(self, tz_naive_fixture, expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = rng + two_hours tm.assert_equal(result, expected) @@ -831,9 +828,8 @@ def test_dt64arr_iadd_timedeltalike_scalar(self, tz_naive_fixture, expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) rng += two_hours tm.assert_equal(rng, expected) @@ -846,9 +842,8 @@ def test_dt64arr_sub_timedeltalike_scalar(self, tz_naive_fixture, expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = rng - two_hours tm.assert_equal(result, expected) @@ -861,9 +856,8 @@ def test_dt64arr_isub_timedeltalike_scalar(self, tz_naive_fixture, expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) - # FIXME: calling with transpose=True raises ValueError - rng = tm.box_expected(rng, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) rng -= two_hours tm.assert_equal(rng, expected) @@ -918,9 +912,6 @@ def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture): def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array): - if box_with_array is pd.DataFrame: - pytest.xfail("FIXME: ValueError with transpose; " - "alignment error without") tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) @@ -942,7 +933,7 @@ def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, result = dtarr - tdarr tm.assert_equal(result, expected) - msg = 'cannot subtract' + msg = 'cannot subtract|bad operand type for unary -' with pytest.raises(TypeError, match=msg): tdarr - dtarr @@ -987,13 +978,11 @@ def test_dt64arr_sub_timestamp(self, box_with_array): tz='US/Eastern') ts = ser[0] - # FIXME: transpose raises ValueError - ser = tm.box_expected(ser, box_with_array, transpose=False) + ser = tm.box_expected(ser, box_with_array) delta_series = pd.Series([np.timedelta64(0, 'D'), np.timedelta64(1, 'D')]) - expected = tm.box_expected(delta_series, box_with_array, - transpose=False) + expected = tm.box_expected(delta_series, box_with_array) tm.assert_equal(ser - ts, expected) tm.assert_equal(ts - ser, -expected) @@ -1001,20 +990,19 @@ def test_dt64arr_sub_timestamp(self, box_with_array): def test_dt64arr_sub_NaT(self, box_with_array): # GH#18808 dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) - ser = tm.box_expected(dti, box_with_array, transpose=False) + ser = tm.box_expected(dti, box_with_array) result = ser - pd.NaT expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') - # FIXME: raises ValueError with transpose - expected = tm.box_expected(expected, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array) tm.assert_equal(result, expected) dti_tz = dti.tz_localize('Asia/Tokyo') - ser_tz = tm.box_expected(dti_tz, box_with_array, transpose=False) + ser_tz = tm.box_expected(dti_tz, box_with_array) result = ser_tz - pd.NaT expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]') - expected = tm.box_expected(expected, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array) tm.assert_equal(result, expected) # ------------------------------------------------------------- @@ -1034,16 +1022,13 @@ def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): def test_dt64arr_aware_sub_dt64ndarray_raises(self, tz_aware_fixture, box_with_array): - if box_with_array is pd.DataFrame: - pytest.xfail("FIXME: ValueError with transpose; " - "alignment error without") tz = tz_aware_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) dt64vals = dti.values dtarr = tm.box_expected(dti, box_with_array) - msg = 'DatetimeArray subtraction must have the same timezones or' + msg = 'subtraction must have the same timezones or' with pytest.raises(TypeError, match=msg): dtarr - dt64vals with pytest.raises(TypeError, match=msg): @@ -1054,9 +1039,6 @@ def test_dt64arr_aware_sub_dt64ndarray_raises(self, tz_aware_fixture, def test_dt64arr_add_dt64ndarray_raises(self, tz_naive_fixture, box_with_array): - if box_with_array is pd.DataFrame: - pytest.xfail("FIXME: ValueError with transpose; " - "alignment error without") tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) @@ -1204,9 +1186,8 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): expected = DatetimeIndex(['2010-11-01 05:00', '2010-11-01 06:00', '2010-11-01 07:00'], freq='H', tz=tz) - # FIXME: these raise ValueError with transpose=True - dates = tm.box_expected(dates, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + dates = tm.box_expected(dates, box_with_array) + expected = tm.box_expected(expected, box_with_array) # TODO: parametrize over the scalar being added? radd? sub? offset = dates + pd.offsets.Hour(5) @@ -1359,26 +1340,25 @@ def test_dt64arr_add_sub_DateOffset(self, box_with_array): s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), Timestamp('2000-02-15', tz='US/Central')], name='a') - # FIXME: ValueError with tzaware DataFrame transpose - s = tm.box_expected(s, box_with_array, transpose=False) + s = tm.box_expected(s, box_with_array) result = s + pd.offsets.Day() result2 = pd.offsets.Day() + s exp = DatetimeIndex([Timestamp('2000-01-16 00:15:00', tz='US/Central'), Timestamp('2000-02-16', tz='US/Central')], name='a') - exp = tm.box_expected(exp, box_with_array, transpose=False) + exp = tm.box_expected(exp, box_with_array) tm.assert_equal(result, exp) tm.assert_equal(result2, exp) s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), Timestamp('2000-02-15', tz='US/Central')], name='a') - s = tm.box_expected(s, box_with_array, transpose=False) + s = tm.box_expected(s, box_with_array) result = s + pd.offsets.MonthEnd() result2 = pd.offsets.MonthEnd() + s exp = DatetimeIndex([Timestamp('2000-01-31 00:15:00', tz='US/Central'), Timestamp('2000-02-29', tz='US/Central')], name='a') - exp = tm.box_expected(exp, box_with_array, transpose=False) + exp = tm.box_expected(exp, box_with_array) tm.assert_equal(result, exp) tm.assert_equal(result2, exp) @@ -1415,9 +1395,6 @@ def test_dt64arr_add_mixed_offset_array(self, box_with_array): def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array): # GH#18849 - if box_with_array is pd.DataFrame: - pytest.xfail("FIXME: ValueError with transpose; " - "alignment error without") tz = tz_naive_fixture dti = pd.date_range('2017-01-01', periods=2, tz=tz) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 68017786eb6a6..1040a21cd248f 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2399,3 +2399,69 @@ def test_nested_dict_construction(self): index=pd.Index([2001, 2002, 2003]) ) tm.assert_frame_equal(result, expected) + + def test_from_tzaware_object_array(self): + # 2D object array of tzaware timestamps should not raise + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + data = dti._data.astype(object).reshape(1, -1) + df = pd.DataFrame(data) + assert df.shape == (1, 3) + assert (df.dtypes == dti.dtype).all() + assert (df == dti).all().all() + + def test_from_tzaware_mixed_object_array(self): + arr = np.array([ + [Timestamp('2013-01-01 00:00:00'), + Timestamp('2013-01-02 00:00:00'), + Timestamp('2013-01-03 00:00:00')], + [Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'), + pd.NaT, + Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern')], + [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), + pd.NaT, + Timestamp('2013-01-03 00:00:00+0100', tz='CET')]], + dtype=object).T + res = DataFrame(arr, columns=['A', 'B', 'C']) + + expected_dtypes = ['datetime64[ns]', + 'datetime64[ns, US/Eastern]', + 'datetime64[ns, CET]'] + assert (res.dtypes == expected_dtypes).all() + + +class TestTranspose: + # FIXME: belongs somewhere else, but im not sure where + def test_transpose_tzaware_1col_single_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + + df = pd.DataFrame(dti) + assert (df.dtypes == dti.dtype).all() + res = df.T + assert (res.dtypes == dti.dtype).all() + + def test_transpose_tzaware_2col_single_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + + df3 = pd.DataFrame({'A': dti, 'B': dti}) + assert (df3.dtypes == dti.dtype).all() + res3 = df3.T + assert (res3.dtypes == dti.dtype).all() + + def test_transpose_tzaware_2col_mixed_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + dti2 = dti.tz_convert('US/Pacific') + + df4 = pd.DataFrame({'A': dti, 'B': dti2}) + assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() + assert (df4.T.dtypes == object).all() + tm.assert_frame_equal(df4.T.T, df4) + + def test_transpose_object_to_tzaware_mixed_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + dti2 = dti.tz_convert('US/Pacific') + + # mixed all-tzaware dtypes + df2 = pd.DataFrame([dti, dti2]) + assert (df2.dtypes == object).all() + res2 = df2.T + assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 96cf70483d4e7..7ed601e4f7046 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -978,9 +978,11 @@ def test_astype(self): Timestamp('2013-01-03 00:00:00+0100', tz='CET')]], dtype=object).T + expected = DataFrame(expected, + index=self.tzframe.index, + columns=self.tzframe.columns, dtype=object) result = self.tzframe.astype(object) - assert_frame_equal(result, DataFrame( - expected, index=self.tzframe.index, columns=self.tzframe.columns)) + assert_frame_equal(result, expected) result = self.tzframe.astype('datetime64[ns]') expected = DataFrame({'A': date_range('20130101', periods=3), diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 3d9bfcd126377..14f27f0c4c7d8 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -963,12 +963,14 @@ def test_count(): df['9th'] = df['9th'].astype('category') - for key in '1st', '2nd', ['1st', '2nd']: + for key in ['1st', '2nd', ['1st', '2nd']]: left = df.groupby(key).count() right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) tm.assert_frame_equal(left, right) - # GH5610 + +def test_count_non_nulls(): + # GH#5610 # count counts non-nulls df = pd.DataFrame([[1, 2, 'foo'], [1, np.nan, 'bar'], diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 87b57b0609b36..dcd0d3938c6a5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -129,12 +129,14 @@ def func(dataf): result = df.groupby('X', squeeze=False).count() assert isinstance(result, DataFrame) + +def test_inconsistent_return_type(): # GH5592 - # inconcistent return type + # inconsistent return type df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', - 'Pony', 'Pony'], B=Series( - np.arange(7), dtype='int64'), C=date_range( - '20130101', periods=7))) + 'Pony', 'Pony'], + B=Series(np.arange(7), dtype='int64'), + C=date_range('20130101', periods=7))) def f(grp): return grp.iloc[0] From 908465af45b34504bbcd296fadbb7c776518ec8f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Jun 2019 21:08:50 -0700 Subject: [PATCH 02/16] move TestTranspose --- pandas/tests/frame/test_constructors.py | 1 - pandas/tests/frame/test_operators.py | 37 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1040a21cd248f..5abf66210b2ee 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2430,7 +2430,6 @@ def test_from_tzaware_mixed_object_array(self): class TestTranspose: - # FIXME: belongs somewhere else, but im not sure where def test_transpose_tzaware_1col_single_tz(self): dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index f1c8445bf98e0..55643f37ab3cf 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -793,3 +793,40 @@ def test_no_warning(self, all_arithmetic_operators): b = df['B'] with tm.assert_produces_warning(None): getattr(df, all_arithmetic_operators)(b, 0) + + +class TestTranspose: + def test_transpose_tzaware_1col_single_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + + df = pd.DataFrame(dti) + assert (df.dtypes == dti.dtype).all() + res = df.T + assert (res.dtypes == dti.dtype).all() + + def test_transpose_tzaware_2col_single_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + + df3 = pd.DataFrame({'A': dti, 'B': dti}) + assert (df3.dtypes == dti.dtype).all() + res3 = df3.T + assert (res3.dtypes == dti.dtype).all() + + def test_transpose_tzaware_2col_mixed_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + dti2 = dti.tz_convert('US/Pacific') + + df4 = pd.DataFrame({'A': dti, 'B': dti2}) + assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() + assert (df4.T.dtypes == object).all() + tm.assert_frame_equal(df4.T.T, df4) + + def test_transpose_object_to_tzaware_mixed_tz(self): + dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') + dti2 = dti.tz_convert('US/Pacific') + + # mixed all-tzaware dtypes + df2 = pd.DataFrame([dti, dti2]) + assert (df2.dtypes == object).all() + res2 = df2.T + assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() From 2b89d3565d56936644e27dbf0b8b444c1ae658c1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Jun 2019 21:19:01 -0700 Subject: [PATCH 03/16] actually save --- pandas/tests/frame/test_constructors.py | 37 ------------------------- 1 file changed, 37 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 5abf66210b2ee..4f6ecf14e4d7f 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2427,40 +2427,3 @@ def test_from_tzaware_mixed_object_array(self): 'datetime64[ns, US/Eastern]', 'datetime64[ns, CET]'] assert (res.dtypes == expected_dtypes).all() - - -class TestTranspose: - def test_transpose_tzaware_1col_single_tz(self): - dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') - - df = pd.DataFrame(dti) - assert (df.dtypes == dti.dtype).all() - res = df.T - assert (res.dtypes == dti.dtype).all() - - def test_transpose_tzaware_2col_single_tz(self): - dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') - - df3 = pd.DataFrame({'A': dti, 'B': dti}) - assert (df3.dtypes == dti.dtype).all() - res3 = df3.T - assert (res3.dtypes == dti.dtype).all() - - def test_transpose_tzaware_2col_mixed_tz(self): - dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') - dti2 = dti.tz_convert('US/Pacific') - - df4 = pd.DataFrame({'A': dti, 'B': dti2}) - assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() - assert (df4.T.dtypes == object).all() - tm.assert_frame_equal(df4.T.T, df4) - - def test_transpose_object_to_tzaware_mixed_tz(self): - dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') - dti2 = dti.tz_convert('US/Pacific') - - # mixed all-tzaware dtypes - df2 = pd.DataFrame([dti, dti2]) - assert (df2.dtypes == object).all() - res2 = df2.T - assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() From f5759e6d46f5dd67d9ba0c655955011ae67b8c22 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Jun 2019 07:15:38 -0700 Subject: [PATCH 04/16] troubleshoot windows fails --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 19da750d6d053..9bf5b3db1786b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -442,9 +442,9 @@ def first_not_none(values): exdtype = lib.infer_dtype(cvals.values, skipna=False) if exdtype == 'integer': - result.iloc[:, cidx] = cvals.astype(int) + result.iloc[:, cidx] = cvals.astype(np.int64) if exdtype in ['float', 'mixed-integer-float']: - result.iloc[:, cidx] = cvals.astype(float) + result.iloc[:, cidx] = cvals.astype(np.float64) if exdtype == 'datetime': # TODO: what about z-aware? result.iloc[:, cidx] = cvals.astype('M8[ns]') From 3419983deca2a55947732b2e5b1d52452aeedf4f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Jun 2019 07:20:20 -0700 Subject: [PATCH 05/16] Fix one more FIXME --- pandas/tests/arithmetic/test_timedelta64.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index ead9876e7c2a8..3ab3f51b4e46a 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -857,10 +857,8 @@ def test_td64arr_add_timestamp(self, box_with_array, tz_naive_fixture): idx = TimedeltaIndex(['1 day', '2 day']) expected = DatetimeIndex(['2011-01-02', '2011-01-03'], tz=tz) - # FIXME: fails with transpose=True because of tz-aware DataFrame - # transpose bug - idx = tm.box_expected(idx, box_with_array, transpose=False) - expected = tm.box_expected(expected, box_with_array, transpose=False) + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = idx + other tm.assert_equal(result, expected) From 528015eedd5ef5bcab8bb54039bdc023514660f9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Jun 2019 09:34:30 -0700 Subject: [PATCH 06/16] separate out _recast_datetimelike_Result --- pandas/core/groupby/generic.py | 49 +++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9bf5b3db1786b..3b86ec982cde5 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -433,21 +433,7 @@ def first_not_none(values): # as we are stacking can easily have object dtypes here so = self._selected_obj if so.ndim == 2 and so.dtypes.apply(is_datetimelike).any(): - ocols = [idx for idx in range(len(result.columns)) - if result.dtypes[idx] == object] - for cidx in ocols: - # TODO: get maybe_convert_objects working here - # TODO: should we use skipna=True? - cvals = result.iloc[:, cidx] - exdtype = lib.infer_dtype(cvals.values, - skipna=False) - if exdtype == 'integer': - result.iloc[:, cidx] = cvals.astype(np.int64) - if exdtype in ['float', 'mixed-integer-float']: - result.iloc[:, cidx] = cvals.astype(np.float64) - if exdtype == 'datetime': - # TODO: what about z-aware? - result.iloc[:, cidx] = cvals.astype('M8[ns]') + _recast_datetimelike_result(result) else: result = result._convert(datetime=True) @@ -1669,3 +1655,36 @@ def _normalize_keyword_aggregation(kwargs): order.append((column, com.get_callable_name(aggfunc) or aggfunc)) return aggspec, columns, order + + +def _recast_datetimelike_result(result): + """ + If we have date/time like in the original, then coerce dates + as we are stacking can easily have object dtypes here. + + Parameters + ---------- + result : DataFrame + + Notes + ----- + - Assumes Groupby._selected_obj has ndim==2 and at least one + datetimelike column + - Modifies `result` inplace + """ + ocols = [idx for idx in range(len(result.columns)) + if result.dtypes[idx] == object] + + for cidx in ocols: + # TODO: get maybe_convert_objects working here + cvals = result.iloc[:, cidx] + + # TODO: should we use skipna=True? + exdtype = lib.infer_dtype(cvals.values, skipna=False) + if exdtype == 'integer': + result.iloc[:, cidx] = cvals.astype(np.int64) + if exdtype in ['float', 'mixed-integer-float']: + result.iloc[:, cidx] = cvals.astype(np.float64) + if exdtype == 'datetime': + # TODO: what about z-aware? + result.iloc[:, cidx] = cvals.astype('M8[ns]') From 508f8aef99ddd47afe22d8039ad8570f7868b744 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Jun 2019 09:36:11 -0700 Subject: [PATCH 07/16] Add GH references to tests --- pandas/tests/frame/test_constructors.py | 3 ++- pandas/tests/frame/test_operators.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 4f6ecf14e4d7f..c44698eab86f4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2401,7 +2401,7 @@ def test_nested_dict_construction(self): tm.assert_frame_equal(result, expected) def test_from_tzaware_object_array(self): - # 2D object array of tzaware timestamps should not raise + # GH#26825 2D object array of tzaware timestamps should not raise dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') data = dti._data.astype(object).reshape(1, -1) df = pd.DataFrame(data) @@ -2410,6 +2410,7 @@ def test_from_tzaware_object_array(self): assert (df == dti).all().all() def test_from_tzaware_mixed_object_array(self): + # GH#26825 arr = np.array([ [Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 55643f37ab3cf..1e932879e9ad0 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -797,6 +797,7 @@ def test_no_warning(self, all_arithmetic_operators): class TestTranspose: def test_transpose_tzaware_1col_single_tz(self): + # GH#26825 dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') df = pd.DataFrame(dti) @@ -805,6 +806,7 @@ def test_transpose_tzaware_1col_single_tz(self): assert (res.dtypes == dti.dtype).all() def test_transpose_tzaware_2col_single_tz(self): + # GH#26825 dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') df3 = pd.DataFrame({'A': dti, 'B': dti}) @@ -813,6 +815,7 @@ def test_transpose_tzaware_2col_single_tz(self): assert (res3.dtypes == dti.dtype).all() def test_transpose_tzaware_2col_mixed_tz(self): + # GH#26825 dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') dti2 = dti.tz_convert('US/Pacific') @@ -822,6 +825,7 @@ def test_transpose_tzaware_2col_mixed_tz(self): tm.assert_frame_equal(df4.T.T, df4) def test_transpose_object_to_tzaware_mixed_tz(self): + # GH#26825 dti = pd.date_range('2016-04-05 04:30', periods=3, tz='UTC') dti2 = dti.tz_convert('US/Pacific') From c64d31f383040e5875074e2e933e9a89c2af236b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Jun 2019 09:39:22 -0700 Subject: [PATCH 08/16] add whatsnew --- doc/source/whatsnew/v0.25.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 76ee21b4c9a50..52bcf2e26e165 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -720,6 +720,7 @@ Reshaping - Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`) - Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) +- Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`) Sparse ^^^^^^ @@ -735,6 +736,7 @@ Other - Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). - Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions (:issue:`26125`) +- Bug in :class:`DataFrame` where passing an object array of timezone-aware `datetime` objects would incorrectly raise ``ValueError`` (:issue:`13287`) .. _whatsnew_0.250.contributors: From 6bd1a0a73038b1861432bc77baea04bc16746155 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Jun 2019 13:00:36 -0700 Subject: [PATCH 09/16] annotation, typo fixup --- pandas/core/groupby/generic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3b86ec982cde5..c81225c0bf4e5 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -24,7 +24,8 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_bool, is_datetimelike, - is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar) + is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_object_dtype, + is_scalar) from pandas.core.dtypes.missing import isna, notna import pandas.core.algorithms as algorithms @@ -1657,7 +1658,7 @@ def _normalize_keyword_aggregation(kwargs): return aggspec, columns, order -def _recast_datetimelike_result(result): +def _recast_datetimelike_result(result: DataFrame): """ If we have date/time like in the original, then coerce dates as we are stacking can easily have object dtypes here. @@ -1673,7 +1674,7 @@ def _recast_datetimelike_result(result): - Modifies `result` inplace """ ocols = [idx for idx in range(len(result.columns)) - if result.dtypes[idx] == object] + if is_object_dtype(result.dtypes[idx])] for cidx in ocols: # TODO: get maybe_convert_objects working here @@ -1686,5 +1687,5 @@ def _recast_datetimelike_result(result): if exdtype in ['float', 'mixed-integer-float']: result.iloc[:, cidx] = cvals.astype(np.float64) if exdtype == 'datetime': - # TODO: what about z-aware? + # TODO: what about tz-aware? result.iloc[:, cidx] = cvals.astype('M8[ns]') From baacaaf90bfb4e0bd58852ddba925e9d28c6191e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 13 Jun 2019 13:34:02 -0700 Subject: [PATCH 10/16] dont alter inplace --- pandas/core/groupby/generic.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c81225c0bf4e5..33f19ae0ae8e8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -434,7 +434,7 @@ def first_not_none(values): # as we are stacking can easily have object dtypes here so = self._selected_obj if so.ndim == 2 and so.dtypes.apply(is_datetimelike).any(): - _recast_datetimelike_result(result) + result = _recast_datetimelike_result(result) else: result = result._convert(datetime=True) @@ -1658,7 +1658,7 @@ def _normalize_keyword_aggregation(kwargs): return aggspec, columns, order -def _recast_datetimelike_result(result: DataFrame): +def _recast_datetimelike_result(result: DataFrame) -> DataFrame: """ If we have date/time like in the original, then coerce dates as we are stacking can easily have object dtypes here. @@ -1667,12 +1667,17 @@ def _recast_datetimelike_result(result: DataFrame): ---------- result : DataFrame + Returns + ------- + DataFrame + Notes ----- - Assumes Groupby._selected_obj has ndim==2 and at least one datetimelike column - - Modifies `result` inplace """ + result = result.copy() + ocols = [idx for idx in range(len(result.columns)) if is_object_dtype(result.dtypes[idx])] @@ -1689,3 +1694,5 @@ def _recast_datetimelike_result(result: DataFrame): if exdtype == 'datetime': # TODO: what about tz-aware? result.iloc[:, cidx] = cvals.astype('M8[ns]') + + return result From e39370c97adc87de6a85ade8b34c2a2b83405fb8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Jun 2019 13:53:37 -0700 Subject: [PATCH 11/16] use maybe_convert_objects --- pandas/core/groupby/generic.py | 18 +++++------------- pandas/core/internals/construction.py | 3 +-- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1619f2b614285..2f31a069cb3e4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -21,7 +21,8 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution -from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.cast import ( + maybe_convert_objects, maybe_downcast_to_dtype) from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_bool, is_datetimelike, is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_object_dtype, @@ -1728,17 +1729,8 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: if is_object_dtype(result.dtypes[idx])] for cidx in ocols: - # TODO: get maybe_convert_objects working here - cvals = result.iloc[:, cidx] - - # TODO: should we use skipna=True? - exdtype = lib.infer_dtype(cvals.values, skipna=False) - if exdtype == 'integer': - result.iloc[:, cidx] = cvals.astype(np.int64) - if exdtype in ['float', 'mixed-integer-float']: - result.iloc[:, cidx] = cvals.astype(np.float64) - if exdtype == 'datetime': - # TODO: what about tz-aware? - result.iloc[:, cidx] = cvals.astype('M8[ns]') + cvals = result.iloc[:, cidx].values + result.iloc[:, cidx] = maybe_convert_objects(cvals, + convert_numeric=False) return result diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index deb406e437858..4e4cae1ea89e3 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -161,8 +161,7 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): if dtype is None and is_object_dtype(values): if values.ndim == 2 and values.shape[0] != 1: - # kludge to transpose and separate blocks - # unnecessary if we ever allow 2D DatetimeArray + # transpose and separate blocks dvals_list = [maybe_infer_to_datetimelike(values[n, :]) for n in range(len(values))] From 00b31e40c46e8e550e866d17364affaeafacbdd6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Jun 2019 14:01:37 -0700 Subject: [PATCH 12/16] xfail tests where possible --- pandas/tests/arithmetic/test_datetime64.py | 57 +++++++++++++--------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index cc9940a2c5524..a4b4ec321f638 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -586,50 +586,63 @@ def test_dti_cmp_nat_behaves_like_float_cmp_nan(self): @pytest.mark.parametrize('op', [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le]) - def test_comparison_tzawareness_compat(self, op, box_with_array): + def test_comparison_tzawareness_compat(self, op, box_df_fail): # GH#18162 + box = box_df_fail + dr = pd.date_range('2016-01-01', periods=6) dz = dr.tz_localize('US/Pacific') - dr = tm.box_expected(dr, box_with_array) - dz = tm.box_expected(dz, box_with_array) + dr = tm.box_expected(dr, box) + dz = tm.box_expected(dz, box) msg = 'Cannot compare tz-naive and tz-aware' with pytest.raises(TypeError, match=msg): op(dr, dz) - if box_with_array is not pd.DataFrame: - # FIXME: DataFrame case fails to raise for == and !=, wrong - # message for inequalities - with pytest.raises(TypeError, match=msg): - op(dr, list(dz)) - with pytest.raises(TypeError, match=msg): - op(dr, np.array(list(dz), dtype=object)) + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities + with pytest.raises(TypeError, match=msg): + op(dr, list(dz)) + with pytest.raises(TypeError, match=msg): + op(dr, np.array(list(dz), dtype=object)) with pytest.raises(TypeError, match=msg): op(dz, dr) - if box_with_array is not pd.DataFrame: - # FIXME: DataFrame case fails to raise for == and !=, wrong - # message for inequalities - with pytest.raises(TypeError, match=msg): - op(dz, list(dr)) - with pytest.raises(TypeError, match=msg): - op(dz, np.array(list(dr), dtype=object)) + + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities + with pytest.raises(TypeError, match=msg): + op(dz, list(dr)) + with pytest.raises(TypeError, match=msg): + op(dz, np.array(list(dr), dtype=object)) # Check that there isn't a problem aware-aware and naive-naive do not # raise assert_all(dr == dr) assert_all(dz == dz) - if box_with_array is not pd.DataFrame: - # FIXME: DataFrame case fails to raise for == and !=, wrong - # message for inequalities - assert (dr == list(dr)).all() - assert (dz == list(dz)).all() + + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities + assert (dr == list(dr)).all() + assert (dz == list(dz)).all() + + @pytest.mark.parametrize('op', [operator.eq, operator.ne, + operator.gt, operator.ge, + operator.lt, operator.le]) + def test_comparison_tzawareness_compat_scalars(self, op, box_with_array): + # GH#18162 + dr = pd.date_range('2016-01-01', periods=6) + dz = dr.tz_localize('US/Pacific') + + dr = tm.box_expected(dr, box_with_array) + dz = tm.box_expected(dz, box_with_array) # Check comparisons against scalar Timestamps ts = pd.Timestamp('2000-03-14 01:59') ts_tz = pd.Timestamp('2000-03-14 01:59', tz='Europe/Amsterdam') assert_all(dr > ts) + msg = 'Cannot compare tz-naive and tz-aware' with pytest.raises(TypeError, match=msg): op(dr, ts_tz) From 0a9a886974c0b61a235d3d21bb61e84c899e200f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 17 Jun 2019 14:07:01 -0700 Subject: [PATCH 13/16] simplify list comprehension --- pandas/core/internals/construction.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 4e4cae1ea89e3..359ef2b15cb5f 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -163,8 +163,7 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): if values.ndim == 2 and values.shape[0] != 1: # transpose and separate blocks - dvals_list = [maybe_infer_to_datetimelike(values[n, :]) - for n in range(len(values))] + dvals_list = [maybe_infer_to_datetimelike(row) for row in values] for n in range(len(dvals_list)): if isinstance(dvals_list[n], np.ndarray): dvals_list[n] = dvals_list[n].reshape(1, -1) From 5c38a761c80b215ade8903bf68eb07a0ef4c6dfc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 24 Jun 2019 08:50:01 -0700 Subject: [PATCH 14/16] single assignment --- pandas/core/groupby/generic.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2f31a069cb3e4..693d440a9ce59 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1725,12 +1725,13 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: """ result = result.copy() - ocols = [idx for idx in range(len(result.columns)) - if is_object_dtype(result.dtypes[idx])] + obj_cols = [idx for idx in range(len(result.columns)) + if is_object_dtype(result.dtypes[idx])] - for cidx in ocols: - cvals = result.iloc[:, cidx].values - result.iloc[:, cidx] = maybe_convert_objects(cvals, - convert_numeric=False) + # See GH#26285 + converted = [maybe_convert_objects(result.iloc[:, n].values, + convert_numeric=False) + for n in obj_cols] + result.iloc[:, obj_cols] = converted return result From 657aa0c3dc2c03c84b4f67f19fd9ede1b95fc92a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 24 Jun 2019 08:52:52 -0700 Subject: [PATCH 15/16] fall through to create_block_manager_from_blocks --- pandas/core/internals/construction.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index d97ed10a42057..49edc82db8919 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -171,17 +171,16 @@ def init_ndarray(values, index, columns, dtype=None, copy=False): from pandas.core.internals.blocks import make_block # TODO: What about re-joining object columns? - bdvals = [make_block(dvals_list[n], placement=[n]) - for n in range(len(dvals_list))] - return create_block_manager_from_blocks(bdvals, - [columns, index]) + block_values = [make_block(dvals_list[n], placement=[n]) + for n in range(len(dvals_list))] else: - dvals = maybe_infer_to_datetimelike(values) - - values = dvals + datelike_vals = maybe_infer_to_datetimelike(values) + block_values = [datelike_vals] + else: + block_values = [values] - return create_block_manager_from_blocks([values], [columns, index]) + return create_block_manager_from_blocks(block_values, [columns, index]) def init_dict(data, index, columns, dtype=None): From 820c4e40d341d41d4829c27a8ba28bd98852a208 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Jun 2019 14:38:27 -0700 Subject: [PATCH 16/16] Fix assignment error --- pandas/core/groupby/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7bca90d03480f..a10920b7a5afb 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1729,9 +1729,9 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: if is_object_dtype(result.dtypes[idx])] # See GH#26285 - converted = [maybe_convert_objects(result.iloc[:, n].values, - convert_numeric=False) - for n in obj_cols] + for n in obj_cols: + converted = maybe_convert_objects(result.iloc[:, n].values, + convert_numeric=False) - result.iloc[:, obj_cols] = converted + result.iloc[:, n] = converted return result