Skip to content

Commit 9ee7594

Browse files
jbrockmendelvictor
authored and
victor
committed
dispatch scalar DataFrame ops to Series (pandas-dev#22163)
1 parent 4da24d1 commit 9ee7594

File tree

12 files changed

+238
-154
lines changed

12 files changed

+238
-154
lines changed

doc/source/whatsnew/v0.24.0.txt

+40-1
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ New Behavior:
216216
idx = pd.interval_range(0, 4)
217217
idx.values
218218

219-
This mirrors ``CateogricalIndex.values``, which returns a ``Categorical``.
219+
This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``.
220220

221221
For situations where you need an ``ndarray`` of ``Interval`` objects, use
222222
:meth:`numpy.asarray` or ``idx.astype(object)``.
@@ -406,6 +406,34 @@ Previous Behavior:
406406
In [3]: pi - pi[0]
407407
Out[3]: Int64Index([0, 1, 2], dtype='int64')
408408

409+
410+
.. _whatsnew_0240.api.timedelta64_subtract_nan:
411+
412+
Addition/Subtraction of ``NaN`` from :class:`DataFrame`
413+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
414+
415+
Adding or subtracting ``NaN`` from a :class:`DataFrame` column with
416+
``timedelta64[ns]`` dtype will now raise a ``TypeError`` instead of returning
417+
all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and
418+
``Series`` behavior (:issue:`22163`).
419+
420+
.. ipython:: python
421+
422+
df = pd.DataFrame([pd.Timedelta(days=1)])
423+
df - np.nan
424+
425+
Previous Behavior:
426+
427+
.. code-block:: ipython
428+
429+
In [4]: df = pd.DataFrame([pd.Timedelta(days=1)])
430+
431+
In [5]: df - np.nan
432+
Out[5]:
433+
0
434+
0 NaT
435+
436+
409437
.. _whatsnew_0240.api.extension:
410438

411439
ExtensionType Changes
@@ -539,6 +567,16 @@ Datetimelike
539567
- Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raise ``TypeError`` (:issue:`22074`)
540568
- Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`)
541569
- Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`)
570+
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`, :issue:`22163`)
571+
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`, :issue:`22163`)
572+
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`, :issue:`22163`)
573+
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`, :issue:`22163`)
574+
- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`, :issue:`22163`)
575+
- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`, :issue:`22163`)
576+
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`, :issue:`22163`)
577+
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`)
578+
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`, :issue:`22163`)
579+
-
542580

543581
Timedelta
544582
^^^^^^^^^
@@ -586,6 +624,7 @@ Numeric
586624
when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``),
587625
a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly. (:issue:`16679`).
588626
- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
627+
- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`)
589628
-
590629

591630
Strings

pandas/core/frame.py

+8
Original file line numberDiff line numberDiff line change
@@ -4835,6 +4835,14 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
48354835
return self._constructor(new_data)
48364836

48374837
def _combine_const(self, other, func, errors='raise', try_cast=True):
4838+
if lib.is_scalar(other) or np.ndim(other) == 0:
4839+
new_data = {i: func(self.iloc[:, i], other)
4840+
for i, col in enumerate(self.columns)}
4841+
4842+
result = self._constructor(new_data, index=self.index, copy=False)
4843+
result.columns = self.columns
4844+
return result
4845+
48384846
new_data = self._data.eval(func=func, other=other,
48394847
errors=errors,
48404848
try_cast=try_cast)

pandas/core/ops.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1350,7 +1350,7 @@ def na_op(x, y):
13501350
with np.errstate(all='ignore'):
13511351
result = method(y)
13521352
if result is NotImplemented:
1353-
raise TypeError("invalid type comparison")
1353+
return invalid_comparison(x, y, op)
13541354
else:
13551355
result = op(x, y)
13561356

@@ -1366,6 +1366,10 @@ def wrapper(self, other, axis=None):
13661366

13671367
res_name = get_op_result_name(self, other)
13681368

1369+
if isinstance(other, list):
1370+
# TODO: same for tuples?
1371+
other = np.asarray(other)
1372+
13691373
if isinstance(other, ABCDataFrame): # pragma: no cover
13701374
# Defer to DataFrame implementation; fail early
13711375
return NotImplemented
@@ -1459,8 +1463,6 @@ def wrapper(self, other, axis=None):
14591463

14601464
else:
14611465
values = self.get_values()
1462-
if isinstance(other, list):
1463-
other = np.asarray(other)
14641466

14651467
with np.errstate(all='ignore'):
14661468
res = na_op(values, other)
@@ -1741,7 +1743,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
17411743
if fill_value is not None:
17421744
self = self.fillna(fill_value)
17431745

1744-
return self._combine_const(other, na_op, try_cast=True)
1746+
pass_op = op if lib.is_scalar(other) else na_op
1747+
return self._combine_const(other, pass_op, try_cast=True)
17451748

17461749
f.__name__ = op_name
17471750

pandas/tests/arithmetic/test_datetime64.py

+34-13
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,15 @@ def test_tz_aware_scalar_comparison(self, timestamps):
6363
expected = pd.DataFrame({'test': [False, False]})
6464
tm.assert_frame_equal(df == -1, expected)
6565

66+
def test_dt64_nat_comparison(self):
67+
# GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly
68+
ts = pd.Timestamp.now()
69+
df = pd.DataFrame([ts, pd.NaT])
70+
expected = pd.DataFrame([True, False])
71+
72+
result = df == ts
73+
tm.assert_frame_equal(result, expected)
74+
6675

6776
class TestDatetime64SeriesComparison(object):
6877
# TODO: moved from tests.series.test_operators; needs cleanup
@@ -640,10 +649,22 @@ def test_dti_cmp_object_dtype(self):
640649
# Arithmetic
641650

642651
class TestFrameArithmetic(object):
652+
def test_dt64arr_sub_dtscalar(self, box):
653+
# GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype
654+
idx = pd.date_range('2013-01-01', periods=3)
655+
idx = tm.box_expected(idx, box)
656+
657+
ts = pd.Timestamp('2013-01-01')
658+
# TODO: parametrize over scalar types
659+
660+
expected = pd.TimedeltaIndex(['0 Days', '1 Day', '2 Days'])
661+
expected = tm.box_expected(expected, box)
662+
663+
result = idx - ts
664+
tm.assert_equal(result, expected)
643665

644-
@pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
645-
strict=True)
646666
def test_df_sub_datetime64_not_ns(self):
667+
# GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano
647668
df = pd.DataFrame(pd.date_range('20130101', periods=3))
648669
dt64 = np.datetime64('2013-01-01')
649670
assert dt64.dtype == 'datetime64[D]'
@@ -992,9 +1013,11 @@ def test_dti_add_sub_float(self, op, other):
9921013
with pytest.raises(TypeError):
9931014
op(dti, other)
9941015

995-
def test_dti_add_timestamp_raises(self):
1016+
def test_dti_add_timestamp_raises(self, box):
1017+
# GH#22163 ensure DataFrame doesn't cast Timestamp to i8
9961018
idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
997-
msg = "cannot add DatetimeIndex and Timestamp"
1019+
idx = tm.box_expected(idx, box)
1020+
msg = "cannot add"
9981021
with tm.assert_raises_regex(TypeError, msg):
9991022
idx + Timestamp('2011-01-01')
10001023

@@ -1090,13 +1113,17 @@ def test_dti_add_intarray_no_freq(self, box):
10901113
# -------------------------------------------------------------
10911114
# Binary operations DatetimeIndex and timedelta-like
10921115

1093-
def test_dti_add_timedeltalike(self, tz_naive_fixture, delta):
1116+
def test_dti_add_timedeltalike(self, tz_naive_fixture, delta, box):
1117+
# GH#22005, GH#22163 check DataFrame doesn't raise TypeError
10941118
tz = tz_naive_fixture
10951119
rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
1120+
rng = tm.box_expected(rng, box)
1121+
10961122
result = rng + delta
10971123
expected = pd.date_range('2000-01-01 02:00',
10981124
'2000-02-01 02:00', tz=tz)
1099-
tm.assert_index_equal(result, expected)
1125+
expected = tm.box_expected(expected, box)
1126+
tm.assert_equal(result, expected)
11001127

11011128
def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta):
11021129
tz = tz_naive_fixture
@@ -1662,14 +1689,8 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names):
16621689
res3 = dti - other
16631690
tm.assert_series_equal(res3, expected_sub)
16641691

1665-
@pytest.mark.parametrize('box', [
1666-
pd.Index,
1667-
pd.Series,
1668-
pytest.param(pd.DataFrame,
1669-
marks=pytest.mark.xfail(reason="Returns object dtype",
1670-
strict=True))
1671-
], ids=lambda x: x.__name__)
16721692
def test_dti_add_offset_tzaware(self, tz_aware_fixture, box):
1693+
# GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
16731694
timezone = tz_aware_fixture
16741695
if timezone == 'US/Pacific':
16751696
dates = date_range('2012-11-01', periods=3, tz=timezone)

pandas/tests/arithmetic/test_numeric.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,6 @@ def test_ops_series(self):
5858
tm.assert_series_equal(expected, td * other)
5959
tm.assert_series_equal(expected, other * td)
6060

61-
@pytest.mark.parametrize('box', [
62-
pd.Index,
63-
Series,
64-
pytest.param(pd.DataFrame,
65-
marks=pytest.mark.xfail(reason="block.eval incorrect",
66-
strict=True))
67-
])
6861
@pytest.mark.parametrize('index', [
6962
pd.Int64Index(range(1, 11)),
7063
pd.UInt64Index(range(1, 11)),
@@ -79,7 +72,7 @@ def test_ops_series(self):
7972
def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
8073
# GH#19333
8174

82-
if (box is Series and
75+
if (box in [Series, pd.DataFrame] and
8376
type(scalar_td) is timedelta and index.dtype == 'f8'):
8477
raise pytest.xfail(reason="Cannot multiply timedelta by float")
8578

pandas/tests/frame/test_arithmetic.py

+66-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,53 @@
1717
# Comparisons
1818

1919
class TestFrameComparisons(object):
20+
def test_flex_comparison_nat(self):
21+
# GH#15697, GH#22163 df.eq(pd.NaT) should behave like df == pd.NaT,
22+
# and _definitely_ not be NaN
23+
df = pd.DataFrame([pd.NaT])
24+
25+
result = df == pd.NaT
26+
# result.iloc[0, 0] is a np.bool_ object
27+
assert result.iloc[0, 0].item() is False
28+
29+
result = df.eq(pd.NaT)
30+
assert result.iloc[0, 0].item() is False
31+
32+
result = df != pd.NaT
33+
assert result.iloc[0, 0].item() is True
34+
35+
result = df.ne(pd.NaT)
36+
assert result.iloc[0, 0].item() is True
37+
38+
def test_mixed_comparison(self):
39+
# GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
40+
# not raise TypeError
41+
# (this appears to be fixed before #22163, not sure when)
42+
df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]])
43+
other = pd.DataFrame([['a', 'b'], ['c', 'd']])
44+
45+
result = df == other
46+
assert not result.any().any()
47+
48+
result = df != other
49+
assert result.all().all()
50+
51+
def test_df_numeric_cmp_dt64_raises(self):
52+
# GH#8932, GH#22163
53+
ts = pd.Timestamp.now()
54+
df = pd.DataFrame({'x': range(5)})
55+
with pytest.raises(TypeError):
56+
df > ts
57+
with pytest.raises(TypeError):
58+
df < ts
59+
with pytest.raises(TypeError):
60+
ts < df
61+
with pytest.raises(TypeError):
62+
ts > df
63+
64+
assert not (df == ts).any().any()
65+
assert (df != ts).all().all()
66+
2067
def test_df_boolean_comparison_error(self):
2168
# GH#4576
2269
# boolean comparisons with a tuple/list give unexpected results
@@ -32,8 +79,8 @@ def test_df_float_none_comparison(self):
3279
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
3380
columns=['A', 'B', 'C'])
3481

35-
with pytest.raises(TypeError):
36-
df.__eq__(None)
82+
result = df.__eq__(None)
83+
assert not result.any().any()
3784

3885
def test_df_string_comparison(self):
3986
df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
@@ -251,3 +298,20 @@ def test_arith_flex_zero_len_raises(self):
251298

252299
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
253300
df_len0.sub(df['A'], axis=None, fill_value=3)
301+
302+
303+
class TestFrameArithmetic(object):
304+
def test_df_bool_mul_int(self):
305+
# GH#22047, GH#22163 multiplication by 1 should result in int dtype,
306+
# not object dtype
307+
df = pd.DataFrame([[False, True], [False, False]])
308+
result = df * 1
309+
310+
# On appveyor this comes back as np.int32 instead of np.int64,
311+
# so we check dtype.kind instead of just dtype
312+
kinds = result.dtypes.apply(lambda x: x.kind)
313+
assert (kinds == 'i').all()
314+
315+
result = 1 * df
316+
kinds = result.dtypes.apply(lambda x: x.kind)
317+
assert (kinds == 'i').all()

pandas/tests/frame/test_indexing.py

+25-3
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ def test_getitem_boolean(self):
273273
# test df[df > 0]
274274
for df in [self.tsframe, self.mixed_frame,
275275
self.mixed_float, self.mixed_int]:
276+
if compat.PY3 and df is self.mixed_frame:
277+
continue
276278

277279
data = df._get_numeric_data()
278280
bif = df[df > 0]
@@ -2468,8 +2470,11 @@ def test_boolean_indexing_mixed(self):
24682470
assert_frame_equal(df2, expected)
24692471

24702472
df['foo'] = 'test'
2471-
with tm.assert_raises_regex(TypeError, 'boolean setting '
2472-
'on mixed-type'):
2473+
msg = ("boolean setting on mixed-type|"
2474+
"not supported between|"
2475+
"unorderable types")
2476+
with tm.assert_raises_regex(TypeError, msg):
2477+
# TODO: This message should be the same in PY2/PY3
24732478
df[df > 0.3] = 1
24742479

24752480
def test_where(self):
@@ -2502,6 +2507,10 @@ def _check_get(df, cond, check_dtypes=True):
25022507
# check getting
25032508
for df in [default_frame, self.mixed_frame,
25042509
self.mixed_float, self.mixed_int]:
2510+
if compat.PY3 and df is self.mixed_frame:
2511+
with pytest.raises(TypeError):
2512+
df > 0
2513+
continue
25052514
cond = df > 0
25062515
_check_get(df, cond)
25072516

@@ -2549,6 +2558,10 @@ def _check_align(df, cond, other, check_dtypes=True):
25492558
assert (rs.dtypes == df.dtypes).all()
25502559

25512560
for df in [self.mixed_frame, self.mixed_float, self.mixed_int]:
2561+
if compat.PY3 and df is self.mixed_frame:
2562+
with pytest.raises(TypeError):
2563+
df > 0
2564+
continue
25522565

25532566
# other is a frame
25542567
cond = (df > 0)[1:]
@@ -2594,6 +2607,10 @@ def _check_set(df, cond, check_dtypes=True):
25942607

25952608
for df in [default_frame, self.mixed_frame, self.mixed_float,
25962609
self.mixed_int]:
2610+
if compat.PY3 and df is self.mixed_frame:
2611+
with pytest.raises(TypeError):
2612+
df > 0
2613+
continue
25972614

25982615
cond = df > 0
25992616
_check_set(df, cond)
@@ -2759,9 +2776,14 @@ def test_where_datetime(self):
27592776
C=np.random.randn(5)))
27602777

27612778
stamp = datetime(2013, 1, 3)
2762-
result = df[df > stamp]
2779+
with pytest.raises(TypeError):
2780+
df > stamp
2781+
2782+
result = df[df.iloc[:, :-1] > stamp]
2783+
27632784
expected = df.copy()
27642785
expected.loc[[0, 1], 'A'] = np.nan
2786+
expected.loc[:, 'C'] = np.nan
27652787
assert_frame_equal(result, expected)
27662788

27672789
def test_where_none(self):

0 commit comments

Comments
 (0)