Skip to content

Commit 8406a26

Browse files
committed
API/BUG: Fix Series ops inconsistencies
1 parent 453bc26 commit 8406a26

File tree

5 files changed

+314
-43
lines changed

5 files changed

+314
-43
lines changed

doc/source/whatsnew/v0.19.0.txt

+106
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,112 @@ New Behavior:
475475

476476
type(s.tolist()[0])
477477

478+
.. _whatsnew_0190.api.series_ops:
479+
480+
``Series`` operators for different indexes
481+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
482+
483+
The following ``Series`` operators have been changed to make all operators consistent,
484+
including ``DataFrame`` (:issue:`1134`, :issue:`4581`, :issue:`13538`)
485+
486+
- ``Series`` comparison operators now raise ``ValueError`` when ``index`` are different.
487+
- ``Series`` logical operators align both ``index``.
488+
489+
.. warning::
490+
Until 0.18.1, comparing ``Series`` with the same length succeeded even if
491+
their ``index`` are different (the result ignores ``index``).
492+
As of 0.19.0, it raises ``ValueError`` to be more strict.
493+
494+
As a result, ``Series`` and ``DataFrame`` operators behave as below:
495+
496+
Arithmetic operators
497+
""""""""""""""""""""
498+
499+
Arithmetic operators align both ``index`` (no changes).
500+
501+
.. ipython:: python
502+
503+
s1 = pd.Series([1, 2, 3], index=list('ABC'))
504+
s2 = pd.Series([2, 2, 2], index=list('ABD'))
505+
s1 + s2
506+
507+
df1 = pd.DataFrame([1, 2, 3], index=list('ABC'))
508+
df2 = pd.DataFrame([2, 2, 2], index=list('ABD'))
509+
df1 + df2
510+
511+
Comparison operators
512+
""""""""""""""""""""
513+
514+
Comparison operators raise ``ValueError`` when ``index`` are different.
515+
516+
Previous Behavior (``Series``):
517+
518+
``Series`` compares values ignoring ``index`` as long as both lengths are the same.
519+
520+
.. code-block:: ipython
521+
522+
In [1]: s1 == s2
523+
Out[1]:
524+
A False
525+
B True
526+
C False
527+
dtype: bool
528+
529+
New Behavior (``Series``):
530+
531+
.. code-block:: ipython
532+
533+
In [2]: s1 == s2
534+
Out[2]:
535+
ValueError: Can only compare identically-labeled Series objects
536+
537+
Current Behavior (``DataFrame``, no change):
538+
539+
.. code-block:: ipython
540+
541+
In [3]: df1 == df2
542+
Out[3]:
543+
ValueError: Can only compare identically-labeled DataFrame objects
544+
545+
Logical operators
546+
"""""""""""""""""
547+
548+
Logical operators align both ``index``.
549+
550+
Previous Behavior (``Series``):
551+
552+
Only left hand side ``index`` is kept.
553+
554+
.. code-block:: ipython
555+
556+
In [4]: s1 = pd.Series([True, False, True], index=list('ABC'))
557+
In [5]: s2 = pd.Series([True, True, True], index=list('ABD'))
558+
In [6]: s1 & s2
559+
Out[6]:
560+
A True
561+
B False
562+
C False
563+
dtype: bool
564+
565+
New Behavior (``Series``):
566+
567+
.. ipython:: python
568+
569+
s1 = pd.Series([True, False, True], index=list('ABC'))
570+
s2 = pd.Series([True, True, True], index=list('ABD'))
571+
s1 & s2
572+
573+
.. note::
574+
``Series`` logical operators fill ``NaN`` results with ``False``.
575+
576+
Current Behavior (``DataFrame``, no change):
577+
578+
.. ipython:: python
579+
580+
df1 = pd.DataFrame([True, False, True], index=list('ABC'))
581+
df2 = pd.DataFrame([True, True, True], index=list('ABD'))
582+
df1 & df2
583+
478584
.. _whatsnew_0190.api.promote:
479585

480586
``Series`` type promotion on assignment

pandas/core/ops.py

+35-14
Original file line numberDiff line numberDiff line change
@@ -311,17 +311,6 @@ def get_op(cls, left, right, name, na_op):
311311
is_datetime_lhs = (is_datetime64_dtype(left) or
312312
is_datetime64tz_dtype(left))
313313

314-
if isinstance(left, ABCSeries) and isinstance(right, ABCSeries):
315-
# avoid repeated alignment
316-
if not left.index.equals(right.index):
317-
left, right = left.align(right, copy=False)
318-
319-
index, lidx, ridx = left.index.join(right.index, how='outer',
320-
return_indexers=True)
321-
# if DatetimeIndex have different tz, convert to UTC
322-
left.index = index
323-
right.index = index
324-
325314
if not (is_datetime_lhs or is_timedelta_lhs):
326315
return _Op(left, right, name, na_op)
327316
else:
@@ -603,6 +592,33 @@ def _is_offset(self, arr_or_obj):
603592
return False
604593

605594

595+
def _align_method_SERIES(left, right, align_asobject=False):
596+
""" align lhs and rhs Series """
597+
598+
# ToDo: Different from _align_method_FRAME, list, tuple and ndarray
599+
# are not coerced here
600+
# because Series has inconsistencies described in #13637
601+
602+
if isinstance(right, ABCSeries):
603+
# avoid repeated alignment
604+
if not left.index.equals(right.index):
605+
606+
if align_asobject:
607+
# to keep original value's dtype for bool ops
608+
left = left.astype(object)
609+
right = right.astype(object)
610+
611+
left, right = left.align(right, copy=False)
612+
613+
index, lidx, ridx = left.index.join(right.index, how='outer',
614+
return_indexers=True)
615+
# if DatetimeIndex have different tz, convert to UTC
616+
left.index = index
617+
right.index = index
618+
619+
return left, right
620+
621+
606622
def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None,
607623
**eval_kwargs):
608624
"""
@@ -654,6 +670,8 @@ def wrapper(left, right, name=name, na_op=na_op):
654670
if isinstance(right, pd.DataFrame):
655671
return NotImplemented
656672

673+
left, right = _align_method_SERIES(left, right)
674+
657675
converted = _Op.get_op(left, right, name, na_op)
658676

659677
left, right = converted.left, converted.right
@@ -761,8 +779,9 @@ def wrapper(self, other, axis=None):
761779

762780
if isinstance(other, ABCSeries):
763781
name = _maybe_match_name(self, other)
764-
if len(self) != len(other):
765-
raise ValueError('Series lengths must match to compare')
782+
if not self._indexed_same(other):
783+
msg = 'Can only compare identically-labeled Series objects'
784+
raise ValueError(msg)
766785
return self._constructor(na_op(self.values, other.values),
767786
index=self.index, name=name)
768787
elif isinstance(other, pd.DataFrame): # pragma: no cover
@@ -784,6 +803,7 @@ def wrapper(self, other, axis=None):
784803

785804
return self._constructor(na_op(self.values, np.asarray(other)),
786805
index=self.index).__finalize__(self)
806+
787807
elif isinstance(other, pd.Categorical):
788808
if not is_categorical_dtype(self):
789809
msg = ("Cannot compare a Categorical for op {op} with Series "
@@ -856,9 +876,10 @@ def wrapper(self, other):
856876
fill_int = lambda x: x.fillna(0)
857877
fill_bool = lambda x: x.fillna(False).astype(bool)
858878

879+
self, other = _align_method_SERIES(self, other, align_asobject=True)
880+
859881
if isinstance(other, ABCSeries):
860882
name = _maybe_match_name(self, other)
861-
other = other.reindex_like(self)
862883
is_other_int_dtype = is_integer_dtype(other.dtype)
863884
other = fill_int(other) if is_other_int_dtype else fill_bool(other)
864885

pandas/io/tests/json/test_ujson.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -1306,43 +1306,45 @@ def testSeries(self):
13061306

13071307
# column indexed
13081308
outp = Series(ujson.decode(ujson.encode(s))).sort_values()
1309-
self.assertTrue((s == outp).values.all())
1309+
exp = Series([10, 20, 30, 40, 50, 60],
1310+
index=['6', '7', '8', '9', '10', '15'])
1311+
tm.assert_series_equal(outp, exp)
13101312

13111313
outp = Series(ujson.decode(ujson.encode(s), numpy=True)).sort_values()
1312-
self.assertTrue((s == outp).values.all())
1314+
tm.assert_series_equal(outp, exp)
13131315

13141316
dec = _clean_dict(ujson.decode(ujson.encode(s, orient="split")))
13151317
outp = Series(**dec)
1316-
self.assertTrue((s == outp).values.all())
1317-
self.assertTrue(s.name == outp.name)
1318+
tm.assert_series_equal(outp, s)
13181319

13191320
dec = _clean_dict(ujson.decode(ujson.encode(s, orient="split"),
13201321
numpy=True))
13211322
outp = Series(**dec)
1322-
self.assertTrue((s == outp).values.all())
1323-
self.assertTrue(s.name == outp.name)
13241323

1325-
outp = Series(ujson.decode(ujson.encode(
1326-
s, orient="records"), numpy=True))
1327-
self.assertTrue((s == outp).values.all())
1324+
outp = Series(ujson.decode(ujson.encode(s, orient="records"),
1325+
numpy=True))
1326+
exp = Series([10, 20, 30, 40, 50, 60])
1327+
tm.assert_series_equal(outp, exp)
13281328

13291329
outp = Series(ujson.decode(ujson.encode(s, orient="records")))
1330-
self.assertTrue((s == outp).values.all())
1330+
tm.assert_series_equal(outp, exp)
13311331

1332-
outp = Series(ujson.decode(
1333-
ujson.encode(s, orient="values"), numpy=True))
1334-
self.assertTrue((s == outp).values.all())
1332+
outp = Series(ujson.decode(ujson.encode(s, orient="values"),
1333+
numpy=True))
1334+
tm.assert_series_equal(outp, exp)
13351335

13361336
outp = Series(ujson.decode(ujson.encode(s, orient="values")))
1337-
self.assertTrue((s == outp).values.all())
1337+
tm.assert_series_equal(outp, exp)
13381338

13391339
outp = Series(ujson.decode(ujson.encode(
13401340
s, orient="index"))).sort_values()
1341-
self.assertTrue((s == outp).values.all())
1341+
exp = Series([10, 20, 30, 40, 50, 60],
1342+
index=['6', '7', '8', '9', '10', '15'])
1343+
tm.assert_series_equal(outp, exp)
13421344

13431345
outp = Series(ujson.decode(ujson.encode(
13441346
s, orient="index"), numpy=True)).sort_values()
1345-
self.assertTrue((s == outp).values.all())
1347+
tm.assert_series_equal(outp, exp)
13461348

13471349
def testSeriesNested(self):
13481350
s = Series([10, 20, 30, 40, 50, 60], name="series",

pandas/tests/indexes/common.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,8 @@ def test_equals_op(self):
676676
index_a == series_d
677677
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
678678
index_a == array_d
679-
with tm.assertRaisesRegexp(ValueError, "Series lengths must match"):
679+
msg = "Can only compare identically-labeled Series objects"
680+
with tm.assertRaisesRegexp(ValueError, msg):
680681
series_a == series_d
681682
with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
682683
series_a == array_d

0 commit comments

Comments
 (0)