Skip to content

Commit 1754bb5

Browse files
committed
Merge pull request #5266 from cpcloud/index-fix-sort-union-5039
BUG: union should not try to sort inplace because platform impls differ as to when sorting occurs for objects that cannot be compared
2 parents 166266d + 508bc0c commit 1754bb5

File tree

3 files changed

+144
-72
lines changed

3 files changed

+144
-72
lines changed

pandas/computation/tests/test_eval.py

+112-62
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#!/usr/bin/env python
22

3+
import warnings
4+
import operator
35
from itertools import product
46
from distutils.version import LooseVersion
57

@@ -28,7 +30,7 @@
2830
from pandas.util.testing import (assert_frame_equal, randbool,
2931
assertRaisesRegexp,
3032
assert_produces_warning, assert_series_equal)
31-
from pandas.compat import PY3, u
33+
from pandas.compat import PY3, u, reduce
3234

3335
_series_frame_incompatible = _bool_ops_syms
3436
_scalar_skip = 'in', 'not in'
@@ -699,6 +701,16 @@ def check_chained_cmp_op(self, lhs, cmp1, mid, cmp2, rhs):
699701
#-------------------------------------
700702
# basic and complex alignment
701703

704+
def _is_datetime(x):
705+
return issubclass(x.dtype.type, np.datetime64)
706+
707+
708+
def should_warn(*args):
709+
not_mono = not any(map(operator.attrgetter('is_monotonic'), args))
710+
only_one_dt = reduce(operator.xor, map(_is_datetime, args))
711+
return not_mono and only_one_dt
712+
713+
702714
class TestAlignment(object):
703715

704716
index_types = 'i', 'u', 'dt'
@@ -719,13 +731,20 @@ def check_basic_frame_alignment(self, engine, parser):
719731
tm.skip_if_no_ne(engine)
720732
args = product(self.lhs_index_types, self.index_types,
721733
self.index_types)
722-
for lr_idx_type, rr_idx_type, c_idx_type in args:
723-
df = mkdf(10, 10, data_gen_f=f, r_idx_type=lr_idx_type,
724-
c_idx_type=c_idx_type)
725-
df2 = mkdf(20, 10, data_gen_f=f, r_idx_type=rr_idx_type,
726-
c_idx_type=c_idx_type)
727-
res = pd.eval('df + df2', engine=engine, parser=parser)
728-
assert_frame_equal(res, df + df2)
734+
with warnings.catch_warnings(record=True):
735+
warnings.simplefilter('always', RuntimeWarning)
736+
for lr_idx_type, rr_idx_type, c_idx_type in args:
737+
df = mkdf(10, 10, data_gen_f=f, r_idx_type=lr_idx_type,
738+
c_idx_type=c_idx_type)
739+
df2 = mkdf(20, 10, data_gen_f=f, r_idx_type=rr_idx_type,
740+
c_idx_type=c_idx_type)
741+
# only warns if not monotonic and not sortable
742+
if should_warn(df.index, df2.index):
743+
with tm.assert_produces_warning(RuntimeWarning):
744+
res = pd.eval('df + df2', engine=engine, parser=parser)
745+
else:
746+
res = pd.eval('df + df2', engine=engine, parser=parser)
747+
assert_frame_equal(res, df + df2)
729748

730749
def test_basic_frame_alignment(self):
731750
for engine, parser in ENGINES_PARSERS:
@@ -754,12 +773,20 @@ def check_medium_complex_frame_alignment(self, engine, parser):
754773
args = product(self.lhs_index_types, self.index_types,
755774
self.index_types, self.index_types)
756775

757-
for r1, c1, r2, c2 in args:
758-
df = mkdf(3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1)
759-
df2 = mkdf(4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
760-
df3 = mkdf(5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
761-
res = pd.eval('df + df2 + df3', engine=engine, parser=parser)
762-
assert_frame_equal(res, df + df2 + df3)
776+
with warnings.catch_warnings(record=True):
777+
warnings.simplefilter('always', RuntimeWarning)
778+
779+
for r1, c1, r2, c2 in args:
780+
df = mkdf(3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1)
781+
df2 = mkdf(4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
782+
df3 = mkdf(5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
783+
if should_warn(df.index, df2.index, df3.index):
784+
with tm.assert_produces_warning(RuntimeWarning):
785+
res = pd.eval('df + df2 + df3', engine=engine,
786+
parser=parser)
787+
else:
788+
res = pd.eval('df + df2 + df3', engine=engine, parser=parser)
789+
assert_frame_equal(res, df + df2 + df3)
763790

764791
@slow
765792
def test_medium_complex_frame_alignment(self):
@@ -775,20 +802,24 @@ def testit(r_idx_type, c_idx_type, index_name):
775802
index = getattr(df, index_name)
776803
s = Series(np.random.randn(5), index[:5])
777804

778-
res = pd.eval('df + s', engine=engine, parser=parser)
805+
if should_warn(df.index, s.index):
806+
with tm.assert_produces_warning(RuntimeWarning):
807+
res = pd.eval('df + s', engine=engine, parser=parser)
808+
else:
809+
res = pd.eval('df + s', engine=engine, parser=parser)
810+
779811
if r_idx_type == 'dt' or c_idx_type == 'dt':
780-
if engine == 'numexpr':
781-
expected = df.add(s)
782-
else:
783-
expected = df + s
812+
expected = df.add(s) if engine == 'numexpr' else df + s
784813
else:
785814
expected = df + s
786815
assert_frame_equal(res, expected)
787816

788817
args = product(self.lhs_index_types, self.index_types,
789818
('index', 'columns'))
790-
for r_idx_type, c_idx_type, index_name in args:
791-
testit(r_idx_type, c_idx_type, index_name)
819+
with warnings.catch_warnings(record=True):
820+
warnings.simplefilter('always', RuntimeWarning)
821+
for r_idx_type, c_idx_type, index_name in args:
822+
testit(r_idx_type, c_idx_type, index_name)
792823

793824
def test_basic_frame_series_alignment(self):
794825
for engine, parser in ENGINES_PARSERS:
@@ -802,13 +833,14 @@ def testit(r_idx_type, c_idx_type, index_name):
802833
c_idx_type=c_idx_type)
803834
index = getattr(df, index_name)
804835
s = Series(np.random.randn(5), index[:5])
836+
if should_warn(s.index, df.index):
837+
with tm.assert_produces_warning(RuntimeWarning):
838+
res = pd.eval('s + df', engine=engine, parser=parser)
839+
else:
840+
res = pd.eval('s + df', engine=engine, parser=parser)
805841

806-
res = pd.eval('s + df', engine=engine, parser=parser)
807842
if r_idx_type == 'dt' or c_idx_type == 'dt':
808-
if engine == 'numexpr':
809-
expected = df.add(s)
810-
else:
811-
expected = s + df
843+
expected = df.add(s) if engine == 'numexpr' else s + df
812844
else:
813845
expected = s + df
814846
assert_frame_equal(res, expected)
@@ -820,8 +852,10 @@ def testit(r_idx_type, c_idx_type, index_name):
820852

821853
# dt with dt
822854
args = product(['dt'], ['dt'], ('index', 'columns'))
823-
for r_idx_type, c_idx_type, index_name in args:
824-
testit(r_idx_type, c_idx_type, index_name)
855+
with warnings.catch_warnings(record=True):
856+
warnings.simplefilter('always', RuntimeWarning)
857+
for r_idx_type, c_idx_type, index_name in args:
858+
testit(r_idx_type, c_idx_type, index_name)
825859

826860
def test_basic_series_frame_alignment(self):
827861
for engine, parser in ENGINES_PARSERS:
@@ -831,20 +865,29 @@ def check_series_frame_commutativity(self, engine, parser):
831865
tm.skip_if_no_ne(engine)
832866
args = product(self.lhs_index_types, self.index_types, ('+', '*'),
833867
('index', 'columns'))
834-
for r_idx_type, c_idx_type, op, index_name in args:
835-
df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type,
836-
c_idx_type=c_idx_type)
837-
index = getattr(df, index_name)
838-
s = Series(np.random.randn(5), index[:5])
839868

840-
lhs = 's {0} df'.format(op)
841-
rhs = 'df {0} s'.format(op)
842-
a = pd.eval(lhs, engine=engine, parser=parser)
843-
b = pd.eval(rhs, engine=engine, parser=parser)
869+
with warnings.catch_warnings(record=True):
870+
warnings.simplefilter('always', RuntimeWarning)
871+
for r_idx_type, c_idx_type, op, index_name in args:
872+
df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type,
873+
c_idx_type=c_idx_type)
874+
index = getattr(df, index_name)
875+
s = Series(np.random.randn(5), index[:5])
876+
877+
lhs = 's {0} df'.format(op)
878+
rhs = 'df {0} s'.format(op)
879+
if should_warn(df.index, s.index):
880+
with tm.assert_produces_warning(RuntimeWarning):
881+
a = pd.eval(lhs, engine=engine, parser=parser)
882+
with tm.assert_produces_warning(RuntimeWarning):
883+
b = pd.eval(rhs, engine=engine, parser=parser)
884+
else:
885+
a = pd.eval(lhs, engine=engine, parser=parser)
886+
b = pd.eval(rhs, engine=engine, parser=parser)
844887

845-
if r_idx_type != 'dt' and c_idx_type != 'dt':
846-
if engine == 'numexpr':
847-
assert_frame_equal(a, b)
888+
if r_idx_type != 'dt' and c_idx_type != 'dt':
889+
if engine == 'numexpr':
890+
assert_frame_equal(a, b)
848891

849892
def test_series_frame_commutativity(self):
850893
for engine, parser in ENGINES_PARSERS:
@@ -860,34 +903,41 @@ def check_complex_series_frame_alignment(self, engine, parser):
860903
m1 = 5
861904
m2 = 2 * m1
862905

863-
for r1, r2, c1, c2 in args:
864-
index_name = random.choice(['index', 'columns'])
865-
obj_name = random.choice(['df', 'df2'])
866-
867-
df = mkdf(m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1)
868-
df2 = mkdf(m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
869-
index = getattr(locals().get(obj_name), index_name)
870-
s = Series(np.random.randn(n), index[:n])
871-
872-
if r2 == 'dt' or c2 == 'dt':
873-
if engine == 'numexpr':
874-
expected2 = df2.add(s)
906+
with warnings.catch_warnings(record=True):
907+
warnings.simplefilter('always', RuntimeWarning)
908+
for r1, r2, c1, c2 in args:
909+
index_name = random.choice(['index', 'columns'])
910+
obj_name = random.choice(['df', 'df2'])
911+
912+
df = mkdf(m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1)
913+
df2 = mkdf(m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2)
914+
index = getattr(locals().get(obj_name), index_name)
915+
s = Series(np.random.randn(n), index[:n])
916+
917+
if r2 == 'dt' or c2 == 'dt':
918+
if engine == 'numexpr':
919+
expected2 = df2.add(s)
920+
else:
921+
expected2 = df2 + s
875922
else:
876923
expected2 = df2 + s
877-
else:
878-
expected2 = df2 + s
879924

880-
if r1 == 'dt' or c1 == 'dt':
881-
if engine == 'numexpr':
882-
expected = expected2.add(df)
925+
if r1 == 'dt' or c1 == 'dt':
926+
if engine == 'numexpr':
927+
expected = expected2.add(df)
928+
else:
929+
expected = expected2 + df
883930
else:
884931
expected = expected2 + df
885-
else:
886-
expected = expected2 + df
887932

888-
res = pd.eval('df2 + s + df', engine=engine, parser=parser)
889-
tm.assert_equal(res.shape, expected.shape)
890-
assert_frame_equal(res, expected)
933+
if should_warn(df2.index, s.index, df.index):
934+
with tm.assert_produces_warning(RuntimeWarning):
935+
res = pd.eval('df2 + s + df', engine=engine,
936+
parser=parser)
937+
else:
938+
res = pd.eval('df2 + s + df', engine=engine, parser=parser)
939+
tm.assert_equal(res.shape, expected.shape)
940+
assert_frame_equal(res, expected)
891941

892942
@slow
893943
def test_complex_series_frame_alignment(self):

pandas/core/index.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# pylint: disable=E1101,E1103,W0232
22
import datetime
3+
import warnings
34
from functools import partial
45
import warnings
56
from pandas.compat import range, zip, lrange, lzip, u, reduce
@@ -997,29 +998,36 @@ def union(self, other):
997998
result.extend([x for x in other.values if x not in value_set])
998999
else:
9991000
indexer = self.get_indexer(other)
1000-
indexer = (indexer == -1).nonzero()[0]
1001+
indexer, = (indexer == -1).nonzero()
10011002

10021003
if len(indexer) > 0:
10031004
other_diff = com.take_nd(other.values, indexer,
10041005
allow_fill=False)
10051006
result = com._concat_compat((self.values, other_diff))
1007+
10061008
try:
1009+
self.values[0] < other_diff[0]
1010+
except TypeError as e:
1011+
warnings.warn("%s, sort order is undefined for "
1012+
"incomparable objects" % e, RuntimeWarning)
1013+
else:
10071014
result.sort()
1008-
except Exception:
1009-
pass
1015+
10101016
else:
1011-
# contained in
1017+
result = self.values
1018+
10121019
try:
1013-
result = np.sort(self.values)
1014-
except TypeError: # pragma: no cover
1015-
result = self.values
1020+
result = np.sort(result)
1021+
except TypeError as e:
1022+
warnings.warn("%s, sort order is undefined for "
1023+
"incomparable objects" % e, RuntimeWarning)
10161024

10171025
# for subclasses
10181026
return self._wrap_union_result(other, result)
10191027

10201028
def _wrap_union_result(self, other, result):
10211029
name = self.name if self.name == other.name else None
1022-
return type(self)(data=result, name=name)
1030+
return self.__class__(data=result, name=name)
10231031

10241032
def intersection(self, other):
10251033
"""

pandas/tests/test_index.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from datetime import datetime, timedelta
44
from pandas.compat import range, lrange, lzip, u, zip
5+
import sys
56
import operator
67
import pickle
78
import re
@@ -15,7 +16,6 @@
1516
from pandas.core.index import (Index, Float64Index, Int64Index, MultiIndex,
1617
InvalidIndexError)
1718
from pandas.tseries.index import DatetimeIndex
18-
from pandas.core.frame import DataFrame
1919
from pandas.core.series import Series
2020
from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
2121
assert_copy)
@@ -844,11 +844,13 @@ def test_slice_keep_name(self):
844844
self.assertEqual(idx.name, idx[1:].name)
845845

846846
def test_join_self(self):
847+
# instance attributes of the form self.<name>Index
847848
indices = 'unicode', 'str', 'date', 'int', 'float'
848849
kinds = 'outer', 'inner', 'left', 'right'
849850
for index_kind in indices:
851+
res = getattr(self, '{0}Index'.format(index_kind))
852+
850853
for kind in kinds:
851-
res = getattr(self, '{0}Index'.format(index_kind))
852854
joined = res.join(res, how=kind)
853855
self.assertIs(res, joined)
854856

@@ -860,6 +862,17 @@ def test_indexing_doesnt_change_class(self):
860862
self.assertTrue(idx[[0,1]].identical(
861863
pd.Index([1, 2], dtype=np.object_)))
862864

865+
def test_outer_join_sort(self):
866+
left_idx = Index(np.random.permutation(15))
867+
right_idx = tm.makeDateIndex(10)
868+
869+
with tm.assert_produces_warning(RuntimeWarning):
870+
joined = left_idx.join(right_idx, how='outer')
871+
# right_idx in this case because DatetimeIndex has join precedence over
872+
# Int64Index
873+
expected = right_idx.astype(object).union(left_idx.astype(object))
874+
tm.assert_index_equal(joined, expected)
875+
863876

864877
class TestFloat64Index(tm.TestCase):
865878
_multiprocess_can_split_ = True
@@ -2765,6 +2778,7 @@ def test_get_combined_index():
27652778
assert(result.equals(Index([])))
27662779

27672780

2781+
27682782
if __name__ == '__main__':
27692783
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
27702784
exit=False)

0 commit comments

Comments
 (0)