Skip to content

Commit b977278

Browse files
Albert Villanova del Moral, jreback
Albert Villanova del Moral
authored and committed
Address requested changes
1 parent 784fe75 commit b977278

File tree

6 files changed

+124
-90
lines changed

6 files changed

+124
-90
lines changed

pandas/core/frame.py

+2 -3
Original file line numberDiff line numberDiff line change
@@ -4463,16 +4463,15 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='',
44634463
* left: use calling frame's index (or column if on is specified)
44644464
* right: use other frame's index
44654465
* outer: form union of calling frame's index (or column if on is
4466-
specified) with other frame's index
4466+
specified) with other frame's index, and sort it
44674467
* inner: form intersection of calling frame's index (or column if
44684468
on is specified) with other frame's index
44694469
lsuffix : string
44704470
Suffix to use from left frame's overlapping columns
44714471
rsuffix : string
44724472
Suffix to use from right frame's overlapping columns
44734473
sort : boolean, default False
4474-
Order result DataFrame lexicographically by the join key. If False,
4475-
preserves the index order of the calling (left) DataFrame
4474+
Order result DataFrame lexicographically by the join key
44764475
44774476
Notes
44784477
-----

pandas/indexes/base.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -2089,8 +2089,8 @@ def intersection(self, other):
20892089
"""
20902090
Form the intersection of two Index objects.
20912091
2092-
This returns a new Index with elements common to the index and `other`.
2093-
Sortedness of the result is not guaranteed.
2092+
This returns a new Index with elements common to the index and `other`,
2093+
preserving the calling index order.
20942094
20952095
Parameters
20962096
----------

pandas/tests/frame/test_join.py

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from __future__ import print_function
4+
5+
from datetime import datetime
6+
7+
import numpy as np
8+
from numpy import nan
9+
10+
import pandas as pd
11+
12+
from pandas import DataFrame, Index, Series, Timestamp, date_range
13+
from pandas.compat import lrange
14+
15+
from pandas.tests.frame.common import TestData
16+
17+
import pandas.util.testing as tm
18+
from pandas.util.testing import (assertRaisesRegexp,
19+
assert_frame_equal,
20+
assert_series_equal)
21+
22+
23+
class TestDataFrameJoin(tm.TestCase, TestData):
24+
25+
def test_join(self):
26+
df1 = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
27+
df2 = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])
28+
29+
result = df1.join(df2, how='inner')
30+
expected = pd.DataFrame({'a': [20, 10], 'b': [200, 100]}, index=[2, 1])
31+
self.assert_frame_equal(result, expected)
32+
33+
def test_join_index(self):
34+
# left / right
35+
36+
f = self.frame.loc[self.frame.index[:10], ['A', 'B']]
37+
f2 = self.frame.loc[self.frame.index[5:], ['C', 'D']].iloc[::-1]
38+
39+
joined = f.join(f2)
40+
self.assert_index_equal(f.index, joined.index)
41+
expected_columns = pd.Index(['A', 'B', 'C', 'D'])
42+
self.assert_index_equal(joined.columns, expected_columns)
43+
44+
joined = f.join(f2, how='left')
45+
self.assert_index_equal(joined.index, f.index)
46+
self.assert_index_equal(joined.columns, expected_columns)
47+
48+
joined = f.join(f2, how='right')
49+
self.assert_index_equal(joined.index, f2.index)
50+
self.assert_index_equal(joined.columns, expected_columns)
51+
52+
# inner
53+
54+
joined = f.join(f2, how='inner')
55+
self.assert_index_equal(joined.index, f.index[5:10])
56+
self.assert_index_equal(joined.columns, expected_columns)
57+
58+
# outer
59+
60+
joined = f.join(f2, how='outer')
61+
self.assert_index_equal(joined.index, self.frame.index.sort_values())
62+
self.assert_index_equal(joined.columns, expected_columns)
63+
64+
assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo')
65+
66+
# corner case - overlapping columns
67+
for how in ('outer', 'left', 'inner'):
68+
with assertRaisesRegexp(ValueError, 'columns overlap but '
69+
'no suffix'):
70+
self.frame.join(self.frame, how=how)
71+
72+
def test_join_index_more(self):
73+
af = self.frame.loc[:, ['A', 'B']]
74+
bf = self.frame.loc[::2, ['C', 'D']]
75+
76+
expected = af.copy()
77+
expected['C'] = self.frame['C'][::2]
78+
expected['D'] = self.frame['D'][::2]
79+
80+
result = af.join(bf)
81+
assert_frame_equal(result, expected)
82+
83+
result = af.join(bf, how='right')
84+
assert_frame_equal(result, expected[::2])
85+
86+
result = bf.join(af, how='right')
87+
assert_frame_equal(result, expected.loc[:, result.columns])
88+
89+
def test_join_index_series(self):
90+
df = self.frame.copy()
91+
s = df.pop(self.frame.columns[-1])
92+
joined = df.join(s)
93+
94+
# TODO should this check_names ?
95+
assert_frame_equal(joined, self.frame, check_names=False)
96+
97+
s.name = None
98+
assertRaisesRegexp(ValueError, 'must have a name', df.join, s)
99+
100+
def test_join_overlap(self):
101+
df1 = self.frame.loc[:, ['A', 'B', 'C']]
102+
df2 = self.frame.loc[:, ['B', 'C', 'D']]
103+
104+
joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2')
105+
df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1')
106+
df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2')
107+
108+
no_overlap = self.frame.loc[:, ['A', 'D']]
109+
expected = df1_suf.join(df2_suf).join(no_overlap)
110+
111+
# column order not necessarily sorted
112+
assert_frame_equal(joined, expected.loc[:, joined.columns])
113+

pandas/tests/frame/test_misc_api.py

-81
Original file line numberDiff line numberDiff line change
@@ -57,87 +57,6 @@ def test_get_value(self):
5757
expected = self.frame[col][idx]
5858
tm.assert_almost_equal(result, expected)
5959

60-
def test_join_index(self):
61-
# left / right
62-
63-
f = self.frame.reindex(columns=['A', 'B'])[:10]
64-
f2 = self.frame.reindex(columns=['C', 'D'])[5:][::-1]
65-
66-
joined = f.join(f2)
67-
self.assert_index_equal(f.index, joined.index)
68-
expected_columns = pd.Index(['A', 'B', 'C', 'D'])
69-
self.assert_index_equal(joined.columns, expected_columns)
70-
71-
joined = f.join(f2, how='left')
72-
self.assert_index_equal(joined.index, f.index)
73-
self.assert_index_equal(joined.columns, expected_columns)
74-
75-
joined = f.join(f2, how='right')
76-
self.assert_index_equal(joined.index, f2.index)
77-
self.assert_index_equal(joined.columns, expected_columns)
78-
79-
# inner
80-
81-
joined = f.join(f2, how='inner')
82-
self.assert_index_equal(joined.index, f.index.intersection(f2.index))
83-
self.assert_index_equal(joined.columns, expected_columns)
84-
85-
# outer
86-
87-
joined = f.join(f2, how='outer')
88-
self.assert_index_equal(joined.index, self.frame.index.sort_values())
89-
self.assert_index_equal(joined.columns, expected_columns)
90-
91-
assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo')
92-
93-
# corner case - overlapping columns
94-
for how in ('outer', 'left', 'inner'):
95-
with assertRaisesRegexp(ValueError, 'columns overlap but '
96-
'no suffix'):
97-
self.frame.join(self.frame, how=how)
98-
99-
def test_join_index_more(self):
100-
af = self.frame.loc[:, ['A', 'B']]
101-
bf = self.frame.loc[::2, ['C', 'D']]
102-
103-
expected = af.copy()
104-
expected['C'] = self.frame['C'][::2]
105-
expected['D'] = self.frame['D'][::2]
106-
107-
result = af.join(bf)
108-
assert_frame_equal(result, expected)
109-
110-
result = af.join(bf, how='right')
111-
assert_frame_equal(result, expected[::2])
112-
113-
result = bf.join(af, how='right')
114-
assert_frame_equal(result, expected.loc[:, result.columns])
115-
116-
def test_join_index_series(self):
117-
df = self.frame.copy()
118-
s = df.pop(self.frame.columns[-1])
119-
joined = df.join(s)
120-
121-
# TODO should this check_names ?
122-
assert_frame_equal(joined, self.frame, check_names=False)
123-
124-
s.name = None
125-
assertRaisesRegexp(ValueError, 'must have a name', df.join, s)
126-
127-
def test_join_overlap(self):
128-
df1 = self.frame.loc[:, ['A', 'B', 'C']]
129-
df2 = self.frame.loc[:, ['B', 'C', 'D']]
130-
131-
joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2')
132-
df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1')
133-
df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2')
134-
135-
no_overlap = self.frame.loc[:, ['A', 'D']]
136-
expected = df1_suf.join(df2_suf).join(no_overlap)
137-
138-
# column order not necessarily sorted
139-
assert_frame_equal(joined, expected.loc[:, joined.columns])
140-
14160
def test_add_prefix_suffix(self):
14261
with_prefix = self.frame.add_prefix('foo#')
14362
expected = pd.Index(['foo#%s' % c for c in self.frame.columns])

pandas/tests/indexes/test_base.py

+5 -2
Original file line numberDiff line numberDiff line change
@@ -629,13 +629,11 @@ def test_intersection(self):
629629
expected = Index([5, 3, 4], name='idx')
630630
result = idx1.intersection(idx2)
631631
self.assert_index_equal(result, expected)
632-
self.assertEqual(result.name, expected.name)
633632

634633
idx2 = Index([4, 7, 6, 5, 3], name='other')
635634
expected = Index([5, 3, 4], name=None)
636635
result = idx1.intersection(idx2)
637636
self.assert_index_equal(result, expected)
638-
self.assertEqual(result.name, expected.name)
639637

640638
# non-monotonic non-unique
641639
idx1 = Index(['A', 'B', 'A', 'C'])
@@ -644,6 +642,11 @@ def test_intersection(self):
644642
result = idx1.intersection(idx2)
645643
self.assert_index_equal(result, expected)
646644

645+
idx2 = Index(['B', 'D', 'A'])
646+
expected = Index(['A', 'B', 'A'], dtype='object')
647+
result = idx1.intersection(idx2)
648+
self.assert_index_equal(result, expected)
649+
647650
# preserve names
648651
first = self.strIndex[5:20]
649652
second = self.strIndex[:10]

pandas/tseries/index.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -1203,8 +1203,8 @@ def intersection(self, other):
12031203
not other.offset.isAnchored() or
12041204
(not self.is_monotonic or not other.is_monotonic)):
12051205
result = Index.intersection(self, other)
1206-
result = self._simple_new(result._values, name=result.name,
1207-
tz=result.tz)
1206+
result = self._shallow_copy(result._values, name=result.name,
1207+
tz=result.tz, freq=None)
12081208
if result.freq is None:
12091209
result.offset = to_offset(result.inferred_freq)
12101210
return result

0 commit comments

Comments
 (0)