@@ -39,19 +39,19 @@ a simple example:
39
39
' B' : [' B0' , ' B1' , ' B2' , ' B3' ],
40
40
' C' : [' C0' , ' C1' , ' C2' , ' C3' ],
41
41
' D' : [' D0' , ' D1' , ' D2' , ' D3' ]},
42
- index = [0 , 1 , 2 , 3 ])
42
+ index = [0 , 1 , 2 , 3 ])
43
43
44
44
df2 = pd.DataFrame({' A' : [' A4' , ' A5' , ' A6' , ' A7' ],
45
45
' B' : [' B4' , ' B5' , ' B6' , ' B7' ],
46
46
' C' : [' C4' , ' C5' , ' C6' , ' C7' ],
47
47
' D' : [' D4' , ' D5' , ' D6' , ' D7' ]},
48
- index = [4 , 5 , 6 , 7 ])
48
+ index = [4 , 5 , 6 , 7 ])
49
49
50
50
df3 = pd.DataFrame({' A' : [' A8' , ' A9' , ' A10' , ' A11' ],
51
51
' B' : [' B8' , ' B9' , ' B10' , ' B11' ],
52
52
' C' : [' C8' , ' C9' , ' C10' , ' C11' ],
53
53
' D' : [' D8' , ' D9' , ' D10' , ' D11' ]},
54
- index = [8 , 9 , 10 , 11 ])
54
+ index = [8 , 9 , 10 , 11 ])
55
55
56
56
frames = [df1, df2, df3]
57
57
result = pd.concat(frames)
@@ -380,7 +380,7 @@ Through the ``keys`` argument we can override the existing column names.
380
380
381
381
.. ipython:: python
382
382
383
- pd.concat([s3, s4, s5], axis = 1 , keys = [' red' ,' blue' ,' yellow' ])
383
+ pd.concat([s3, s4, s5], axis = 1 , keys = [' red' , ' blue' , ' yellow' ])
384
384
385
385
Let's consider a variation of the very first example presented:
386
386
@@ -437,8 +437,8 @@ do so using the ``levels`` argument:
437
437
.. ipython:: python
438
438
439
439
result = pd.concat(pieces, keys = [' x' , ' y' , ' z' ],
440
- levels = [[' z' , ' y' , ' x' , ' w' ]],
441
- names = [' group_key' ])
440
+ levels = [[' z' , ' y' , ' x' , ' w' ]],
441
+ names = [' group_key' ])
442
442
443
443
.. ipython:: python
444
444
:suppress:
@@ -726,9 +726,9 @@ Here is another example with duplicate join keys in DataFrames:
726
726
727
727
.. ipython:: python
728
728
729
- left = pd.DataFrame({' A' : [1 ,2 ], ' B' : [2 , 2 ]})
729
+ left = pd.DataFrame({' A' : [1 , 2 ], ' B' : [2 , 2 ]})
730
730
731
- right = pd.DataFrame({' A' : [4 ,5 , 6 ], ' B' : [2 ,2 , 2 ]})
731
+ right = pd.DataFrame({' A' : [4 , 5 , 6 ], ' B' : [2 , 2 , 2 ]})
732
732
733
733
result = pd.merge(left, right, on = ' B' , how = ' outer' )
734
734
@@ -801,8 +801,8 @@ that takes on values:
801
801
802
802
.. ipython:: python
803
803
804
- df1 = pd.DataFrame({' col1' : [0 , 1 ], ' col_left' :[' a' , ' b' ]})
805
- df2 = pd.DataFrame({' col1' : [1 , 2 , 2 ],' col_right' :[2 , 2 , 2 ]})
804
+ df1 = pd.DataFrame({' col1' : [0 , 1 ], ' col_left' : [' a' , ' b' ]})
805
+ df2 = pd.DataFrame({' col1' : [1 , 2 , 2 ], ' col_right' : [2 , 2 , 2 ]})
806
806
pd.merge(df1, df2, on = ' col1' , how = ' outer' , indicator = True )
807
807
808
808
The ``indicator`` argument also accepts a string, in which case the value of the passed string is used as the name of the indicator column.
@@ -857,19 +857,18 @@ The left frame.
857
857
X = X.astype(CategoricalDtype(categories = [' foo' , ' bar' ]))
858
858
859
859
left = pd.DataFrame({' X' : X,
860
- ' Y' : np.random.choice([' one' , ' two' , ' three' ], size = (10 ,))})
860
+ ' Y' : np.random.choice([' one' , ' two' , ' three' ],
861
+ size = (10 ,))})
861
862
left
862
863
left.dtypes
863
864
864
865
The right frame.
865
866
866
867
.. ipython:: python
867
868
868
- right = pd.DataFrame({
869
- ' X' : pd.Series([' foo' , ' bar' ],
870
- dtype = CategoricalDtype([' foo' , ' bar' ])),
871
- ' Z' : [1 , 2 ]
872
- })
869
+ right = pd.DataFrame({' X' : pd.Series([' foo' , ' bar' ],
870
+ dtype = CategoricalDtype([' foo' , ' bar' ])),
871
+ ' Z' : [1 , 2 ]})
873
872
right
874
873
right.dtypes
875
874
@@ -903,11 +902,11 @@ potentially differently-indexed ``DataFrames`` into a single result
903
902
904
903
left = pd.DataFrame({' A' : [' A0' , ' A1' , ' A2' ],
905
904
' B' : [' B0' , ' B1' , ' B2' ]},
906
- index = [' K0' , ' K1' , ' K2' ])
905
+ index = [' K0' , ' K1' , ' K2' ])
907
906
908
907
right = pd.DataFrame({' C' : [' C0' , ' C2' , ' C3' ],
909
908
' D' : [' D0' , ' D2' , ' D3' ]},
910
- index = [' K0' , ' K2' , ' K3' ])
909
+ index = [' K0' , ' K2' , ' K3' ])
911
910
912
911
result = left.join(right)
913
912
@@ -999,7 +998,7 @@ join key), using ``join`` may be more convenient. Here is a simple example:
999
998
1000
999
right = pd.DataFrame({' C' : [' C0' , ' C1' ],
1001
1000
' D' : [' D0' , ' D1' ]},
1002
- index = [' K0' , ' K1' ])
1001
+ index = [' K0' , ' K1' ])
1003
1002
1004
1003
result = left.join(right, on = ' key' )
1005
1004
@@ -1038,8 +1037,8 @@ To join on multiple keys, the passed DataFrame must have a ``MultiIndex``:
1038
1037
index = pd.MultiIndex.from_tuples([(' K0' , ' K0' ), (' K1' , ' K0' ),
1039
1038
(' K2' , ' K0' ), (' K2' , ' K1' )])
1040
1039
right = pd.DataFrame({' C' : [' C0' , ' C1' , ' C2' , ' C3' ],
1041
- ' D' : [' D0' , ' D1' , ' D2' , ' D3' ]},
1042
- index = index)
1040
+ ' D' : [' D0' , ' D1' , ' D2' , ' D3' ]},
1041
+ index = index)
1043
1042
1044
1043
Now this can be joined by passing the two key column names:
1045
1044
@@ -1134,12 +1133,12 @@ the left argument, as in this example:
1134
1133
1135
1134
leftindex = pd.MultiIndex.from_product([list (' abc' ), list (' xy' ), [1 , 2 ]],
1136
1135
names = [' abc' , ' xy' , ' num' ])
1137
- left = pd.DataFrame({' v1' : range (12 )}, index = leftindex)
1136
+ left = pd.DataFrame({' v1' : range (12 )}, index = leftindex)
1138
1137
left
1139
1138
1140
1139
rightindex = pd.MultiIndex.from_product([list (' abc' ), list (' xy' )],
1141
1140
names = [' abc' , ' xy' ])
1142
- right = pd.DataFrame({' v2' : [100 * i for i in range (1 , 7 )]}, index = rightindex)
1141
+ right = pd.DataFrame({' v2' : [100 * i for i in range (1 , 7 )]}, index = rightindex)
1143
1142
right
1144
1143
1145
1144
left.join(right, on = [' abc' , ' xy' ], how = ' inner' )
@@ -1154,17 +1153,17 @@ done using the following code.
1154
1153
names = [' key' , ' X' ])
1155
1154
left = pd.DataFrame({' A' : [' A0' , ' A1' , ' A2' ],
1156
1155
' B' : [' B0' , ' B1' , ' B2' ]},
1157
- index = leftindex)
1156
+ index = leftindex)
1158
1157
1159
1158
rightindex = pd.MultiIndex.from_tuples([(' K0' , ' Y0' ), (' K1' , ' Y1' ),
1160
1159
(' K2' , ' Y2' ), (' K2' , ' Y3' )],
1161
1160
names = [' key' , ' Y' ])
1162
1161
right = pd.DataFrame({' C' : [' C0' , ' C1' , ' C2' , ' C3' ],
1163
1162
' D' : [' D0' , ' D1' , ' D2' , ' D3' ]},
1164
- index = rightindex)
1163
+ index = rightindex)
1165
1164
1166
1165
result = pd.merge(left.reset_index(), right.reset_index(),
1167
- on = [' key' ], how = ' inner' ).set_index([' key' ,' X' ,' Y' ])
1166
+ on = [' key' ], how = ' inner' ).set_index([' key' , ' X' , ' Y' ])
1168
1167
1169
1168
.. ipython:: python
1170
1169
:suppress:
0 commit comments