@@ -1208,8 +1208,8 @@ def test_join_multi_levels2(self):
1208
1208
.reindex (columns = ['share' , 'log_return' ]))
1209
1209
1210
1210
result = (merge (household .reset_index (), log_return .reset_index (),
1211
- on = ['asset_id' ], how = 'outer' )
1212
- .set_index (['household_id' , 'asset_id' , 't' ]))
1211
+ on = ['asset_id' ], how = 'outer' )
1212
+ .set_index (['household_id' , 'asset_id' , 't' ]))
1213
1213
1214
1214
assert_frame_equal (result , expected )
1215
1215
@@ -1220,132 +1220,133 @@ def test_join_multi_levels3(self):
1220
1220
pd .DataFrame (
1221
1221
dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1222
1222
Destination = [1 , 2 , 1 , 3 , 1 ],
1223
- Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ],
1223
+ Period = ['AM' , 'PM' , 'IP' , 'AM' , 'OP' ],
1224
1224
TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1225
1225
Trips = [1987 , 3647 , 2470 , 4296 , 4444 ]),
1226
1226
columns = ['Origin' , 'Destination' , 'Period' ,
1227
1227
'TripPurp' , 'Trips' ])
1228
1228
.set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1229
-
1229
+
1230
1230
distances = (
1231
1231
pd .DataFrame (
1232
- dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1232
+ dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1233
1233
Destination = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1234
- Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ,'IP' , 'AM' ],
1234
+ Period = ['AM' , 'PM' , 'IP' , 'AM' , 'OP' , 'IP' , 'AM' ],
1235
1235
LinkType = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1236
1236
Distance = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1237
- columns = ['Origin' , 'Destination' , 'Period' ,
1237
+ columns = ['Origin' , 'Destination' , 'Period' ,
1238
1238
'LinkType' , 'Distance' ])
1239
- .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1240
-
1239
+ .set_index (['Origin' , 'Destination' , 'Period' , 'LinkType' ]))
1240
+
1241
1241
expected = (
1242
1242
pd .DataFrame (
1243
1243
dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1244
1244
Destination = [1 , 2 , 1 , 3 , 1 ],
1245
- Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ],
1245
+ Period = ['AM' , 'PM' , 'IP' , 'AM' , 'OP' ],
1246
1246
TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1247
1247
Trips = [1987 , 3647 , 2470 , 4296 , 4444 ],
1248
1248
Trips_joined = [1987 , 3647 , 2470 , 4296 , 4444 ]),
1249
1249
columns = ['Origin' , 'Destination' , 'Period' ,
1250
1250
'TripPurp' , 'Trips' , 'Trips_joined' ])
1251
1251
.set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1252
-
1253
- result = matrix .join (matrix , how = 'inner' , rsuffix = '_joined' )
1252
+
1253
+ result = matrix .join (matrix , how = 'inner' , rsuffix = '_joined' )
1254
1254
assert_frame_equal (result , expected )
1255
-
1256
- #Left join
1255
+
1256
+ # Left join
1257
1257
expected = (
1258
1258
pd .DataFrame (
1259
- dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1259
+ dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1260
1260
Destination = [1 , 2 , 1 , 3 , 1 ],
1261
- Period = ['AM' ,'PM' ,'IP' , 'AM' , 'OP' ],
1261
+ Period = ['AM' , 'PM' , 'IP' , 'AM' , 'OP' ],
1262
1262
TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1263
1263
Trips = [1987 , 3647 , 2470 , 4296 , 4444 ],
1264
1264
Distance = [100 , 80 , 90 , np .nan , 75 ]),
1265
- columns = ['Origin' , 'Destination' , 'Period' , 'TripPurp' ,
1265
+ columns = ['Origin' , 'Destination' , 'Period' , 'TripPurp' ,
1266
1266
'Trips' , 'Distance' ])
1267
1267
.set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1268
-
1268
+
1269
1269
result = matrix .join (distances , how = 'left' )
1270
1270
assert_frame_equal (result , expected )
1271
-
1272
- #Right join
1271
+
1272
+ # Right join
1273
1273
expected = (
1274
1274
pd .DataFrame (
1275
- dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1275
+ dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1276
1276
Destination = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1277
- Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ,'IP' , 'AM' ],
1277
+ Period = ['AM' , 'PM' , 'IP' , 'AM' , 'OP' , 'IP' , 'AM' ],
1278
1278
LinkType = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1279
1279
Trips = [1987 , 3647 , 2470 , np .nan , 4444 , np .nan , np .nan ],
1280
1280
Distance = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1281
- columns = ['Origin' , 'Destination' , 'Period' ,
1281
+ columns = ['Origin' , 'Destination' , 'Period' ,
1282
1282
'LinkType' , 'Trips' , 'Distance' ])
1283
- .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1284
-
1283
+ .set_index (['Origin' , 'Destination' , 'Period' , 'LinkType' ]))
1284
+
1285
1285
result = matrix .join (distances , how = 'right' )
1286
1286
assert_frame_equal (result , expected )
1287
-
1288
- #Inner join
1287
+
1288
+ # Inner join
1289
1289
expected = (
1290
1290
pd .DataFrame (
1291
- dict (Origin = [1 , 1 , 2 , 3 ],
1291
+ dict (Origin = [1 , 1 , 2 , 3 ],
1292
1292
Destination = [1 , 2 , 1 , 1 ],
1293
- Period = ['AM' ,'PM' ,'IP' , 'OP' ],
1293
+ Period = ['AM' , 'PM' , 'IP' , 'OP' ],
1294
1294
Trips = [1987 , 3647 , 2470 , 4444 ],
1295
1295
Distance = [100 , 80 , 90 , 75 ]),
1296
- columns = ['Origin' , 'Destination' , 'Period' , 'Trips' , 'Distance' ])
1296
+ columns = ['Origin' , 'Destination' , 'Period' ,
1297
+ 'Trips' , 'Distance' ])
1297
1298
.set_index (['Origin' , 'Destination' , 'Period' ]))
1298
-
1299
+
1299
1300
result = matrix .join (distances , how = 'inner' )
1300
1301
assert_frame_equal (result , expected )
1301
1302
1302
- #Outer join
1303
+ # Outer join
1303
1304
expected = (
1304
1305
pd .DataFrame (
1305
- dict (Origin = [1 , 1 , 2 , 2 , 2 , 3 , 3 , 5 ],
1306
+ dict (Origin = [1 , 1 , 2 , 2 , 2 , 3 , 3 , 5 ],
1306
1307
Destination = [1 , 2 , 1 , 2 , 3 , 1 , 2 , 6 ],
1307
- Period = ['AM' ,'PM' ,'IP' , 'AM' , 'AM' , 'OP' , 'IP' , 'AM' ],
1308
+ Period = ['AM' , 'PM' , 'IP' , 'AM' , 'AM' , 'OP' , 'IP' , 'AM' ],
1308
1309
TripPurp = ['hbw' , 'nhb' , 'hbo' , np .nan , 'nhb' ,
1309
1310
'hbw' , np .nan , np .nan ],
1310
1311
LinkType = ['a' , 'a' , 'c' , 'b' , np .nan , 'a' , 'b' , 'a' ],
1311
- Trips = [1987 , 3647 , 2470 , np .nan , 4296 , 4444 , np .nan , np .nan ],
1312
+ Trips = [1987 , 3647 , 2470 , np .nan ,
1313
+ 4296 , 4444 , np .nan , np .nan ],
1312
1314
Distance = [100 , 80 , 90 , 80 , np .nan , 75 , 35 , 55 ]),
1313
- columns = ['Origin' , 'Destination' , 'Period' , 'TripPurp' , 'LinkType' ,
1314
- 'Trips' , 'Distance' ])
1315
- .set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' , 'LinkType' ]))
1316
-
1317
-
1315
+ columns = ['Origin' , 'Destination' , 'Period' , 'TripPurp' ,
1316
+ 'LinkType' , 'Trips' , 'Distance' ])
1317
+ .set_index (['Origin' , 'Destination' , 'Period' ,
1318
+ 'TripPurp' , 'LinkType' ]))
1319
+
1318
1320
result = matrix .join (distances , how = 'outer' )
1319
1321
assert_frame_equal (result , expected )
1320
-
1321
- #Non-unique resulting index
1322
+
1323
+ # Non-unique resulting index
1322
1324
distances2 = (
1323
1325
pd .DataFrame (
1324
- dict (Origin = [1 , 1 , 2 ],
1326
+ dict (Origin = [1 , 1 , 2 ],
1325
1327
Destination = [1 , 1 , 1 ],
1326
- Period = ['AM' ,'AM' , 'PM' ],
1328
+ Period = ['AM' , 'AM' , 'PM' ],
1327
1329
LinkType = ['a' , 'b' , 'a' ],
1328
1330
Distance = [100 , 110 , 120 ]),
1329
- columns = ['Origin' , 'Destination' , 'Period' ,
1331
+ columns = ['Origin' , 'Destination' , 'Period' ,
1330
1332
'LinkType' , 'Distance' ])
1331
- .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1332
-
1333
+ .set_index (['Origin' , 'Destination' , 'Period' , 'LinkType' ]))
1334
+
1333
1335
def f ():
1334
1336
matrix .join (distances2 , how = 'left' )
1335
1337
pytest .raises (TypeError , f )
1336
-
1337
- #No-overlapping level names
1338
+
1339
+ # Non-overlapping level names
1338
1340
distances2 = (
1339
1341
pd .DataFrame (
1340
- dict (Orig = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1342
+ dict (Orig = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1341
1343
Dest = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1342
- Per = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ,'IP' , 'AM' ],
1344
+ Per = ['AM' , 'PM' , 'IP' , 'AM' , 'OP' , 'IP' , 'AM' ],
1343
1345
LinkTyp = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1344
1346
Dist = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1345
- columns = ['Orig' , 'Dest' , 'Per' ,
1346
- 'LinkTyp' , 'Dist' ])
1347
- .set_index (['Orig' , 'Dest' ,'Per' , 'LinkTyp' ]))
1348
-
1347
+ columns = ['Orig' , 'Dest' , 'Per' , 'LinkTyp' , 'Dist' ])
1348
+ .set_index (['Orig' , 'Dest' , 'Per' , 'LinkTyp' ]))
1349
+
1349
1350
def f ():
1350
1351
matrix .join (distances2 , how = 'left' )
1351
1352
pytest .raises (ValueError , f )
@@ -1355,29 +1356,29 @@ def f():
1355
1356
pd .DataFrame (
1356
1357
dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1357
1358
Destination = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1358
- Period = [np .nan , np . nan , np . nan , np . nan , np . nan , np . nan , np . nan ] ,
1359
+ Period = [np .nan ] * 7 ,
1359
1360
LinkType = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1360
1361
Distance = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1361
- columns = ['Origin' , 'Destination' , 'Period' ,
1362
+ columns = ['Origin' , 'Destination' , 'Period' ,
1362
1363
'LinkType' , 'Distance' ])
1363
- .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1364
-
1365
-
1364
+ .set_index (['Origin' , 'Destination' , 'Period' , 'LinkType' ]))
1365
+
1366
1366
expected = (
1367
1367
pd .DataFrame (
1368
1368
dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1369
1369
Destination = [1 , 2 , 1 , 3 , 1 ],
1370
- Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ],
1370
+ Period = ['AM' , 'PM' , 'IP' , 'AM' , 'OP' ],
1371
1371
TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1372
1372
Trips = [1987 , 3647 , 2470 , 4296 , 4444 ],
1373
- Distance = [np .nan , np . nan , np . nan , np . nan , np . nan ] ),
1373
+ Distance = [np .nan ] * 5 ),
1374
1374
columns = ['Origin' , 'Destination' , 'Period' ,
1375
1375
'TripPurp' , 'Trips' , 'Distance' ])
1376
1376
.set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1377
-
1377
+
1378
1378
result = matrix .join (distances2 , how = 'left' )
1379
1379
assert_frame_equal (result , expected )
1380
1380
1381
+
1381
1382
@pytest .fixture
1382
1383
def df ():
1383
1384
return DataFrame (
0 commit comments