@@ -1487,21 +1487,20 @@ def test_union(self):
1487
1487
almost = True ,
1488
1488
)
1489
1489
1490
- if LooseVersion (pd .__version__ ) >= LooseVersion ("1.3" ):
1491
- # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
1492
- pass
1493
- else :
1494
- self .assert_eq (psidx2 .union (psidx1 ), pidx2 .union (pidx1 ))
1495
- self .assert_eq (
1496
- psidx2 .union ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ]),
1497
- pidx2 .union ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ]),
1498
- almost = True ,
1499
- )
1500
- self .assert_eq (
1501
- psidx2 .union (ps .Series ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ])),
1502
- pidx2 .union (pd .Series ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ])),
1503
- almost = True ,
1504
- )
1490
+ # Manually create the expected result here since there is a bug in Index.union
1491
+ # dropping duplicated values in pandas < 1.3.
1492
+ expected = pd .Index ([1 , 2 , 3 , 3 , 3 , 4 , 4 , 4 , 5 , 6 ])
1493
+ self .assert_eq (psidx2 .union (psidx1 ), expected )
1494
+ self .assert_eq (
1495
+ psidx2 .union ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ]),
1496
+ expected ,
1497
+ almost = True ,
1498
+ )
1499
+ self .assert_eq (
1500
+ psidx2 .union (ps .Series ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ])),
1501
+ expected ,
1502
+ almost = True ,
1503
+ )
1505
1504
1506
1505
# MultiIndex
1507
1506
pmidx1 = pd .MultiIndex .from_tuples ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )])
@@ -1513,80 +1512,85 @@ def test_union(self):
1513
1512
psmidx3 = ps .from_pandas (pmidx3 )
1514
1513
psmidx4 = ps .from_pandas (pmidx4 )
1515
1514
1516
- if LooseVersion (pd .__version__ ) >= LooseVersion ("1.3" ):
1517
- # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
1518
- pass
1519
- else :
1520
- self .assert_eq (psmidx1 .union (psmidx2 ), pmidx1 .union (pmidx2 ))
1521
- self .assert_eq (psmidx2 .union (psmidx1 ), pmidx2 .union (pmidx1 ))
1522
- self .assert_eq (psmidx3 .union (psmidx4 ), pmidx3 .union (pmidx4 ))
1523
- self .assert_eq (psmidx4 .union (psmidx3 ), pmidx4 .union (pmidx3 ))
1524
- self .assert_eq (
1525
- psmidx1 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]),
1526
- pmidx1 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]),
1527
- )
1528
- self .assert_eq (
1529
- psmidx2 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )]),
1530
- pmidx2 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )]),
1531
- )
1532
- self .assert_eq (
1533
- psmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]),
1534
- pmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]),
1535
- )
1536
- self .assert_eq (
1537
- psmidx4 .union ([(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )]),
1538
- pmidx4 .union ([(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )]),
1539
- )
1515
+ # Manually create the expected result here since there is a bug in MultiIndex.union
1516
+ # dropping duplicated values in pandas < 1.3.
1517
+ expected = pd .MultiIndex .from_tuples (
1518
+ [("x" , "a" ), ("x" , "a" ), ("x" , "b" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]
1519
+ )
1520
+ self .assert_eq (psmidx1 .union (psmidx2 ), expected )
1521
+ self .assert_eq (psmidx2 .union (psmidx1 ), expected )
1522
+ self .assert_eq (
1523
+ psmidx1 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]),
1524
+ expected ,
1525
+ )
1526
+ self .assert_eq (
1527
+ psmidx2 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )]),
1528
+ expected ,
1529
+ )
1530
+
1531
+ expected = pd .MultiIndex .from_tuples (
1532
+ [(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 3 ), (1 , 4 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]
1533
+ )
1534
+ self .assert_eq (psmidx3 .union (psmidx4 ), expected )
1535
+ self .assert_eq (psmidx4 .union (psmidx3 ), expected )
1536
+ self .assert_eq (
1537
+ psmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]),
1538
+ expected ,
1539
+ )
1540
+ self .assert_eq (
1541
+ psmidx4 .union ([(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )]),
1542
+ expected ,
1543
+ )
1540
1544
1541
- if LooseVersion (pd .__version__ ) >= LooseVersion ("1.3" ):
1542
- # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
1543
- pass
1544
1545
# Testing if the result is correct after sort=False.
1545
1546
# The `sort` argument is added in pandas 0.24.
1546
- elif LooseVersion (pd .__version__ ) >= LooseVersion ("0.24" ):
1547
+ if LooseVersion (pd .__version__ ) >= LooseVersion ("0.24" ):
1548
+ # Manually create the expected result here since there is a bug in MultiIndex.union
1549
+ # dropping duplicated values in pandas < 1.3.
1550
+ expected = pd .MultiIndex .from_tuples (
1551
+ [("x" , "a" ), ("x" , "a" ), ("x" , "b" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]
1552
+ )
1547
1553
self .assert_eq (
1548
1554
psmidx1 .union (psmidx2 , sort = False ).sort_values (),
1549
- pmidx1 . union ( pmidx2 , sort = False ). sort_values () ,
1555
+ expected ,
1550
1556
)
1551
1557
self .assert_eq (
1552
1558
psmidx2 .union (psmidx1 , sort = False ).sort_values (),
1553
- pmidx2 .union (pmidx1 , sort = False ).sort_values (),
1554
- )
1555
- self .assert_eq (
1556
- psmidx3 .union (psmidx4 , sort = False ).sort_values (),
1557
- pmidx3 .union (pmidx4 , sort = False ).sort_values (),
1558
- )
1559
- self .assert_eq (
1560
- psmidx4 .union (psmidx3 , sort = False ).sort_values (),
1561
- pmidx4 .union (pmidx3 , sort = False ).sort_values (),
1559
+ expected ,
1562
1560
)
1563
1561
self .assert_eq (
1564
1562
psmidx1 .union (
1565
1563
[("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )], sort = False
1566
1564
).sort_values (),
1567
- pmidx1 .union (
1568
- [("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )], sort = False
1569
- ).sort_values (),
1565
+ expected ,
1570
1566
)
1571
1567
self .assert_eq (
1572
1568
psmidx2 .union (
1573
1569
[("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )], sort = False
1574
1570
).sort_values (),
1575
- pmidx2 .union (
1576
- [("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )], sort = False
1577
- ).sort_values (),
1571
+ expected ,
1572
+ )
1573
+
1574
+ expected = pd .MultiIndex .from_tuples (
1575
+ [(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 3 ), (1 , 4 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]
1576
+ )
1577
+ self .assert_eq (
1578
+ psmidx3 .union (psmidx4 , sort = False ).sort_values (),
1579
+ expected ,
1580
+ )
1581
+ self .assert_eq (
1582
+ psmidx4 .union (psmidx3 , sort = False ).sort_values (),
1583
+ expected ,
1578
1584
)
1579
1585
self .assert_eq (
1580
1586
psmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )], sort = False ).sort_values (),
1581
- pmidx3 . union ([( 1 , 3 ), ( 1 , 4 ), ( 1 , 5 ), ( 1 , 6 )], sort = False ). sort_values () ,
1587
+ expected ,
1582
1588
)
1583
1589
self .assert_eq (
1584
1590
psmidx4 .union (
1585
1591
[(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )], sort = False
1586
1592
).sort_values (),
1587
- pmidx4 .union (
1588
- [(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )], sort = False
1589
- ).sort_values (),
1593
+ expected ,
1590
1594
)
1591
1595
1592
1596
self .assertRaises (NotImplementedError , lambda : psidx1 .union (psmidx1 ))
0 commit comments