@@ -1527,21 +1527,20 @@ def test_union(self):
1527
1527
almost = True ,
1528
1528
)
1529
1529
1530
- if LooseVersion (pd .__version__ ) >= LooseVersion ("1.3" ):
1531
- # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
1532
- pass
1533
- else :
1534
- self .assert_eq (psidx2 .union (psidx1 ), pidx2 .union (pidx1 ))
1535
- self .assert_eq (
1536
- psidx2 .union ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ]),
1537
- pidx2 .union ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ]),
1538
- almost = True ,
1539
- )
1540
- self .assert_eq (
1541
- psidx2 .union (ps .Series ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ])),
1542
- pidx2 .union (pd .Series ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ])),
1543
- almost = True ,
1544
- )
1530
+ # Manually create the expected result here since there is a bug in Index.union
1531
+ # dropping duplicated values in pandas < 1.3.
1532
+ expected = pd .Index ([1 , 2 , 3 , 3 , 3 , 4 , 4 , 4 , 5 , 6 ])
1533
+ self .assert_eq (psidx2 .union (psidx1 ), expected )
1534
+ self .assert_eq (
1535
+ psidx2 .union ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ]),
1536
+ expected ,
1537
+ almost = True ,
1538
+ )
1539
+ self .assert_eq (
1540
+ psidx2 .union (ps .Series ([1 , 2 , 3 , 4 , 3 , 4 , 3 , 4 ])),
1541
+ expected ,
1542
+ almost = True ,
1543
+ )
1545
1544
1546
1545
# MultiIndex
1547
1546
pmidx1 = pd .MultiIndex .from_tuples ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )])
@@ -1553,80 +1552,85 @@ def test_union(self):
1553
1552
psmidx3 = ps .from_pandas (pmidx3 )
1554
1553
psmidx4 = ps .from_pandas (pmidx4 )
1555
1554
1556
- if LooseVersion (pd .__version__ ) >= LooseVersion ("1.3" ):
1557
- # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
1558
- pass
1559
- else :
1560
- self .assert_eq (psmidx1 .union (psmidx2 ), pmidx1 .union (pmidx2 ))
1561
- self .assert_eq (psmidx2 .union (psmidx1 ), pmidx2 .union (pmidx1 ))
1562
- self .assert_eq (psmidx3 .union (psmidx4 ), pmidx3 .union (pmidx4 ))
1563
- self .assert_eq (psmidx4 .union (psmidx3 ), pmidx4 .union (pmidx3 ))
1564
- self .assert_eq (
1565
- psmidx1 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]),
1566
- pmidx1 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]),
1567
- )
1568
- self .assert_eq (
1569
- psmidx2 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )]),
1570
- pmidx2 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )]),
1571
- )
1572
- self .assert_eq (
1573
- psmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]),
1574
- pmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]),
1575
- )
1576
- self .assert_eq (
1577
- psmidx4 .union ([(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )]),
1578
- pmidx4 .union ([(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )]),
1579
- )
1555
+ # Manually create the expected result here since there is a bug in MultiIndex.union
1556
+ # dropping duplicated values in pandas < 1.3.
1557
+ expected = pd .MultiIndex .from_tuples (
1558
+ [("x" , "a" ), ("x" , "a" ), ("x" , "b" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]
1559
+ )
1560
+ self .assert_eq (psmidx1 .union (psmidx2 ), expected )
1561
+ self .assert_eq (psmidx2 .union (psmidx1 ), expected )
1562
+ self .assert_eq (
1563
+ psmidx1 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]),
1564
+ expected ,
1565
+ )
1566
+ self .assert_eq (
1567
+ psmidx2 .union ([("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )]),
1568
+ expected ,
1569
+ )
1570
+
1571
+ expected = pd .MultiIndex .from_tuples (
1572
+ [(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 3 ), (1 , 4 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]
1573
+ )
1574
+ self .assert_eq (psmidx3 .union (psmidx4 ), expected )
1575
+ self .assert_eq (psmidx4 .union (psmidx3 ), expected )
1576
+ self .assert_eq (
1577
+ psmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]),
1578
+ expected ,
1579
+ )
1580
+ self .assert_eq (
1581
+ psmidx4 .union ([(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )]),
1582
+ expected ,
1583
+ )
1580
1584
1581
- if LooseVersion (pd .__version__ ) >= LooseVersion ("1.3" ):
1582
- # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
1583
- pass
1584
1585
# Testing if the result is correct after sort=False.
1585
1586
# The `sort` argument was added in pandas 0.24.
1586
- elif LooseVersion (pd .__version__ ) >= LooseVersion ("0.24" ):
1587
+ if LooseVersion (pd .__version__ ) >= LooseVersion ("0.24" ):
1588
+ # Manually create the expected result here since there is a bug in MultiIndex.union
1589
+ # dropping duplicated values in pandas < 1.3.
1590
+ expected = pd .MultiIndex .from_tuples (
1591
+ [("x" , "a" ), ("x" , "a" ), ("x" , "b" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )]
1592
+ )
1587
1593
self .assert_eq (
1588
1594
psmidx1 .union (psmidx2 , sort = False ).sort_values (),
1589
- pmidx1 . union ( pmidx2 , sort = False ). sort_values () ,
1595
+ expected ,
1590
1596
)
1591
1597
self .assert_eq (
1592
1598
psmidx2 .union (psmidx1 , sort = False ).sort_values (),
1593
- pmidx2 .union (pmidx1 , sort = False ).sort_values (),
1594
- )
1595
- self .assert_eq (
1596
- psmidx3 .union (psmidx4 , sort = False ).sort_values (),
1597
- pmidx3 .union (pmidx4 , sort = False ).sort_values (),
1598
- )
1599
- self .assert_eq (
1600
- psmidx4 .union (psmidx3 , sort = False ).sort_values (),
1601
- pmidx4 .union (pmidx3 , sort = False ).sort_values (),
1599
+ expected ,
1602
1600
)
1603
1601
self .assert_eq (
1604
1602
psmidx1 .union (
1605
1603
[("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )], sort = False
1606
1604
).sort_values (),
1607
- pmidx1 .union (
1608
- [("x" , "a" ), ("x" , "b" ), ("x" , "c" ), ("x" , "d" )], sort = False
1609
- ).sort_values (),
1605
+ expected ,
1610
1606
)
1611
1607
self .assert_eq (
1612
1608
psmidx2 .union (
1613
1609
[("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )], sort = False
1614
1610
).sort_values (),
1615
- pmidx2 .union (
1616
- [("x" , "a" ), ("x" , "b" ), ("x" , "a" ), ("x" , "b" )], sort = False
1617
- ).sort_values (),
1611
+ expected ,
1612
+ )
1613
+
1614
+ expected = pd .MultiIndex .from_tuples (
1615
+ [(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 3 ), (1 , 4 ), (1 , 4 ), (1 , 5 ), (1 , 6 )]
1616
+ )
1617
+ self .assert_eq (
1618
+ psmidx3 .union (psmidx4 , sort = False ).sort_values (),
1619
+ expected ,
1620
+ )
1621
+ self .assert_eq (
1622
+ psmidx4 .union (psmidx3 , sort = False ).sort_values (),
1623
+ expected ,
1618
1624
)
1619
1625
self .assert_eq (
1620
1626
psmidx3 .union ([(1 , 3 ), (1 , 4 ), (1 , 5 ), (1 , 6 )], sort = False ).sort_values (),
1621
- pmidx3 . union ([( 1 , 3 ), ( 1 , 4 ), ( 1 , 5 ), ( 1 , 6 )], sort = False ). sort_values () ,
1627
+ expected ,
1622
1628
)
1623
1629
self .assert_eq (
1624
1630
psmidx4 .union (
1625
1631
[(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )], sort = False
1626
1632
).sort_values (),
1627
- pmidx4 .union (
1628
- [(1 , 1 ), (1 , 2 ), (1 , 3 ), (1 , 4 ), (1 , 3 ), (1 , 4 )], sort = False
1629
- ).sort_values (),
1633
+ expected ,
1630
1634
)
1631
1635
1632
1636
self .assertRaises (NotImplementedError , lambda : psidx1 .union (psmidx1 ))
0 commit comments