|
8 | 8 | read_csv, isnull, Series, date_range,
|
9 | 9 | Index, Panel, MultiIndex, Timestamp,
|
10 | 10 | DatetimeIndex, Categorical, CategoricalIndex)
|
11 |
| -from pandas.types.concat import union_categoricals |
12 | 11 | from pandas.util import testing as tm
|
13 | 12 | from pandas.util.testing import (assert_frame_equal,
|
14 | 13 | makeCustomDataframe as mkdf,
|
@@ -1511,337 +1510,6 @@ def test_concat_keys_with_none(self):
|
1511 | 1510 | keys=['b', 'c', 'd', 'e'])
|
1512 | 1511 | tm.assert_frame_equal(result, expected)
|
1513 | 1512 |
|
1514 |
| - def test_union_categorical(self): |
1515 |
| - # GH 13361 |
1516 |
| - data = [ |
1517 |
| - (list('abc'), list('abd'), list('abcabd')), |
1518 |
| - ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), |
1519 |
| - ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), |
1520 |
| - |
1521 |
| - (['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'], |
1522 |
| - ['b', 'b', np.nan, 'a', 'a', np.nan, 'c']), |
1523 |
| - |
1524 |
| - (pd.date_range('2014-01-01', '2014-01-05'), |
1525 |
| - pd.date_range('2014-01-06', '2014-01-07'), |
1526 |
| - pd.date_range('2014-01-01', '2014-01-07')), |
1527 |
| - |
1528 |
| - (pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'), |
1529 |
| - pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'), |
1530 |
| - pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')), |
1531 |
| - |
1532 |
| - (pd.period_range('2014-01-01', '2014-01-05'), |
1533 |
| - pd.period_range('2014-01-06', '2014-01-07'), |
1534 |
| - pd.period_range('2014-01-01', '2014-01-07')), |
1535 |
| - ] |
1536 |
| - |
1537 |
| - for a, b, combined in data: |
1538 |
| - for box in [Categorical, CategoricalIndex, Series]: |
1539 |
| - result = union_categoricals([box(Categorical(a)), |
1540 |
| - box(Categorical(b))]) |
1541 |
| - expected = Categorical(combined) |
1542 |
| - tm.assert_categorical_equal(result, expected, |
1543 |
| - check_category_order=True) |
1544 |
| - |
1545 |
| - # new categories ordered by appearance |
1546 |
| - s = Categorical(['x', 'y', 'z']) |
1547 |
| - s2 = Categorical(['a', 'b', 'c']) |
1548 |
| - result = union_categoricals([s, s2]) |
1549 |
| - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], |
1550 |
| - categories=['x', 'y', 'z', 'a', 'b', 'c']) |
1551 |
| - tm.assert_categorical_equal(result, expected) |
1552 |
| - |
1553 |
| - s = Categorical([0, 1.2, 2], ordered=True) |
1554 |
| - s2 = Categorical([0, 1.2, 2], ordered=True) |
1555 |
| - result = union_categoricals([s, s2]) |
1556 |
| - expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) |
1557 |
| - tm.assert_categorical_equal(result, expected) |
1558 |
| - |
1559 |
| - # must exactly match types |
1560 |
| - s = Categorical([0, 1.2, 2]) |
1561 |
| - s2 = Categorical([2, 3, 4]) |
1562 |
| - msg = 'dtype of categories must be the same' |
1563 |
| - with tm.assertRaisesRegexp(TypeError, msg): |
1564 |
| - union_categoricals([s, s2]) |
1565 |
| - |
1566 |
| - msg = 'No Categoricals to union' |
1567 |
| - with tm.assertRaisesRegexp(ValueError, msg): |
1568 |
| - union_categoricals([]) |
1569 |
| - |
1570 |
| - def test_union_categoricals_nan(self): |
1571 |
| - # GH 13759 |
1572 |
| - res = union_categoricals([pd.Categorical([1, 2, np.nan]), |
1573 |
| - pd.Categorical([3, 2, np.nan])]) |
1574 |
| - exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) |
1575 |
| - tm.assert_categorical_equal(res, exp) |
1576 |
| - |
1577 |
| - res = union_categoricals([pd.Categorical(['A', 'B']), |
1578 |
| - pd.Categorical(['B', 'B', np.nan])]) |
1579 |
| - exp = Categorical(['A', 'B', 'B', 'B', np.nan]) |
1580 |
| - tm.assert_categorical_equal(res, exp) |
1581 |
| - |
1582 |
| - val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'), |
1583 |
| - pd.NaT] |
1584 |
| - val2 = [pd.NaT, pd.Timestamp('2011-01-01'), |
1585 |
| - pd.Timestamp('2011-02-01')] |
1586 |
| - |
1587 |
| - res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) |
1588 |
| - exp = Categorical(val1 + val2, |
1589 |
| - categories=[pd.Timestamp('2011-01-01'), |
1590 |
| - pd.Timestamp('2011-03-01'), |
1591 |
| - pd.Timestamp('2011-02-01')]) |
1592 |
| - tm.assert_categorical_equal(res, exp) |
1593 |
| - |
1594 |
| - # all NaN |
1595 |
| - res = union_categoricals([pd.Categorical([np.nan, np.nan]), |
1596 |
| - pd.Categorical(['X'])]) |
1597 |
| - exp = Categorical([np.nan, np.nan, 'X']) |
1598 |
| - tm.assert_categorical_equal(res, exp) |
1599 |
| - |
1600 |
| - res = union_categoricals([pd.Categorical([np.nan, np.nan]), |
1601 |
| - pd.Categorical([np.nan, np.nan])]) |
1602 |
| - exp = Categorical([np.nan, np.nan, np.nan, np.nan]) |
1603 |
| - tm.assert_categorical_equal(res, exp) |
1604 |
| - |
1605 |
| - def test_union_categoricals_empty(self): |
1606 |
| - # GH 13759 |
1607 |
| - res = union_categoricals([pd.Categorical([]), |
1608 |
| - pd.Categorical([])]) |
1609 |
| - exp = Categorical([]) |
1610 |
| - tm.assert_categorical_equal(res, exp) |
1611 |
| - |
1612 |
| - res = union_categoricals([pd.Categorical([]), |
1613 |
| - pd.Categorical([1.0])]) |
1614 |
| - exp = Categorical([1.0]) |
1615 |
| - tm.assert_categorical_equal(res, exp) |
1616 |
| - |
1617 |
| - # to make dtype equal |
1618 |
| - nanc = pd.Categorical(np.array([np.nan], dtype=np.float64)) |
1619 |
| - res = union_categoricals([nanc, |
1620 |
| - pd.Categorical([])]) |
1621 |
| - tm.assert_categorical_equal(res, nanc) |
1622 |
| - |
1623 |
| - def test_union_categorical_same_category(self): |
1624 |
| - # check fastpath |
1625 |
| - c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) |
1626 |
| - c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) |
1627 |
| - res = union_categoricals([c1, c2]) |
1628 |
| - exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], |
1629 |
| - categories=[1, 2, 3, 4]) |
1630 |
| - tm.assert_categorical_equal(res, exp) |
1631 |
| - |
1632 |
| - c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z']) |
1633 |
| - c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z']) |
1634 |
| - res = union_categoricals([c1, c2]) |
1635 |
| - exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'], |
1636 |
| - categories=['x', 'y', 'z']) |
1637 |
| - tm.assert_categorical_equal(res, exp) |
1638 |
| - |
1639 |
| - def test_union_categoricals_ordered(self): |
1640 |
| - c1 = Categorical([1, 2, 3], ordered=True) |
1641 |
| - c2 = Categorical([1, 2, 3], ordered=False) |
1642 |
| - |
1643 |
| - msg = 'Categorical.ordered must be the same' |
1644 |
| - with tm.assertRaisesRegexp(TypeError, msg): |
1645 |
| - union_categoricals([c1, c2]) |
1646 |
| - |
1647 |
| - res = union_categoricals([c1, c1]) |
1648 |
| - exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) |
1649 |
| - tm.assert_categorical_equal(res, exp) |
1650 |
| - |
1651 |
| - c1 = Categorical([1, 2, 3, np.nan], ordered=True) |
1652 |
| - c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) |
1653 |
| - |
1654 |
| - res = union_categoricals([c1, c2]) |
1655 |
| - exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) |
1656 |
| - tm.assert_categorical_equal(res, exp) |
1657 |
| - |
1658 |
| - c1 = Categorical([1, 2, 3], ordered=True) |
1659 |
| - c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) |
1660 |
| - |
1661 |
| - msg = "to union ordered Categoricals, all categories must be the same" |
1662 |
| - with tm.assertRaisesRegexp(TypeError, msg): |
1663 |
| - union_categoricals([c1, c2]) |
1664 |
| - |
1665 |
| - def test_union_categoricals_ignore_order(self): |
1666 |
| - # GH 15219 |
1667 |
| - c1 = Categorical([1, 2, 3], ordered=True) |
1668 |
| - c2 = Categorical([1, 2, 3], ordered=False) |
1669 |
| - |
1670 |
| - res = union_categoricals([c1, c2], ignore_order=True) |
1671 |
| - exp = Categorical([1, 2, 3, 1, 2, 3]) |
1672 |
| - tm.assert_categorical_equal(res, exp) |
1673 |
| - |
1674 |
| - msg = 'Categorical.ordered must be the same' |
1675 |
| - with tm.assertRaisesRegexp(TypeError, msg): |
1676 |
| - union_categoricals([c1, c2], ignore_order=False) |
1677 |
| - |
1678 |
| - res = union_categoricals([c1, c1], ignore_order=True) |
1679 |
| - exp = Categorical([1, 2, 3, 1, 2, 3]) |
1680 |
| - tm.assert_categorical_equal(res, exp) |
1681 |
| - |
1682 |
| - res = union_categoricals([c1, c1], ignore_order=False) |
1683 |
| - exp = Categorical([1, 2, 3, 1, 2, 3], |
1684 |
| - categories=[1, 2, 3], ordered=True) |
1685 |
| - tm.assert_categorical_equal(res, exp) |
1686 |
| - |
1687 |
| - c1 = Categorical([1, 2, 3, np.nan], ordered=True) |
1688 |
| - c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) |
1689 |
| - |
1690 |
| - res = union_categoricals([c1, c2], ignore_order=True) |
1691 |
| - exp = Categorical([1, 2, 3, np.nan, 3, 2]) |
1692 |
| - tm.assert_categorical_equal(res, exp) |
1693 |
| - |
1694 |
| - c1 = Categorical([1, 2, 3], ordered=True) |
1695 |
| - c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) |
1696 |
| - |
1697 |
| - res = union_categoricals([c1, c2], ignore_order=True) |
1698 |
| - exp = Categorical([1, 2, 3, 1, 2, 3]) |
1699 |
| - tm.assert_categorical_equal(res, exp) |
1700 |
| - |
1701 |
| - res = union_categoricals([c2, c1], ignore_order=True, |
1702 |
| - sort_categories=True) |
1703 |
| - exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) |
1704 |
| - tm.assert_categorical_equal(res, exp) |
1705 |
| - |
1706 |
| - c1 = Categorical([1, 2, 3], ordered=True) |
1707 |
| - c2 = Categorical([4, 5, 6], ordered=True) |
1708 |
| - result = union_categoricals([c1, c2], ignore_order=True) |
1709 |
| - expected = Categorical([1, 2, 3, 4, 5, 6]) |
1710 |
| - tm.assert_categorical_equal(result, expected) |
1711 |
| - |
1712 |
| - msg = "to union ordered Categoricals, all categories must be the same" |
1713 |
| - with tm.assertRaisesRegexp(TypeError, msg): |
1714 |
| - union_categoricals([c1, c2], ignore_order=False) |
1715 |
| - |
1716 |
| - with tm.assertRaisesRegexp(TypeError, msg): |
1717 |
| - union_categoricals([c1, c2]) |
1718 |
| - |
1719 |
| - def test_union_categoricals_sort(self): |
1720 |
| - # GH 13846 |
1721 |
| - c1 = Categorical(['x', 'y', 'z']) |
1722 |
| - c2 = Categorical(['a', 'b', 'c']) |
1723 |
| - result = union_categoricals([c1, c2], sort_categories=True) |
1724 |
| - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], |
1725 |
| - categories=['a', 'b', 'c', 'x', 'y', 'z']) |
1726 |
| - tm.assert_categorical_equal(result, expected) |
1727 |
| - |
1728 |
| - # fastpath |
1729 |
| - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) |
1730 |
| - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) |
1731 |
| - result = union_categoricals([c1, c2], sort_categories=True) |
1732 |
| - expected = Categorical(['a', 'b', 'b', 'c'], |
1733 |
| - categories=['a', 'b', 'c']) |
1734 |
| - tm.assert_categorical_equal(result, expected) |
1735 |
| - |
1736 |
| - c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b']) |
1737 |
| - c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b']) |
1738 |
| - result = union_categoricals([c1, c2], sort_categories=True) |
1739 |
| - expected = Categorical(['a', 'b', 'b', 'c'], |
1740 |
| - categories=['a', 'b', 'c']) |
1741 |
| - tm.assert_categorical_equal(result, expected) |
1742 |
| - |
1743 |
| - # fastpath - skip resort |
1744 |
| - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) |
1745 |
| - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) |
1746 |
| - result = union_categoricals([c1, c2], sort_categories=True) |
1747 |
| - expected = Categorical(['a', 'b', 'b', 'c'], |
1748 |
| - categories=['a', 'b', 'c']) |
1749 |
| - tm.assert_categorical_equal(result, expected) |
1750 |
| - |
1751 |
| - c1 = Categorical(['x', np.nan]) |
1752 |
| - c2 = Categorical([np.nan, 'b']) |
1753 |
| - result = union_categoricals([c1, c2], sort_categories=True) |
1754 |
| - expected = Categorical(['x', np.nan, np.nan, 'b'], |
1755 |
| - categories=['b', 'x']) |
1756 |
| - tm.assert_categorical_equal(result, expected) |
1757 |
| - |
1758 |
| - c1 = Categorical([np.nan]) |
1759 |
| - c2 = Categorical([np.nan]) |
1760 |
| - result = union_categoricals([c1, c2], sort_categories=True) |
1761 |
| - expected = Categorical([np.nan, np.nan], categories=[]) |
1762 |
| - tm.assert_categorical_equal(result, expected) |
1763 |
| - |
1764 |
| - c1 = Categorical([]) |
1765 |
| - c2 = Categorical([]) |
1766 |
| - result = union_categoricals([c1, c2], sort_categories=True) |
1767 |
| - expected = Categorical([]) |
1768 |
| - tm.assert_categorical_equal(result, expected) |
1769 |
| - |
1770 |
| - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) |
1771 |
| - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) |
1772 |
| - with tm.assertRaises(TypeError): |
1773 |
| - union_categoricals([c1, c2], sort_categories=True) |
1774 |
| - |
1775 |
| - def test_union_categoricals_sort_false(self): |
1776 |
| - # GH 13846 |
1777 |
| - c1 = Categorical(['x', 'y', 'z']) |
1778 |
| - c2 = Categorical(['a', 'b', 'c']) |
1779 |
| - result = union_categoricals([c1, c2], sort_categories=False) |
1780 |
| - expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'], |
1781 |
| - categories=['x', 'y', 'z', 'a', 'b', 'c']) |
1782 |
| - tm.assert_categorical_equal(result, expected) |
1783 |
| - |
1784 |
| - # fastpath |
1785 |
| - c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c']) |
1786 |
| - c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c']) |
1787 |
| - result = union_categoricals([c1, c2], sort_categories=False) |
1788 |
| - expected = Categorical(['a', 'b', 'b', 'c'], |
1789 |
| - categories=['b', 'a', 'c']) |
1790 |
| - tm.assert_categorical_equal(result, expected) |
1791 |
| - |
1792 |
| - # fastpath - skip resort |
1793 |
| - c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c']) |
1794 |
| - c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c']) |
1795 |
| - result = union_categoricals([c1, c2], sort_categories=False) |
1796 |
| - expected = Categorical(['a', 'b', 'b', 'c'], |
1797 |
| - categories=['a', 'b', 'c']) |
1798 |
| - tm.assert_categorical_equal(result, expected) |
1799 |
| - |
1800 |
| - c1 = Categorical(['x', np.nan]) |
1801 |
| - c2 = Categorical([np.nan, 'b']) |
1802 |
| - result = union_categoricals([c1, c2], sort_categories=False) |
1803 |
| - expected = Categorical(['x', np.nan, np.nan, 'b'], |
1804 |
| - categories=['x', 'b']) |
1805 |
| - tm.assert_categorical_equal(result, expected) |
1806 |
| - |
1807 |
| - c1 = Categorical([np.nan]) |
1808 |
| - c2 = Categorical([np.nan]) |
1809 |
| - result = union_categoricals([c1, c2], sort_categories=False) |
1810 |
| - expected = Categorical([np.nan, np.nan], categories=[]) |
1811 |
| - tm.assert_categorical_equal(result, expected) |
1812 |
| - |
1813 |
| - c1 = Categorical([]) |
1814 |
| - c2 = Categorical([]) |
1815 |
| - result = union_categoricals([c1, c2], sort_categories=False) |
1816 |
| - expected = Categorical([]) |
1817 |
| - tm.assert_categorical_equal(result, expected) |
1818 |
| - |
1819 |
| - c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True) |
1820 |
| - c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True) |
1821 |
| - result = union_categoricals([c1, c2], sort_categories=False) |
1822 |
| - expected = Categorical(['b', 'a', 'a', 'c'], |
1823 |
| - categories=['b', 'a', 'c'], ordered=True) |
1824 |
| - tm.assert_categorical_equal(result, expected) |
1825 |
| - |
1826 |
| - def test_union_categorical_unwrap(self): |
1827 |
| - # GH 14173 |
1828 |
| - c1 = Categorical(['a', 'b']) |
1829 |
| - c2 = pd.Series(['b', 'c'], dtype='category') |
1830 |
| - result = union_categoricals([c1, c2]) |
1831 |
| - expected = Categorical(['a', 'b', 'b', 'c']) |
1832 |
| - tm.assert_categorical_equal(result, expected) |
1833 |
| - |
1834 |
| - c2 = CategoricalIndex(c2) |
1835 |
| - result = union_categoricals([c1, c2]) |
1836 |
| - tm.assert_categorical_equal(result, expected) |
1837 |
| - |
1838 |
| - c1 = Series(c1) |
1839 |
| - result = union_categoricals([c1, c2]) |
1840 |
| - tm.assert_categorical_equal(result, expected) |
1841 |
| - |
1842 |
| - with tm.assertRaises(TypeError): |
1843 |
| - union_categoricals([c1, ['a', 'b', 'c']]) |
1844 |
| - |
1845 | 1513 | def test_concat_bug_1719(self):
|
1846 | 1514 | ts1 = tm.makeTimeSeries()
|
1847 | 1515 | ts2 = tm.makeTimeSeries()[::2]
|
|
0 commit comments