Skip to content

Commit 90a0cf7

Browse files
jreback authored and AnkurDedania committed
TST: break out union_categoricals to separate test file
1 parent 1a0599d commit 90a0cf7

File tree

2 files changed

+339
-332
lines changed

2 files changed

+339
-332
lines changed

pandas/tests/tools/test_concat.py

-332
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
read_csv, isnull, Series, date_range,
99
Index, Panel, MultiIndex, Timestamp,
1010
DatetimeIndex, Categorical, CategoricalIndex)
11-
from pandas.types.concat import union_categoricals
1211
from pandas.util import testing as tm
1312
from pandas.util.testing import (assert_frame_equal,
1413
makeCustomDataframe as mkdf,
@@ -1511,337 +1510,6 @@ def test_concat_keys_with_none(self):
15111510
keys=['b', 'c', 'd', 'e'])
15121511
tm.assert_frame_equal(result, expected)
15131512

1514-
def test_union_categorical(self):
1515-
# GH 13361
1516-
data = [
1517-
(list('abc'), list('abd'), list('abcabd')),
1518-
([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
1519-
([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),
1520-
1521-
(['b', 'b', np.nan, 'a'], ['a', np.nan, 'c'],
1522-
['b', 'b', np.nan, 'a', 'a', np.nan, 'c']),
1523-
1524-
(pd.date_range('2014-01-01', '2014-01-05'),
1525-
pd.date_range('2014-01-06', '2014-01-07'),
1526-
pd.date_range('2014-01-01', '2014-01-07')),
1527-
1528-
(pd.date_range('2014-01-01', '2014-01-05', tz='US/Central'),
1529-
pd.date_range('2014-01-06', '2014-01-07', tz='US/Central'),
1530-
pd.date_range('2014-01-01', '2014-01-07', tz='US/Central')),
1531-
1532-
(pd.period_range('2014-01-01', '2014-01-05'),
1533-
pd.period_range('2014-01-06', '2014-01-07'),
1534-
pd.period_range('2014-01-01', '2014-01-07')),
1535-
]
1536-
1537-
for a, b, combined in data:
1538-
for box in [Categorical, CategoricalIndex, Series]:
1539-
result = union_categoricals([box(Categorical(a)),
1540-
box(Categorical(b))])
1541-
expected = Categorical(combined)
1542-
tm.assert_categorical_equal(result, expected,
1543-
check_category_order=True)
1544-
1545-
# new categories ordered by appearance
1546-
s = Categorical(['x', 'y', 'z'])
1547-
s2 = Categorical(['a', 'b', 'c'])
1548-
result = union_categoricals([s, s2])
1549-
expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
1550-
categories=['x', 'y', 'z', 'a', 'b', 'c'])
1551-
tm.assert_categorical_equal(result, expected)
1552-
1553-
s = Categorical([0, 1.2, 2], ordered=True)
1554-
s2 = Categorical([0, 1.2, 2], ordered=True)
1555-
result = union_categoricals([s, s2])
1556-
expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True)
1557-
tm.assert_categorical_equal(result, expected)
1558-
1559-
# must exactly match types
1560-
s = Categorical([0, 1.2, 2])
1561-
s2 = Categorical([2, 3, 4])
1562-
msg = 'dtype of categories must be the same'
1563-
with tm.assertRaisesRegexp(TypeError, msg):
1564-
union_categoricals([s, s2])
1565-
1566-
msg = 'No Categoricals to union'
1567-
with tm.assertRaisesRegexp(ValueError, msg):
1568-
union_categoricals([])
1569-
1570-
def test_union_categoricals_nan(self):
1571-
# GH 13759
1572-
res = union_categoricals([pd.Categorical([1, 2, np.nan]),
1573-
pd.Categorical([3, 2, np.nan])])
1574-
exp = Categorical([1, 2, np.nan, 3, 2, np.nan])
1575-
tm.assert_categorical_equal(res, exp)
1576-
1577-
res = union_categoricals([pd.Categorical(['A', 'B']),
1578-
pd.Categorical(['B', 'B', np.nan])])
1579-
exp = Categorical(['A', 'B', 'B', 'B', np.nan])
1580-
tm.assert_categorical_equal(res, exp)
1581-
1582-
val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'),
1583-
pd.NaT]
1584-
val2 = [pd.NaT, pd.Timestamp('2011-01-01'),
1585-
pd.Timestamp('2011-02-01')]
1586-
1587-
res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)])
1588-
exp = Categorical(val1 + val2,
1589-
categories=[pd.Timestamp('2011-01-01'),
1590-
pd.Timestamp('2011-03-01'),
1591-
pd.Timestamp('2011-02-01')])
1592-
tm.assert_categorical_equal(res, exp)
1593-
1594-
# all NaN
1595-
res = union_categoricals([pd.Categorical([np.nan, np.nan]),
1596-
pd.Categorical(['X'])])
1597-
exp = Categorical([np.nan, np.nan, 'X'])
1598-
tm.assert_categorical_equal(res, exp)
1599-
1600-
res = union_categoricals([pd.Categorical([np.nan, np.nan]),
1601-
pd.Categorical([np.nan, np.nan])])
1602-
exp = Categorical([np.nan, np.nan, np.nan, np.nan])
1603-
tm.assert_categorical_equal(res, exp)
1604-
1605-
def test_union_categoricals_empty(self):
1606-
# GH 13759
1607-
res = union_categoricals([pd.Categorical([]),
1608-
pd.Categorical([])])
1609-
exp = Categorical([])
1610-
tm.assert_categorical_equal(res, exp)
1611-
1612-
res = union_categoricals([pd.Categorical([]),
1613-
pd.Categorical([1.0])])
1614-
exp = Categorical([1.0])
1615-
tm.assert_categorical_equal(res, exp)
1616-
1617-
# to make dtype equal
1618-
nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
1619-
res = union_categoricals([nanc,
1620-
pd.Categorical([])])
1621-
tm.assert_categorical_equal(res, nanc)
1622-
1623-
def test_union_categorical_same_category(self):
1624-
# check fastpath
1625-
c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
1626-
c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4])
1627-
res = union_categoricals([c1, c2])
1628-
exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan],
1629-
categories=[1, 2, 3, 4])
1630-
tm.assert_categorical_equal(res, exp)
1631-
1632-
c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z'])
1633-
c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z'])
1634-
res = union_categoricals([c1, c2])
1635-
exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'],
1636-
categories=['x', 'y', 'z'])
1637-
tm.assert_categorical_equal(res, exp)
1638-
1639-
def test_union_categoricals_ordered(self):
1640-
c1 = Categorical([1, 2, 3], ordered=True)
1641-
c2 = Categorical([1, 2, 3], ordered=False)
1642-
1643-
msg = 'Categorical.ordered must be the same'
1644-
with tm.assertRaisesRegexp(TypeError, msg):
1645-
union_categoricals([c1, c2])
1646-
1647-
res = union_categoricals([c1, c1])
1648-
exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True)
1649-
tm.assert_categorical_equal(res, exp)
1650-
1651-
c1 = Categorical([1, 2, 3, np.nan], ordered=True)
1652-
c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
1653-
1654-
res = union_categoricals([c1, c2])
1655-
exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True)
1656-
tm.assert_categorical_equal(res, exp)
1657-
1658-
c1 = Categorical([1, 2, 3], ordered=True)
1659-
c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
1660-
1661-
msg = "to union ordered Categoricals, all categories must be the same"
1662-
with tm.assertRaisesRegexp(TypeError, msg):
1663-
union_categoricals([c1, c2])
1664-
1665-
def test_union_categoricals_ignore_order(self):
1666-
# GH 15219
1667-
c1 = Categorical([1, 2, 3], ordered=True)
1668-
c2 = Categorical([1, 2, 3], ordered=False)
1669-
1670-
res = union_categoricals([c1, c2], ignore_order=True)
1671-
exp = Categorical([1, 2, 3, 1, 2, 3])
1672-
tm.assert_categorical_equal(res, exp)
1673-
1674-
msg = 'Categorical.ordered must be the same'
1675-
with tm.assertRaisesRegexp(TypeError, msg):
1676-
union_categoricals([c1, c2], ignore_order=False)
1677-
1678-
res = union_categoricals([c1, c1], ignore_order=True)
1679-
exp = Categorical([1, 2, 3, 1, 2, 3])
1680-
tm.assert_categorical_equal(res, exp)
1681-
1682-
res = union_categoricals([c1, c1], ignore_order=False)
1683-
exp = Categorical([1, 2, 3, 1, 2, 3],
1684-
categories=[1, 2, 3], ordered=True)
1685-
tm.assert_categorical_equal(res, exp)
1686-
1687-
c1 = Categorical([1, 2, 3, np.nan], ordered=True)
1688-
c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
1689-
1690-
res = union_categoricals([c1, c2], ignore_order=True)
1691-
exp = Categorical([1, 2, 3, np.nan, 3, 2])
1692-
tm.assert_categorical_equal(res, exp)
1693-
1694-
c1 = Categorical([1, 2, 3], ordered=True)
1695-
c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
1696-
1697-
res = union_categoricals([c1, c2], ignore_order=True)
1698-
exp = Categorical([1, 2, 3, 1, 2, 3])
1699-
tm.assert_categorical_equal(res, exp)
1700-
1701-
res = union_categoricals([c2, c1], ignore_order=True,
1702-
sort_categories=True)
1703-
exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
1704-
tm.assert_categorical_equal(res, exp)
1705-
1706-
c1 = Categorical([1, 2, 3], ordered=True)
1707-
c2 = Categorical([4, 5, 6], ordered=True)
1708-
result = union_categoricals([c1, c2], ignore_order=True)
1709-
expected = Categorical([1, 2, 3, 4, 5, 6])
1710-
tm.assert_categorical_equal(result, expected)
1711-
1712-
msg = "to union ordered Categoricals, all categories must be the same"
1713-
with tm.assertRaisesRegexp(TypeError, msg):
1714-
union_categoricals([c1, c2], ignore_order=False)
1715-
1716-
with tm.assertRaisesRegexp(TypeError, msg):
1717-
union_categoricals([c1, c2])
1718-
1719-
def test_union_categoricals_sort(self):
1720-
# GH 13846
1721-
c1 = Categorical(['x', 'y', 'z'])
1722-
c2 = Categorical(['a', 'b', 'c'])
1723-
result = union_categoricals([c1, c2], sort_categories=True)
1724-
expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
1725-
categories=['a', 'b', 'c', 'x', 'y', 'z'])
1726-
tm.assert_categorical_equal(result, expected)
1727-
1728-
# fastpath
1729-
c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
1730-
c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
1731-
result = union_categoricals([c1, c2], sort_categories=True)
1732-
expected = Categorical(['a', 'b', 'b', 'c'],
1733-
categories=['a', 'b', 'c'])
1734-
tm.assert_categorical_equal(result, expected)
1735-
1736-
c1 = Categorical(['a', 'b'], categories=['c', 'a', 'b'])
1737-
c2 = Categorical(['b', 'c'], categories=['c', 'a', 'b'])
1738-
result = union_categoricals([c1, c2], sort_categories=True)
1739-
expected = Categorical(['a', 'b', 'b', 'c'],
1740-
categories=['a', 'b', 'c'])
1741-
tm.assert_categorical_equal(result, expected)
1742-
1743-
# fastpath - skip resort
1744-
c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
1745-
c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
1746-
result = union_categoricals([c1, c2], sort_categories=True)
1747-
expected = Categorical(['a', 'b', 'b', 'c'],
1748-
categories=['a', 'b', 'c'])
1749-
tm.assert_categorical_equal(result, expected)
1750-
1751-
c1 = Categorical(['x', np.nan])
1752-
c2 = Categorical([np.nan, 'b'])
1753-
result = union_categoricals([c1, c2], sort_categories=True)
1754-
expected = Categorical(['x', np.nan, np.nan, 'b'],
1755-
categories=['b', 'x'])
1756-
tm.assert_categorical_equal(result, expected)
1757-
1758-
c1 = Categorical([np.nan])
1759-
c2 = Categorical([np.nan])
1760-
result = union_categoricals([c1, c2], sort_categories=True)
1761-
expected = Categorical([np.nan, np.nan], categories=[])
1762-
tm.assert_categorical_equal(result, expected)
1763-
1764-
c1 = Categorical([])
1765-
c2 = Categorical([])
1766-
result = union_categoricals([c1, c2], sort_categories=True)
1767-
expected = Categorical([])
1768-
tm.assert_categorical_equal(result, expected)
1769-
1770-
c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
1771-
c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
1772-
with tm.assertRaises(TypeError):
1773-
union_categoricals([c1, c2], sort_categories=True)
1774-
1775-
def test_union_categoricals_sort_false(self):
1776-
# GH 13846
1777-
c1 = Categorical(['x', 'y', 'z'])
1778-
c2 = Categorical(['a', 'b', 'c'])
1779-
result = union_categoricals([c1, c2], sort_categories=False)
1780-
expected = Categorical(['x', 'y', 'z', 'a', 'b', 'c'],
1781-
categories=['x', 'y', 'z', 'a', 'b', 'c'])
1782-
tm.assert_categorical_equal(result, expected)
1783-
1784-
# fastpath
1785-
c1 = Categorical(['a', 'b'], categories=['b', 'a', 'c'])
1786-
c2 = Categorical(['b', 'c'], categories=['b', 'a', 'c'])
1787-
result = union_categoricals([c1, c2], sort_categories=False)
1788-
expected = Categorical(['a', 'b', 'b', 'c'],
1789-
categories=['b', 'a', 'c'])
1790-
tm.assert_categorical_equal(result, expected)
1791-
1792-
# fastpath - skip resort
1793-
c1 = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
1794-
c2 = Categorical(['b', 'c'], categories=['a', 'b', 'c'])
1795-
result = union_categoricals([c1, c2], sort_categories=False)
1796-
expected = Categorical(['a', 'b', 'b', 'c'],
1797-
categories=['a', 'b', 'c'])
1798-
tm.assert_categorical_equal(result, expected)
1799-
1800-
c1 = Categorical(['x', np.nan])
1801-
c2 = Categorical([np.nan, 'b'])
1802-
result = union_categoricals([c1, c2], sort_categories=False)
1803-
expected = Categorical(['x', np.nan, np.nan, 'b'],
1804-
categories=['x', 'b'])
1805-
tm.assert_categorical_equal(result, expected)
1806-
1807-
c1 = Categorical([np.nan])
1808-
c2 = Categorical([np.nan])
1809-
result = union_categoricals([c1, c2], sort_categories=False)
1810-
expected = Categorical([np.nan, np.nan], categories=[])
1811-
tm.assert_categorical_equal(result, expected)
1812-
1813-
c1 = Categorical([])
1814-
c2 = Categorical([])
1815-
result = union_categoricals([c1, c2], sort_categories=False)
1816-
expected = Categorical([])
1817-
tm.assert_categorical_equal(result, expected)
1818-
1819-
c1 = Categorical(['b', 'a'], categories=['b', 'a', 'c'], ordered=True)
1820-
c2 = Categorical(['a', 'c'], categories=['b', 'a', 'c'], ordered=True)
1821-
result = union_categoricals([c1, c2], sort_categories=False)
1822-
expected = Categorical(['b', 'a', 'a', 'c'],
1823-
categories=['b', 'a', 'c'], ordered=True)
1824-
tm.assert_categorical_equal(result, expected)
1825-
1826-
def test_union_categorical_unwrap(self):
1827-
# GH 14173
1828-
c1 = Categorical(['a', 'b'])
1829-
c2 = pd.Series(['b', 'c'], dtype='category')
1830-
result = union_categoricals([c1, c2])
1831-
expected = Categorical(['a', 'b', 'b', 'c'])
1832-
tm.assert_categorical_equal(result, expected)
1833-
1834-
c2 = CategoricalIndex(c2)
1835-
result = union_categoricals([c1, c2])
1836-
tm.assert_categorical_equal(result, expected)
1837-
1838-
c1 = Series(c1)
1839-
result = union_categoricals([c1, c2])
1840-
tm.assert_categorical_equal(result, expected)
1841-
1842-
with tm.assertRaises(TypeError):
1843-
union_categoricals([c1, ['a', 'b', 'c']])
1844-
18451513
def test_concat_bug_1719(self):
18461514
ts1 = tm.makeTimeSeries()
18471515
ts2 = tm.makeTimeSeries()[::2]

0 commit comments

Comments (0)