Skip to content

Commit 7ab70c1

Browse files
committed
fix insert, delete
1 parent 0c38792 commit 7ab70c1

File tree

4 files changed

+152
-12
lines changed

4 files changed

+152
-12
lines changed

pandas/core/index.py

+49-2
Original file line numberDiff line numberDiff line change
@@ -2559,12 +2559,17 @@ def create_categorical(data=data, categories=categories):
25592559

25602560
if isinstance(data, com.ABCCategorical):
25612561
data = create_categorical(data, categories)
2562-
elif data is None or np.isscalar(data):
2563-
cls._scalar_data_error(data)
25642562
elif isinstance(data, CategoricalIndex):
25652563
data = data._data
25662564
data = create_categorical(data, categories)
25672565
else:
2566+
2567+
# don't allow scalars
2568+
# if data is None, then categories must be provided
2569+
if lib.isscalar(data):
2570+
if data is not None or categories is None:
2571+
cls._scalar_data_error(data)
2572+
data = []
25682573
from pandas.core.categorical import Categorical
25692574
data = Categorical(data, categories=categories, ordered=True)
25702575

@@ -2718,6 +2723,48 @@ def get_indexer(self, target, method=None, limit=None):
27182723

27192724
return com._ensure_platform_int(indexer)
27202725

2726+
def delete(self, loc):
2727+
"""
2728+
Make new Index with passed location(-s) deleted
2729+
2730+
Returns
2731+
-------
2732+
new_index : Index
2733+
"""
2734+
from pandas import Categorical
2735+
cat = Categorical.from_codes(np.delete(self.codes, loc), categories=self.categories)
2736+
return CategoricalIndex(cat, name=self.name)
2737+
2738+
def insert(self, loc, item):
2739+
"""
2740+
Make new Index inserting new item at location. Follows
2741+
Python list.insert semantics for negative values
2742+
2743+
Parameters
2744+
----------
2745+
loc : int
2746+
item : object
2747+
2748+
Returns
2749+
-------
2750+
new_index : Index
2751+
2752+
Raises
2753+
------
2754+
ValueError if the item is not in the categories
2755+
2756+
"""
2757+
code = self.categories.get_indexer([item])
2758+
if (code == -1):
2759+
raise ValueError("cannot insert a non-category item to a CategoricalIndex")
2760+
2761+
from pandas import Categorical
2762+
codes = self.codes
2763+
idx = np.concatenate(
2764+
(codes[:loc], code, codes[loc:]))
2765+
cat = Categorical.from_codes(idx, categories=self.categories)
2766+
return CategoricalIndex(cat, name=self.name)
2767+
27212768

27222769
CategoricalIndex._add_numeric_methods_disabled()
27232770
CategoricalIndex._add_logical_methods_disabled()

pandas/tests/test_categorical.py

+20
Original file line numberDiff line numberDiff line change
@@ -2506,6 +2506,8 @@ def f():
25062506
dfx['grade'].cat.categories
25072507
self.assert_numpy_array_equal(df['grade'].cat.categories, dfx['grade'].cat.categories)
25082508

2509+
def test_concat_preserve(self):
2510+
25092511
# GH 8641
25102512
# series concat not preserving category dtype
25112513
s = Series(list('abc'),dtype='category')
@@ -2523,6 +2525,24 @@ def f():
25232525
expected = Series(list('abcabc'),index=[0,1,2,0,1,2]).astype('category')
25242526
tm.assert_series_equal(result, expected)
25252527

2528+
a = Series(np.arange(6,dtype='int64'))
2529+
b = Series(list('aabbca'))
2530+
2531+
df2 = DataFrame({'A' : a, 'B' : b.astype('category',categories=list('cab')) })
2532+
result = pd.concat([df2,df2])
2533+
expected = DataFrame({'A' : pd.concat([a,a]), 'B' : pd.concat([b,b]).astype('category',categories=list('cab')) })
2534+
tm.assert_frame_equal(result, expected)
2535+
2536+
def test_categorical_index_preserver(self):
2537+
2538+
a = Series(np.arange(6,dtype='int64'))
2539+
b = Series(list('aabbca'))
2540+
2541+
df2 = DataFrame({'A' : a, 'B' : b.astype('category',categories=list('cab')) }).set_index('B')
2542+
result = pd.concat([df2,df2])
2543+
expected = DataFrame({'A' : pd.concat([a,a]), 'B' : pd.concat([b,b]).astype('category',categories=list('cab')) }).set_index('B')
2544+
tm.assert_frame_equal(result, expected)
2545+
25262546
def test_append(self):
25272547
cat = pd.Categorical(["a","b"], categories=["a","b"])
25282548
vals = [1,2]

pandas/tests/test_frame.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -10699,12 +10699,10 @@ def test_indexing_categorical_index(self):
1069910699

1070010700
# value not in the categories
1070110701
self.assertRaises(KeyError, lambda : df.loc['d'])
10702+
def f():
10703+
df.loc['d'] = 10
1070210704

10703-
df.loc['d'] = 10
10704-
expected = DataFrame({'A' : [20,20,2,3,4,20,10],
10705-
'B' : Series(list('aabbcad'))}).set_index('B')
10706-
expected.index.name = None
10707-
assert_frame_equal(df, expected)
10705+
self.assertRaises(ValueError, f)
1070810706

1070910707
def test_sort_nan(self):
1071010708
# GH3917

pandas/tests/test_index.py

+80-5
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,8 @@ def create_index(self):
13291329
return CategoricalIndex(list('aabbcdefg'))
13301330

13311331
def test_construction(self):
1332+
1333+
categories = list('abcd')
13321334
idx = self.create_index()
13331335

13341336
result = Index(idx)
@@ -1337,23 +1339,28 @@ def test_construction(self):
13371339
result = Index(idx.values)
13381340
self.assertTrue(result.equals(idx))
13391341

1342+
# empty
1343+
result = CategoricalIndex(categories=categories)
1344+
self.assertTrue(result.categories.equals(Index(categories)))
1345+
self.assert_numpy_array_equal(result.codes,np.array([],dtype='int8'))
1346+
13401347
# passing categories
1341-
result = CategoricalIndex(list('aabbca'),categories=list('abcd'))
1342-
self.assertTrue(result.categories.equals(Index(list('abcd'))))
1348+
result = CategoricalIndex(list('aabbca'),categories=categories)
1349+
self.assertTrue(result.categories.equals(Index(categories)))
13431350
self.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8'))
13441351

13451352
c = pd.Categorical(list('aabbca'))
13461353
result = CategoricalIndex(c)
13471354
self.assertTrue(result.categories.equals(Index(list('abc'))))
13481355
self.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8'))
13491356

1350-
result = CategoricalIndex(c,categories=list('abcd'))
1351-
self.assertTrue(result.categories.equals(Index(list('abcd'))))
1357+
result = CategoricalIndex(c,categories=categories)
1358+
self.assertTrue(result.categories.equals(Index(categories)))
13521359
self.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8'))
13531360

13541361
ci = CategoricalIndex(c,categories=list('abcd'))
13551362
result = CategoricalIndex(ci)
1356-
self.assertTrue(result.categories.equals(Index(list('abcd'))))
1363+
self.assertTrue(result.categories.equals(Index(categories)))
13571364
self.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8'))
13581365

13591366
result = CategoricalIndex(ci, categories=list('ab'))
@@ -1368,6 +1375,74 @@ def test_construction(self):
13681375
self.assertIsInstance(result, Index)
13691376
self.assertNotIsInstance(result, CategoricalIndex)
13701377

1378+
def test_append(self):
1379+
1380+
categories = list('cab')
1381+
ci = CategoricalIndex(list('aabbca'), categories=categories)
1382+
1383+
# append cats with the same categories
1384+
result = ci[:3].append(ci[3:])
1385+
self.assertTrue(result.equals(ci))
1386+
1387+
foos = [ci[:1], ci[1:3], ci[3:]]
1388+
result = foos[0].append(foos[1:])
1389+
self.assertTrue(result.equals(ci))
1390+
1391+
# empty
1392+
result = ci.append([])
1393+
self.assertTrue(result.equals(ci))
1394+
1395+
# appending with different categories
1396+
self.assertRaises(ValueError, lambda : ci.append(ci.values.reorder_categories(list('abc'))))
1397+
1398+
# with objects
1399+
result = ci.append(['c','a'])
1400+
expected = CategoricalIndex(list('aabbcaca'), categories=categories)
1401+
self.assertTrue(result.equals(expected))
1402+
1403+
# invalid objects
1404+
self.assertRaises(ValueError, lambda : ci.append(['a','d']))
1405+
1406+
def test_insert(self):
1407+
1408+
categories = list('cab')
1409+
ci = CategoricalIndex(list('aabbca'), categories=categories)
1410+
1411+
#test 0th element
1412+
result = ci.insert(0, 'a')
1413+
expected = CategoricalIndex(list('aaabbca'),categories=categories)
1414+
self.assertTrue(result.equals(expected))
1415+
1416+
#test Nth element that follows Python list behavior
1417+
result = ci.insert(-1, 'a')
1418+
expected = CategoricalIndex(list('aabbcaa'),categories=categories)
1419+
self.assertTrue(result.equals(expected))
1420+
1421+
#test empty
1422+
result = CategoricalIndex(categories=categories).insert(0, 'a')
1423+
expected = CategoricalIndex(['a'],categories=categories)
1424+
self.assertTrue(result.equals(expected))
1425+
1426+
# invalid
1427+
self.assertRaises(ValueError, lambda : ci.insert(0,'d'))
1428+
1429+
def test_delete(self):
1430+
1431+
categories = list('cab')
1432+
ci = CategoricalIndex(list('aabbca'), categories=categories)
1433+
1434+
result = ci.delete(0)
1435+
expected = CategoricalIndex(list('abbca'),categories=categories)
1436+
self.assertTrue(result.equals(expected))
1437+
1438+
result = ci.delete(-1)
1439+
expected = CategoricalIndex(list('aabbc'),categories=categories)
1440+
self.assertTrue(result.equals(expected))
1441+
1442+
with tm.assertRaises((IndexError, ValueError)):
1443+
# either, depending on numpy version
1444+
result = ci.delete(10)
1445+
13711446
def test_astype(self):
13721447

13731448
idx = self.create_index()

0 commit comments

Comments
 (0)