Skip to content

Commit a993a03

Browse files
committed
Merge pull request #3627 from jreback/multi_nan
BUG: (GH3588) fix pivoting with nan in the index
2 parents c468d2f + 1892a60 commit a993a03

File tree

6 files changed

+78
-30
lines changed

6 files changed

+78
-30
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ pandas 0.11.1
144144
- Fix plotting of unordered DatetimeIndex (GH3601_)
145145
- ``sql.write_frame`` failing when writing a single column to sqlite (GH3628_),
146146
thanks to @stonebig
147+
- Fix pivoting with ``nan`` in the index (GH3588_)
147148

148149
.. _GH3164: https://github.com/pydata/pandas/issues/3164
149150
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -194,6 +195,7 @@ pandas 0.11.1
194195
.. _GH3617: https://github.com/pydata/pandas/issues/3617
195196
.. _GH3435: https://github.com/pydata/pandas/issues/3435
196197
.. _GH3611: https://github.com/pydata/pandas/issues/3611
198+
.. _GH3588: https://github.com/pydata/pandas/issues/3588
197199
.. _GH3062: https://github.com/pydata/pandas/issues/3062
198200
.. _GH3624: https://github.com/pydata/pandas/issues/3624
199201
.. _GH3626: https://github.com/pydata/pandas/issues/3626

pandas/core/indexing.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -469,11 +469,14 @@ def _reindex(keys, level=None):
469469

470470
missing = com._ensure_platform_int(missing)
471471
missing_labels = keyarr.take(missing)
472-
missing_labels_indexer = com._ensure_int64(l[~check])
472+
missing_indexer = com._ensure_int64(l[~check])
473473
cur_labels = result._get_axis(axis).values
474-
cur_labels_indexer = com._ensure_int64(l[check])
475-
new_labels = lib.combine_from_indexers(cur_labels, cur_labels_indexer,
476-
missing_labels, missing_labels_indexer)
474+
cur_indexer = com._ensure_int64(l[check])
475+
476+
new_labels = np.empty(tuple([len(indexer)]),dtype=object)
477+
new_labels[cur_indexer] = cur_labels
478+
new_labels[missing_indexer] = missing_labels
479+
477480
result = result.reindex_axis(new_labels,axis=axis)
478481

479482
return result

pandas/core/reshape.py

+47-5
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@
1010

1111
from pandas.core.categorical import Categorical
1212
from pandas.core.common import (notnull, _ensure_platform_int, _maybe_promote,
13-
_maybe_upcast)
13+
_maybe_upcast, isnull)
1414
from pandas.core.groupby import (get_group_index, _compress_group_index,
1515
decons_group_index)
1616
import pandas.core.common as com
1717
import pandas.algos as algos
18-
18+
from pandas import lib
1919

2020
from pandas.core.index import MultiIndex, Index
2121

@@ -67,7 +67,14 @@ def __init__(self, values, index, level=-1, value_columns=None):
6767
self.index = index
6868
self.level = self.index._get_level_number(level)
6969

70-
self.new_index_levels = list(index.levels)
70+
levels = index.levels
71+
labels = index.labels
72+
def _make_index(lev,lab):
73+
i = lev.__class__(_make_index_array_level(lev.values,lab))
74+
i.name = lev.name
75+
return i
76+
77+
self.new_index_levels = list([ _make_index(lev,lab) for lev,lab in zip(levels,labels) ])
7178
self.new_index_names = list(index.names)
7279

7380
self.removed_name = self.new_index_names.pop(self.level)
@@ -140,6 +147,19 @@ def get_result(self):
140147
values = com.take_nd(values, inds, axis=1)
141148
columns = columns[inds]
142149

150+
# we might have a missing index
151+
if len(index) != values.shape[0]:
152+
mask = isnull(index)
153+
if mask.any():
154+
l = np.arange(len(index))
155+
values, orig_values = np.empty((len(index),values.shape[1])), values
156+
values.fill(np.nan)
157+
values_indexer = com._ensure_int64(l[~mask])
158+
for i, j in enumerate(values_indexer):
159+
values[j] = orig_values[i]
160+
else:
161+
index = index.take(self.unique_groups)
162+
143163
return DataFrame(values, index=index, columns=columns)
144164

145165
def get_new_values(self):
@@ -201,11 +221,13 @@ def get_new_columns(self):
201221
def get_new_index(self):
202222
result_labels = []
203223
for cur in self.sorted_labels[:-1]:
204-
result_labels.append(cur.take(self.compressor))
224+
labels = cur.take(self.compressor)
225+
labels = _make_index_array_level(labels,cur)
226+
result_labels.append(labels)
205227

206228
# construct the new index
207229
if len(self.new_index_levels) == 1:
208-
new_index = self.new_index_levels[0].take(self.unique_groups)
230+
new_index = self.new_index_levels[0]
209231
new_index.name = self.new_index_names[0]
210232
else:
211233
new_index = MultiIndex(levels=self.new_index_levels,
@@ -215,6 +237,26 @@ def get_new_index(self):
215237
return new_index
216238

217239

240+
def _make_index_array_level(lev,lab):
241+
""" create the combined index array, preserving nans, return an array """
242+
mask = lab == -1
243+
if not mask.any():
244+
return lev
245+
246+
l = np.arange(len(lab))
247+
mask_labels = np.empty(len(mask[mask]),dtype=object)
248+
mask_labels.fill(np.nan)
249+
mask_indexer = com._ensure_int64(l[mask])
250+
251+
labels = lev
252+
labels_indexer = com._ensure_int64(l[~mask])
253+
254+
new_labels = np.empty(tuple([len(lab)]),dtype=object)
255+
new_labels[labels_indexer] = labels
256+
new_labels[mask_indexer] = mask_labels
257+
258+
return new_labels
259+
218260
def _unstack_multiple(data, clocs):
219261
if len(clocs) == 0:
220262
return data

pandas/lib.pyx

-20
Original file line numberDiff line numberDiff line change
@@ -416,26 +416,6 @@ def dicts_to_array(list dicts, list columns):
416416

417417
return result
418418

419-
@cython.wraparound(False)
420-
@cython.boundscheck(False)
421-
def combine_from_indexers(ndarray a, ndarray[int64_t] a_indexer,
422-
ndarray b, ndarray[int64_t] b_indexer):
423-
cdef:
424-
Py_ssize_t i, n_a, n_b
425-
ndarray result
426-
427-
n_a = len(a)
428-
n_b = len(b)
429-
result = np.empty(n_a+n_b,dtype=object)
430-
431-
for i in range(n_a):
432-
result[a_indexer[i]] = a[i]
433-
for i in range(n_b):
434-
result[b_indexer[i]] = b[i]
435-
436-
return result
437-
438-
439419
def fast_zip(list ndarrays):
440420
'''
441421
For zipping multiple ndarrays into an ndarray of tuples

pandas/tests/test_indexing.py

+10
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,16 @@ def test_set_index_nan(self):
840840
result = df.set_index(['year','PRuid','QC']).reset_index().reindex(columns=df.columns)
841841
assert_frame_equal(result,df)
842842

843+
def test_multi_nan_indexing(self):
844+
845+
# GH 3588
846+
df = DataFrame({"a":['R1', 'R2', np.nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, np.nan , 20]})
847+
result = df.set_index(['a','b'], drop=False)
848+
expected = DataFrame({"a":['R1', 'R2', np.nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, np.nan , 20]},
849+
index = [Index(['R1','R2',np.nan,'R4'],name='a'),Index(['C1','C2','C3','C4'],name='b')])
850+
assert_frame_equal(result,expected)
851+
852+
843853
def test_iloc_panel_issue(self):
844854

845855
# GH 3617

pandas/tools/tests/test_pivot.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas import DataFrame, Series
5+
from pandas import DataFrame, Series, Index
66
from pandas.tools.merge import concat
77
from pandas.tools.pivot import pivot_table, crosstab
88
import pandas.util.testing as tm
@@ -129,6 +129,17 @@ def test_pivot_multi_functions(self):
129129
expected = concat([means, stds], keys=['mean', 'std'], axis=1)
130130
tm.assert_frame_equal(result, expected)
131131

132+
def test_pivot_index_with_nan(self):
133+
# GH 3588
134+
nan = np.nan
135+
df = DataFrame({"a":['R1', 'R2', nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, nan , 20]})
136+
result = df.pivot('a','b','c')
137+
expected = DataFrame([[nan,nan,nan,nan],[nan,10,nan,nan],
138+
[nan,nan,nan,nan],[nan,nan,15,20]],
139+
index = Index(['R1','R2',nan,'R4'],name='a'),
140+
columns = Index(['C1','C2','C3','C4'],name='b'))
141+
tm.assert_frame_equal(result, expected)
142+
132143
def test_margins(self):
133144
def _check_output(res, col, rows=['A', 'B'], cols=['C']):
134145
cmarg = res['All'][:-1]

0 commit comments

Comments
 (0)